@exodus/bytes 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -6
- package/bech32.js +4 -0
- package/encoding-lite.js +2 -0
- package/encoding.js +2 -0
- package/fallback/_utils.js +2 -8
- package/fallback/encoding.js +83 -1
- package/fallback/latin1.js +1 -0
- package/fallback/multi-byte.js +1 -0
- package/fallback/single-byte.js +21 -2
- package/package.json +3 -1
- package/single-byte.js +45 -2
- package/single-byte.node.js +49 -1
package/README.md
CHANGED
|
@@ -31,6 +31,7 @@ See [Performance](./Performance.md) for more info
|
|
|
31
31
|
|
|
32
32
|
```js
|
|
33
33
|
import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
|
|
34
|
+
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
|
|
34
35
|
```
|
|
35
36
|
|
|
36
37
|
Less than half the bundle size of [text-encoding](https://npmjs.com/text-encoding), [whatwg-encoding](https://npmjs.com/whatwg-encoding) or [iconv-lite](https://npmjs.com/iconv-lite) (gzipped or not).\
|
|
@@ -77,6 +78,7 @@ _These are only provided as a compatibility layer, prefer hardened APIs instead
|
|
|
77
78
|
If you don't need support for legacy multi-byte encodings, you can use the lite import:
|
|
78
79
|
```js
|
|
79
80
|
import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
|
|
81
|
+
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
|
|
80
82
|
```
|
|
81
83
|
|
|
82
84
|
This reduces the bundle size 10x:\
|
|
@@ -121,8 +123,8 @@ import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js
|
|
|
121
123
|
### `@exodus/bytes/single-byte.js`
|
|
122
124
|
|
|
123
125
|
```js
|
|
124
|
-
import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
|
|
125
|
-
import { windows1252toString } from '@exodus/bytes/single-byte.js'
|
|
126
|
+
import { createSinglebyteDecoder, createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
|
|
127
|
+
import { windows1252toString, windows1252fromString } from '@exodus/bytes/single-byte.js'
|
|
126
128
|
```
|
|
127
129
|
|
|
128
130
|
Decode the legacy single-byte encodings according to the [Encoding standard](https://encoding.spec.whatwg.org/)
|
|
@@ -137,10 +139,19 @@ Supports all single-byte encodings listed in the standard:
|
|
|
137
139
|
|
|
138
140
|
##### `createSinglebyteDecoder(encoding, loose = false)`
|
|
139
141
|
|
|
140
|
-
Create a decoder for a supported one-byte `encoding`, given
|
|
142
|
+
Create a decoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
|
|
141
143
|
|
|
142
144
|
Returns a function `decode(arr)` that decodes bytes to a string.
|
|
143
145
|
|
|
146
|
+
##### `createSinglebyteEncoder(encoding, { mode = 'fatal' })`
|
|
147
|
+
|
|
148
|
+
Create an encoder for a supported one-byte `encoding`, given its lowercased name `encoding`.
|
|
149
|
+
|
|
150
|
+
Returns a function `encode(string)` that encodes a string to bytes.
|
|
151
|
+
|
|
152
|
+
In `'fatal'` mode (default), will throw on non well-formed strings or any codepoints which could
|
|
153
|
+
not be encoded in the target encoding.
|
|
154
|
+
|
|
144
155
|
##### `windows1252toString(arr)`
|
|
145
156
|
|
|
146
157
|
Decode `windows-1252` bytes to a string.
|
|
@@ -154,6 +165,19 @@ Same as:
|
|
|
154
165
|
const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
155
166
|
```
|
|
156
167
|
|
|
168
|
+
##### `windows1252fromString(string)`
|
|
169
|
+
|
|
170
|
+
Encode a string to `windows-1252` bytes.
|
|
171
|
+
|
|
172
|
+
Also supports `ascii` and `latin-1` as those are strict subsets of `windows-1252`.
|
|
173
|
+
|
|
174
|
+
Will throw on non well-formed strings or any codepoints which could not be encoded in `windows-1252`.
|
|
175
|
+
|
|
176
|
+
Same as:
|
|
177
|
+
```js
|
|
178
|
+
const windows1252fromString = createSinglebyteEncoder('windows-1252', { mode: 'fatal' })
|
|
179
|
+
```
|
|
180
|
+
|
|
157
181
|
### `@exodus/bytes/multi-byte.js`
|
|
158
182
|
|
|
159
183
|
```js
|
|
@@ -171,7 +195,7 @@ Supports all legacy multi-byte encodings listed in the standard:
|
|
|
171
195
|
|
|
172
196
|
##### `createMultibyteDecoder(encoding, loose = false)`
|
|
173
197
|
|
|
174
|
-
Create a decoder for a supported legacy multi-byte `encoding`, given
|
|
198
|
+
Create a decoder for a supported legacy multi-byte `encoding`, given its lowercased name `encoding`.
|
|
175
199
|
|
|
176
200
|
Returns a function `decode(arr, stream = false)` that decodes bytes to a string.
|
|
177
201
|
|
|
@@ -284,6 +308,7 @@ On non-Node.js, requires peer dependency [@exodus/crypto](https://www.npmjs.com/
|
|
|
284
308
|
|
|
285
309
|
```js
|
|
286
310
|
import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding.js'
|
|
311
|
+
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding.js' // Requires Streams
|
|
287
312
|
|
|
288
313
|
// Hooks for standards
|
|
289
314
|
import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding.js'
|
|
@@ -291,7 +316,9 @@ import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from
|
|
|
291
316
|
|
|
292
317
|
Implements the [Encoding standard](https://encoding.spec.whatwg.org/):
|
|
293
318
|
[TextDecoder](https://encoding.spec.whatwg.org/#interface-textdecoder),
|
|
294
|
-
[TextEncoder](https://encoding.spec.whatwg.org/#interface-
|
|
319
|
+
[TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder),
|
|
320
|
+
[TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream),
|
|
321
|
+
[TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream),
|
|
295
322
|
some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
|
|
296
323
|
|
|
297
324
|
#### `new TextDecoder(label = 'utf-8', { fatal = false, ignoreBOM = false })`
|
|
@@ -300,7 +327,21 @@ some [hooks](https://encoding.spec.whatwg.org/#specification-hooks) (see below).
|
|
|
300
327
|
|
|
301
328
|
#### `new TextEncoder()`
|
|
302
329
|
|
|
303
|
-
[TextEncoder](https://encoding.spec.whatwg.org/#interface-
|
|
330
|
+
[TextEncoder](https://encoding.spec.whatwg.org/#interface-textencoder) implementation/polyfill.
|
|
331
|
+
|
|
332
|
+
#### `new TextDecoderStream(label = 'utf-8', { fatal = false, ignoreBOM = false })`
|
|
333
|
+
|
|
334
|
+
[TextDecoderStream](https://encoding.spec.whatwg.org/#interface-textdecoderstream) implementation/polyfill.
|
|
335
|
+
|
|
336
|
+
Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
|
|
337
|
+
[polyfilled](https://npmjs.com/package/web-streams-polyfill).
|
|
338
|
+
|
|
339
|
+
#### `new TextEncoderStream()`
|
|
340
|
+
|
|
341
|
+
[TextEncoderStream](https://encoding.spec.whatwg.org/#interface-textencoderstream) implementation/polyfill.
|
|
342
|
+
|
|
343
|
+
Requires [Streams](https://streams.spec.whatwg.org/) to be either supported by the platform or
|
|
344
|
+
[polyfilled](https://npmjs.com/package/web-streams-polyfill).
|
|
304
345
|
|
|
305
346
|
#### `labelToName(label)`
|
|
306
347
|
|
|
@@ -370,6 +411,7 @@ new TextDecoder(getBOMEncoding(input) ?? fallbackEncoding).decode(input)
|
|
|
370
411
|
|
|
371
412
|
```js
|
|
372
413
|
import { TextDecoder, TextEncoder } from '@exodus/bytes/encoding-lite.js'
|
|
414
|
+
import { TextDecoderStream, TextEncoderStream } from '@exodus/bytes/encoding-lite.js' // Requires Streams
|
|
373
415
|
|
|
374
416
|
// Hooks for standards
|
|
375
417
|
import { getBOMEncoding, legacyHookDecode, labelToName, normalizeEncoding } from '@exodus/bytes/encoding-lite.js'
|
|
@@ -393,7 +435,9 @@ To avoid inconsistencies, the exported classes and methods are exactly the same
|
|
|
393
435
|
> lite = require('@exodus/bytes/encoding-lite.js')
|
|
394
436
|
[Module: null prototype] {
|
|
395
437
|
TextDecoder: [class TextDecoder],
|
|
438
|
+
TextDecoderStream: [class TextDecoderStream],
|
|
396
439
|
TextEncoder: [class TextEncoder],
|
|
440
|
+
TextEncoderStream: [class TextEncoderStream],
|
|
397
441
|
getBOMEncoding: [Function: getBOMEncoding],
|
|
398
442
|
labelToName: [Function: labelToName],
|
|
399
443
|
legacyHookDecode: [Function: legacyHookDecode],
|
|
@@ -406,7 +450,9 @@ Error: Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encod
|
|
|
406
450
|
> full = require('@exodus/bytes/encoding.js')
|
|
407
451
|
[Module: null prototype] {
|
|
408
452
|
TextDecoder: [class TextDecoder],
|
|
453
|
+
TextDecoderStream: [class TextDecoderStream],
|
|
409
454
|
TextEncoder: [class TextEncoder],
|
|
455
|
+
TextEncoderStream: [class TextEncoderStream],
|
|
410
456
|
getBOMEncoding: [Function: getBOMEncoding],
|
|
411
457
|
labelToName: [Function: labelToName],
|
|
412
458
|
legacyHookDecode: [Function: legacyHookDecode],
|
package/bech32.js
CHANGED
|
@@ -179,6 +179,9 @@ function assertDecodeArgs(str, limit) {
|
|
|
179
179
|
if (typeof limit !== 'number' || str.length < 8 || !(str.length <= limit)) throw new Error(E_SIZE)
|
|
180
180
|
}
|
|
181
181
|
|
|
182
|
+
// this is instant on 8-bit strings
|
|
183
|
+
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
184
|
+
|
|
182
185
|
function fromBech32enc(str, limit, encoding) {
|
|
183
186
|
assertDecodeArgs(str, limit)
|
|
184
187
|
const lower = str.toLowerCase()
|
|
@@ -195,6 +198,7 @@ function fromBech32enc(str, limit, encoding) {
|
|
|
195
198
|
if (wordsLength < 0) throw new Error(E_SIZE)
|
|
196
199
|
const bytesLength = (wordsLength * 5) >> 3
|
|
197
200
|
const slice = str.slice(split + 1)
|
|
201
|
+
if (!nativeEncoder && NON_LATIN.test(slice)) throw new SyntaxError(E_CHARACTER) // otherwise can't use encodeLatin1
|
|
198
202
|
const c = nativeEncoder ? encodeAscii(slice, E_CHARACTER) : encodeLatin1(slice) // suboptimal, but only affects non-Hermes barebones
|
|
199
203
|
const bytes = new Uint8Array(bytesLength)
|
|
200
204
|
|
package/encoding-lite.js
CHANGED
package/encoding.js
CHANGED
package/fallback/_utils.js
CHANGED
|
@@ -5,14 +5,8 @@ export const isHermes = Boolean(globalThis.HermesInternal)
|
|
|
5
5
|
export const isDeno = Boolean(globalThis.Deno)
|
|
6
6
|
export const isLE = new Uint8Array(Uint16Array.of(258).buffer)[0] === 2
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
if (haveNativeBuffer) return true // we consider Node.js TextDecoder/TextEncoder native
|
|
11
|
-
const s = `${x}`
|
|
12
|
-
// See https://github.com/facebook/hermes/pull/1855#issuecomment-3659386410
|
|
13
|
-
return s.includes('[native code]') || s.includes(`[bytecode]`) // Static Hermes has [bytecode] for contrib, which includes TextEncoder/TextDecoder
|
|
14
|
-
}
|
|
15
|
-
|
|
8
|
+
// We consider Node.js TextDecoder/TextEncoder native
|
|
9
|
+
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
|
|
16
10
|
if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
|
|
17
11
|
|
|
18
12
|
export const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
package/fallback/encoding.js
CHANGED
|
@@ -68,7 +68,16 @@ function isAnyUint8Array(x) {
|
|
|
68
68
|
const fromSource = (x) => {
|
|
69
69
|
if (x instanceof Uint8Array) return x
|
|
70
70
|
if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
|
|
71
|
-
if (isAnyArrayBuffer(x)) return new Uint8Array(x)
|
|
71
|
+
if (isAnyArrayBuffer(x)) {
|
|
72
|
+
if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
|
|
73
|
+
// Old engines without .detached, try-catch
|
|
74
|
+
try {
|
|
75
|
+
return new Uint8Array(x)
|
|
76
|
+
} catch {
|
|
77
|
+
return new Uint8Array()
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
72
81
|
throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
|
|
73
82
|
}
|
|
74
83
|
|
|
@@ -259,6 +268,79 @@ export class TextEncoder {
|
|
|
259
268
|
}
|
|
260
269
|
}
|
|
261
270
|
|
|
271
|
+
const E_NO_STREAMS = 'TransformStream global not present in the environment'
|
|
272
|
+
|
|
273
|
+
// https://encoding.spec.whatwg.org/#interface-textdecoderstream
|
|
274
|
+
export class TextDecoderStream {
|
|
275
|
+
constructor(encoding = 'utf-8', options = {}) {
|
|
276
|
+
if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
|
|
277
|
+
const decoder = new TextDecoder(encoding, options)
|
|
278
|
+
const transform = new TransformStream({
|
|
279
|
+
transform: (chunk, controller) => {
|
|
280
|
+
const value = decoder.decode(fromSource(chunk), { stream: true })
|
|
281
|
+
if (value) controller.enqueue(value)
|
|
282
|
+
},
|
|
283
|
+
flush: (controller) => {
|
|
284
|
+
// https://streams.spec.whatwg.org/#dom-transformer-flush
|
|
285
|
+
const value = decoder.decode()
|
|
286
|
+
if (value) controller.enqueue(value)
|
|
287
|
+
// No need to call .terminate() (Node.js is wrong)
|
|
288
|
+
},
|
|
289
|
+
})
|
|
290
|
+
|
|
291
|
+
define(this, 'encoding', decoder.encoding)
|
|
292
|
+
define(this, 'fatal', decoder.fatal)
|
|
293
|
+
define(this, 'ignoreBOM', decoder.ignoreBOM)
|
|
294
|
+
define(this, 'readable', transform.readable)
|
|
295
|
+
define(this, 'writable', transform.writable)
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
get [Symbol.toStringTag]() {
|
|
299
|
+
return 'TextDecoderStream'
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
// https://encoding.spec.whatwg.org/#interface-textencoderstream
|
|
304
|
+
// Only UTF-8 per spec
|
|
305
|
+
export class TextEncoderStream {
|
|
306
|
+
constructor() {
|
|
307
|
+
if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
|
|
308
|
+
let lead
|
|
309
|
+
const transform = new TransformStream({
|
|
310
|
+
// https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
|
|
311
|
+
// Not identical in code, but reuses loose mode to have identical behavior
|
|
312
|
+
transform: (chunk, controller) => {
|
|
313
|
+
let s = String(chunk) // DOMString, might contain unpaired surrogates
|
|
314
|
+
if (s.length === 0) return
|
|
315
|
+
if (lead) {
|
|
316
|
+
s = lead + s
|
|
317
|
+
lead = null
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
const last = s.charCodeAt(s.length - 1) // Can't come from previous lead due to length check
|
|
321
|
+
if ((last & 0xfc_00) === 0xd8_00) {
|
|
322
|
+
lead = s[s.length - 1]
|
|
323
|
+
s = s.slice(0, -1)
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
if (s) controller.enqueue(utf8fromStringLoose(s))
|
|
327
|
+
},
|
|
328
|
+
// https://encoding.spec.whatwg.org/#encode-and-flush
|
|
329
|
+
flush: (controller) => {
|
|
330
|
+
if (lead) controller.enqueue(Uint8Array.of(0xef, 0xbf, 0xbd))
|
|
331
|
+
},
|
|
332
|
+
})
|
|
333
|
+
|
|
334
|
+
define(this, 'encoding', 'utf-8')
|
|
335
|
+
define(this, 'readable', transform.readable)
|
|
336
|
+
define(this, 'writable', transform.writable)
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
get [Symbol.toStringTag]() {
|
|
340
|
+
return 'TextEncoderStream'
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
262
344
|
// Warning: unlike whatwg-encoding, returns lowercased labels
|
|
263
345
|
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
|
|
264
346
|
export function getBOMEncoding(input) {
|
package/fallback/latin1.js
CHANGED
|
@@ -109,6 +109,7 @@ export const encodeCharcodes = isHermes
|
|
|
109
109
|
|
|
110
110
|
/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
|
|
111
111
|
|
|
112
|
+
// Warning: can be used only on checked strings, converts strings to 8-bit
|
|
112
113
|
export const encodeLatin1 = (str) => encodeCharcodes(str, new Uint8Array(str.length))
|
|
113
114
|
|
|
114
115
|
// Expects nativeEncoder to be present
|
package/fallback/multi-byte.js
CHANGED
|
@@ -474,6 +474,7 @@ const mappers = {
|
|
|
474
474
|
export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
|
|
475
475
|
|
|
476
476
|
export function multibyteDecoder(enc, loose = false) {
|
|
477
|
+
if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
|
|
477
478
|
if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
|
|
478
479
|
|
|
479
480
|
// Input is assumed to be typechecked already
|
package/fallback/single-byte.js
CHANGED
|
@@ -23,6 +23,7 @@ function getEncoding(encoding) {
|
|
|
23
23
|
|
|
24
24
|
const mappers = new Map()
|
|
25
25
|
const decoders = new Map()
|
|
26
|
+
const encmaps = new Map()
|
|
26
27
|
|
|
27
28
|
// Used only on Node.js, no reason to optimize for anything else
|
|
28
29
|
// E.g. avoiding .from and filling zero-initialized arr manually is faster on Hermes, but we avoid this codepath on Hermes completely
|
|
@@ -31,7 +32,7 @@ export function encodingMapper(encoding) {
|
|
|
31
32
|
if (cached) return cached
|
|
32
33
|
|
|
33
34
|
const codes = getEncoding(encoding)
|
|
34
|
-
const incomplete = codes.includes(
|
|
35
|
+
const incomplete = codes.includes(r)
|
|
35
36
|
let map
|
|
36
37
|
const mapper = (arr, start = 0) => {
|
|
37
38
|
if (!map) {
|
|
@@ -66,7 +67,7 @@ export function encodingDecoder(encoding) {
|
|
|
66
67
|
|
|
67
68
|
let strings
|
|
68
69
|
const codes = getEncoding(encoding)
|
|
69
|
-
const incomplete = codes.includes(
|
|
70
|
+
const incomplete = codes.includes(r)
|
|
70
71
|
const decoder = (arr, loose = false) => {
|
|
71
72
|
if (!strings) {
|
|
72
73
|
const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
|
|
@@ -85,3 +86,21 @@ export function encodingDecoder(encoding) {
|
|
|
85
86
|
decoders.set(encoding, decoder)
|
|
86
87
|
return decoder
|
|
87
88
|
}
|
|
89
|
+
|
|
90
|
+
export function encodeMap(encoding) {
|
|
91
|
+
const cached = encmaps.get(encoding)
|
|
92
|
+
if (cached) return cached
|
|
93
|
+
|
|
94
|
+
const codes = getEncoding(encoding)
|
|
95
|
+
let max = 128
|
|
96
|
+
while (codes.length < 128) codes.push(128 + codes.length)
|
|
97
|
+
for (const code of codes) if (code > max && code !== r) max = code
|
|
98
|
+
const map = new Uint8Array(max + 1) // < 10 KiB for all except macintosh, 63 KiB for macintosh
|
|
99
|
+
for (let i = 0; i < 128; i++) {
|
|
100
|
+
map[i] = i
|
|
101
|
+
if (codes[i] !== r) map[codes[i]] = 128 + i
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
encmaps.set(encoding, map)
|
|
105
|
+
return map
|
|
106
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@exodus/bytes",
|
|
3
|
-
"version": "1.6.0",
|
|
3
|
+
"version": "1.7.0",
|
|
4
4
|
"description": "Various operations on Uint8Array data",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"lint": "eslint .",
|
|
@@ -145,6 +145,7 @@
|
|
|
145
145
|
"@exodus/prettier": "^1.0.0",
|
|
146
146
|
"@exodus/test": "^1.0.0-rc.109",
|
|
147
147
|
"@noble/hashes": "^2.0.1",
|
|
148
|
+
"@petamoriken/float16": "^3.9.3",
|
|
148
149
|
"@scure/base": "^1.2.6",
|
|
149
150
|
"@stablelib/base64": "^2.0.1",
|
|
150
151
|
"@stablelib/hex": "^2.0.1",
|
|
@@ -172,6 +173,7 @@
|
|
|
172
173
|
"typescript": "^5.9.3",
|
|
173
174
|
"uint8array-tools": "^0.0.9",
|
|
174
175
|
"utf8": "^3.0.0",
|
|
176
|
+
"web-streams-polyfill": "^4.2.0",
|
|
175
177
|
"whatwg-encoding": "^3.1.1",
|
|
176
178
|
"wif": "^5.0.0"
|
|
177
179
|
},
|
package/single-byte.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
|
-
import { canDecoders } from './fallback/_utils.js'
|
|
3
|
-
import {
|
|
2
|
+
import { canDecoders, nativeEncoder } from './fallback/_utils.js'
|
|
3
|
+
import { encodeAscii } from './fallback/latin1.js'
|
|
4
|
+
import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
4
5
|
|
|
5
6
|
const { TextDecoder } = globalThis
|
|
6
7
|
|
|
@@ -34,6 +35,7 @@ function shouldUseNative(enc) {
|
|
|
34
35
|
}
|
|
35
36
|
|
|
36
37
|
export function createSinglebyteDecoder(encoding, loose = false) {
|
|
38
|
+
if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
|
|
37
39
|
assertEncoding(encoding)
|
|
38
40
|
|
|
39
41
|
if (canDecoders && shouldUseNative(encoding)) {
|
|
@@ -56,4 +58,45 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
56
58
|
}
|
|
57
59
|
}
|
|
58
60
|
|
|
61
|
+
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
62
|
+
|
|
63
|
+
function encode(s, m) {
|
|
64
|
+
const len = s.length
|
|
65
|
+
const x = new Uint8Array(len)
|
|
66
|
+
for (let i = 0; i < len; i++) {
|
|
67
|
+
const x0 = s.charCodeAt(i)
|
|
68
|
+
const c0 = m[x0]
|
|
69
|
+
if (!c0 && x0) return null
|
|
70
|
+
x[i] = c0
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
return x
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
77
|
+
// TODO: replacement, truncate (replacement will need varying length)
|
|
78
|
+
if (mode !== 'fatal') throw new Error('Unsupported mode')
|
|
79
|
+
const m = encodeMap(encoding) // asserts
|
|
80
|
+
|
|
81
|
+
// No single-byte encoder produces surrogate pairs, so any surrogate is invalid
|
|
82
|
+
// This needs special treatment only to decide how many replacement chars to output, one or two
|
|
83
|
+
// Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs
|
|
84
|
+
return (s) => {
|
|
85
|
+
if (typeof s !== 'string') throw new TypeError('Input is not a string')
|
|
86
|
+
|
|
87
|
+
// Instead of an ASCII regex check, encode optimistically - this is faster
|
|
88
|
+
// Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
|
|
89
|
+
if (nativeEncoder && !NON_LATIN.test(s)) {
|
|
90
|
+
try {
|
|
91
|
+
return encodeAscii(s, E_STRICT)
|
|
92
|
+
} catch {}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const res = encode(s, m)
|
|
96
|
+
if (!res) throw new TypeError(E_STRICT)
|
|
97
|
+
return res
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
59
101
|
export const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
102
|
+
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|
package/single-byte.node.js
CHANGED
|
@@ -2,7 +2,7 @@ import { assertUint8 } from './assert.js'
|
|
|
2
2
|
import { isAscii } from 'node:buffer'
|
|
3
3
|
import { isDeno, isLE, toBuf } from './fallback/_utils.js'
|
|
4
4
|
import { asciiPrefix } from './fallback/latin1.js'
|
|
5
|
-
import { encodingMapper, encodingDecoder, E_STRICT } from './fallback/single-byte.js'
|
|
5
|
+
import { encodingMapper, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
|
|
6
6
|
|
|
7
7
|
function latin1Prefix(arr, start) {
|
|
8
8
|
let p = start | 0
|
|
@@ -22,6 +22,7 @@ function latin1Prefix(arr, start) {
|
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
export function createSinglebyteDecoder(encoding, loose = false) {
|
|
25
|
+
if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
|
|
25
26
|
const latin1path = encoding === 'windows-1252'
|
|
26
27
|
if (isDeno) {
|
|
27
28
|
const jsDecoder = encodingDecoder(encoding) // asserts
|
|
@@ -57,4 +58,51 @@ export function createSinglebyteDecoder(encoding, loose = false) {
|
|
|
57
58
|
}
|
|
58
59
|
}
|
|
59
60
|
|
|
61
|
+
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
|
62
|
+
|
|
63
|
+
export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
|
|
64
|
+
// TODO: replacement, truncate (replacement will need varying length)
|
|
65
|
+
if (mode !== 'fatal') throw new Error('Unsupported mode')
|
|
66
|
+
const m = encodeMap(encoding) // asserts
|
|
67
|
+
|
|
68
|
+
return (s) => {
|
|
69
|
+
if (typeof s !== 'string') throw new TypeError('Input is not a string')
|
|
70
|
+
|
|
71
|
+
// Instead of an ASCII regex check, encode optimistically - this is faster
|
|
72
|
+
// Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
|
|
73
|
+
if (!NON_LATIN.test(s)) {
|
|
74
|
+
const b = Buffer.from(s, 'utf8') // ascii/latin1 coerces, we need to check
|
|
75
|
+
if (b.length === s.length) return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const len = s.length
|
|
79
|
+
let i = 0
|
|
80
|
+
const b = Buffer.from(s, 'utf-16le') // aligned
|
|
81
|
+
if (!isLE) b.swap16()
|
|
82
|
+
const x = new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)
|
|
83
|
+
for (const len3 = len - 3; i < len3; i += 4) {
|
|
84
|
+
const x0 = x[i], x1 = x[i + 1], x2 = x[i + 2], x3 = x[i + 3] // prettier-ignore
|
|
85
|
+
const c0 = m[x0], c1 = m[x1], c2 = m[x2], c3 = m[x3] // prettier-ignore
|
|
86
|
+
if (!(c0 && c1 && c2 && c3) && ((!c0 && x0) || (!c1 && x1) || (!c2 && x2) || (!c3 && x3))) {
|
|
87
|
+
throw new TypeError(E_STRICT)
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
x[i] = c0
|
|
91
|
+
x[i + 1] = c1
|
|
92
|
+
x[i + 2] = c2
|
|
93
|
+
x[i + 3] = c3
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
for (; i < len; i++) {
|
|
97
|
+
const x0 = x[i]
|
|
98
|
+
const c0 = m[x0]
|
|
99
|
+
if (!c0 && x0) throw new TypeError(E_STRICT)
|
|
100
|
+
x[i] = c0
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return new Uint8Array(x)
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
60
107
|
export const windows1252toString = createSinglebyteDecoder('windows-1252')
|
|
108
|
+
export const windows1252fromString = createSinglebyteEncoder('windows-1252')
|