@exodus/bytes 1.0.0-rc.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +286 -4
- package/array.d.ts +24 -0
- package/base58.js +16 -8
- package/base64.d.ts +76 -0
- package/bigint.js +14 -0
- package/encoding-lite.js +7 -0
- package/encoding.js +12 -0
- package/fallback/_utils.js +100 -10
- package/fallback/encoding.js +290 -0
- package/fallback/encoding.labels.js +46 -0
- package/fallback/encoding.util.js +34 -0
- package/fallback/hex.js +2 -70
- package/fallback/latin1.js +2 -1
- package/fallback/multi-byte.encodings.cjs +1 -0
- package/fallback/multi-byte.encodings.json +545 -0
- package/fallback/multi-byte.js +448 -0
- package/fallback/multi-byte.table.js +114 -0
- package/fallback/single-byte.encodings.js +61 -0
- package/fallback/single-byte.js +86 -0
- package/fallback/utf16.js +180 -0
- package/hex.d.ts +22 -0
- package/hex.node.js +2 -0
- package/multi-byte.js +13 -0
- package/multi-byte.node.js +25 -0
- package/package.json +62 -13
- package/single-byte.js +54 -0
- package/single-byte.node.js +62 -0
- package/utf16.js +73 -0
- package/utf16.node.js +79 -0
- package/utf8.d.ts +42 -0
- package/utf8.js +7 -9
- package/utf8.node.js +8 -5
package/utf16.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import * as js from './fallback/utf16.js'
|
|
2
|
+
import { canDecoders, isLE } from './fallback/_utils.js'
|
|
3
|
+
|
|
4
|
+
const { TextDecoder } = globalThis // Buffer is optional
|
|
5
|
+
const ignoreBOM = true
|
|
6
|
+
const decoderFatalLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM, fatal: true }) : null
|
|
7
|
+
const decoderLooseLE = canDecoders ? new TextDecoder('utf-16le', { ignoreBOM }) : null
|
|
8
|
+
const decoderFatalBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM, fatal: true }) : null
|
|
9
|
+
const decoderLooseBE = canDecoders ? new TextDecoder('utf-16be', { ignoreBOM }) : null
|
|
10
|
+
const decoderFatal16 = isLE ? decoderFatalLE : decoderFatalBE
|
|
11
|
+
const decoderLoose16 = isLE ? decoderLooseLE : decoderLooseBE // platform-endian non-fatal decoder (used for Uint16Array input)
|
|
12
|
+
const { isWellFormed } = String.prototype
|
|
13
|
+
|
|
14
|
+
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
15
|
+
|
|
16
|
+
// Unlike utf8, operates on Uint16Arrays by default
|
|
17
|
+
|
|
18
|
+
const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
|
|
19
|
+
|
|
20
|
+
function encode(str, loose = false, format = 'uint16') {
|
|
21
|
+
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
22
|
+
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
23
|
+
throw new TypeError('Unknown format')
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const shouldSwap = (isLE && format === 'uint8-be') || (!isLE && format === 'uint8-le')
|
|
27
|
+
|
|
28
|
+
// On v8 and SpiderMonkey, check via isWellFormed is faster than js
|
|
29
|
+
// On JSC, check during loop is faster than isWellFormed
|
|
30
|
+
// If isWellFormed is available, we skip check during decoding and recheck after
|
|
31
|
+
// If isWellFormed is unavailable, we check in js during decoding
|
|
32
|
+
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
|
|
33
|
+
const u16 = js.encode(str, loose, !loose && isWellFormed, shouldSwap)
|
|
34
|
+
|
|
35
|
+
if (format === 'uint8-le' || format === 'uint8-be') return to8(u16) // Already swapped
|
|
36
|
+
if (format === 'uint16') return u16
|
|
37
|
+
throw new Error('Unreachable')
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function decode(input, loose = false, format = 'uint16') {
|
|
41
|
+
let u16
|
|
42
|
+
switch (format) {
|
|
43
|
+
case 'uint16':
|
|
44
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
45
|
+
if (canDecoders) return loose ? decoderLoose16.decode(input) : decoderFatal16.decode(input)
|
|
46
|
+
u16 = input
|
|
47
|
+
break
|
|
48
|
+
case 'uint8-le':
|
|
49
|
+
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
50
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
51
|
+
if (canDecoders) return loose ? decoderLooseLE.decode(input) : decoderFatalLE.decode(input)
|
|
52
|
+
u16 = js.to16input(input, true)
|
|
53
|
+
break
|
|
54
|
+
case 'uint8-be':
|
|
55
|
+
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
56
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
57
|
+
if (canDecoders) return loose ? decoderLooseBE.decode(input) : decoderFatalBE.decode(input)
|
|
58
|
+
u16 = js.to16input(input, false)
|
|
59
|
+
break
|
|
60
|
+
default:
|
|
61
|
+
throw new TypeError('Unknown format')
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const str = js.decode(u16, loose, !loose && isWellFormed)
|
|
65
|
+
if (!loose && isWellFormed && !isWellFormed.call(str)) throw new TypeError(E_STRICT)
|
|
66
|
+
|
|
67
|
+
return str
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
|
|
71
|
+
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
|
|
72
|
+
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
|
|
73
|
+
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
|
package/utf16.node.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { nativeDecoder, isDeno, isLE } from './fallback/_utils.js'
|
|
2
|
+
import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf16.js'
|
|
3
|
+
|
|
4
|
+
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
5
|
+
|
|
6
|
+
const { isWellFormed } = String.prototype
|
|
7
|
+
const to8 = (a) => new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
|
|
8
|
+
|
|
9
|
+
// Unlike utf8, operates on Uint16Arrays by default
|
|
10
|
+
|
|
11
|
+
function encode(str, loose = false, format = 'uint16') {
|
|
12
|
+
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
13
|
+
if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
|
|
14
|
+
throw new TypeError('Unknown format')
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
if (!isWellFormed.call(str)) {
|
|
18
|
+
if (!loose) throw new TypeError(E_STRICT_UNICODE)
|
|
19
|
+
str = nativeDecoder.decode(Buffer.from(str)) // well, let's fix up (Buffer doesn't do this with utf16 encoding)
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const ble = Buffer.from(str, 'utf-16le')
|
|
23
|
+
|
|
24
|
+
if (format === 'uint8-le') return to8(ble)
|
|
25
|
+
if (format === 'uint8-be') return to8(ble.swap16())
|
|
26
|
+
if (format === 'uint16') {
|
|
27
|
+
const b = ble.byteOffset % 2 === 0 ? ble : Buffer.from(ble) // it should be already aligned, but just in case
|
|
28
|
+
if (!isLE) b.swap16()
|
|
29
|
+
return new Uint16Array(b.buffer, b.byteOffset, b.byteLength / 2)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
throw new Error('Unreachable')
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Returns a Buffer over the bytes of typed array `x`:
// a byte-swapped (16-bit) copy when `swap` is set, a zero-copy view of the same memory otherwise.
// The byte view is taken first because Buffer.from(typedArray) copies element *values* truncated
// to 0-255 rather than the underlying bytes, which would corrupt Uint16Array input.
// The swap is done on a copy so that swap16() never mutates caller-owned memory.
const swapped = (x, swap) => {
  const view = Buffer.from(x.buffer, x.byteOffset, x.byteLength)
  return swap ? Buffer.from(view).swap16() : view
}
|
|
37
|
+
|
|
38
|
+
// We skip TextDecoder on Node.js, as it is, for some reason, significantly slower than Buffer for utf16
|
|
39
|
+
function decodeNode(input, loose = false, format = 'uint16') {
|
|
40
|
+
let ble
|
|
41
|
+
if (format === 'uint16') {
|
|
42
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
43
|
+
ble = swapped(input, !isLE)
|
|
44
|
+
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
45
|
+
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
46
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
47
|
+
ble = swapped(input, format === 'uint8-be')
|
|
48
|
+
} else {
|
|
49
|
+
throw new TypeError('Unknown format')
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const str = ble.ucs2Slice(0, ble.byteLength)
|
|
53
|
+
if (isWellFormed.call(str)) return str
|
|
54
|
+
if (!loose) throw new TypeError(E_STRICT)
|
|
55
|
+
return nativeDecoder.decode(Buffer.from(str)) // fixup (see above)
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
function decodeDecoder(input, loose = false, format = 'uint16') {
|
|
59
|
+
let encoding
|
|
60
|
+
if (format === 'uint16') {
|
|
61
|
+
if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
|
|
62
|
+
encoding = isLE ? 'utf-16le' : 'utf-16be'
|
|
63
|
+
} else if (format === 'uint8-le' || format === 'uint8-be') {
|
|
64
|
+
if (!(input instanceof Uint8Array)) throw new TypeError('Expected an Uint8Array')
|
|
65
|
+
if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
|
|
66
|
+
encoding = format === 'uint8-le' ? 'utf-16le' : 'utf-16be'
|
|
67
|
+
} else {
|
|
68
|
+
throw new TypeError('Unknown format')
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return new TextDecoder(encoding, { ignoreBOM: true, fatal: !loose }).decode(input) // TODO: cache decoder?
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const decode = isDeno ? decodeDecoder : decodeNode
|
|
75
|
+
|
|
76
|
+
export const utf16fromString = (str, format = 'uint16') => encode(str, false, format)
|
|
77
|
+
export const utf16fromStringLoose = (str, format = 'uint16') => encode(str, true, format)
|
|
78
|
+
export const utf16toString = (arr, format = 'uint16') => decode(arr, false, format)
|
|
79
|
+
export const utf16toStringLoose = (arr, format = 'uint16') => decode(arr, true, format)
|
package/utf8.d.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
|
|
3
|
+
import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Encodes a string to UTF-8 bytes (strict mode)
|
|
7
|
+
* Throws on invalid Unicode (unpaired surrogates)
|
|
8
|
+
* @param str - The string to encode
|
|
9
|
+
* @param format - Output format (default: 'uint8')
|
|
10
|
+
* @returns The encoded bytes
|
|
11
|
+
*/
|
|
12
|
+
export function utf8fromString(str: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
13
|
+
export function utf8fromString(str: string, format: 'buffer'): Buffer;
|
|
14
|
+
export function utf8fromString(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Encodes a string to UTF-8 bytes (loose mode)
|
|
18
|
+
* Replaces invalid Unicode with replacement character
|
|
19
|
+
* @param str - The string to encode
|
|
20
|
+
* @param format - Output format (default: 'uint8')
|
|
21
|
+
* @returns The encoded bytes
|
|
22
|
+
*/
|
|
23
|
+
export function utf8fromStringLoose(str: string, format?: 'uint8'): Uint8ArrayBuffer;
|
|
24
|
+
export function utf8fromStringLoose(str: string, format: 'buffer'): Buffer;
|
|
25
|
+
export function utf8fromStringLoose(str: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Decodes UTF-8 bytes to a string (strict mode)
|
|
29
|
+
* Throws on invalid UTF-8 sequences
|
|
30
|
+
* @param arr - The bytes to decode
|
|
31
|
+
* @returns The decoded string
|
|
32
|
+
*/
|
|
33
|
+
export function utf8toString(arr: Uint8ArrayBuffer): string;
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Decodes UTF-8 bytes to a string (loose mode)
|
|
37
|
+
* Replaces invalid sequences with replacement character
|
|
38
|
+
* @param arr - The bytes to decode
|
|
39
|
+
* @returns The decoded string
|
|
40
|
+
*/
|
|
41
|
+
export function utf8toStringLoose(arr: Uint8ArrayBuffer): string;
|
|
42
|
+
|
package/utf8.js
CHANGED
|
@@ -1,18 +1,16 @@
|
|
|
1
1
|
import { assertUint8 } from './assert.js'
|
|
2
2
|
import { typedView } from './array.js'
|
|
3
|
-
import { isHermes } from './fallback/_utils.js'
|
|
3
|
+
import { isHermes, nativeDecoder, nativeEncoder } from './fallback/_utils.js'
|
|
4
4
|
import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
|
|
5
5
|
import * as js from './fallback/utf8.js'
|
|
6
6
|
|
|
7
|
-
const {
|
|
8
|
-
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
|
9
|
-
const isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]')) // we consider Node.js TextDecoder/TextEncoder native
|
|
10
|
-
const haveDecoder = isNative(TextDecoder)
|
|
11
|
-
const nativeEncoder = isNative(TextEncoder) ? new TextEncoder() : null
|
|
7
|
+
const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
|
|
12
8
|
// ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
|
|
13
9
|
// We don't want to strip anything unexpectedly
|
|
14
|
-
const
|
|
15
|
-
const
|
|
10
|
+
const decoderLoose = nativeDecoder
|
|
11
|
+
const decoderFatal = nativeDecoder
|
|
12
|
+
? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true })
|
|
13
|
+
: null
|
|
16
14
|
const { isWellFormed } = String.prototype
|
|
17
15
|
|
|
18
16
|
const { E_STRICT, E_STRICT_UNICODE } = js
|
|
@@ -56,7 +54,7 @@ function encode(str, loose = false) {
|
|
|
56
54
|
function decode(arr, loose = false) {
|
|
57
55
|
assertUint8(arr)
|
|
58
56
|
if (arr.byteLength === 0) return ''
|
|
59
|
-
if (
|
|
57
|
+
if (nativeDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
|
|
60
58
|
|
|
61
59
|
// Fast path for ASCII prefix, this is faster than all alternatives below
|
|
62
60
|
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr))
|
package/utf8.node.js
CHANGED
|
@@ -5,16 +5,17 @@ import { isAscii } from 'node:buffer'
|
|
|
5
5
|
|
|
6
6
|
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')
|
|
7
7
|
|
|
8
|
-
const decoderFatal = new TextDecoder('
|
|
9
|
-
const decoderLoose = new TextDecoder('
|
|
8
|
+
const decoderFatal = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true })
|
|
9
|
+
const decoderLoose = new TextDecoder('utf-8', { ignoreBOM: true })
|
|
10
10
|
const { isWellFormed } = String.prototype
|
|
11
|
+
const isDeno = Boolean(globalThis.Deno)
|
|
11
12
|
|
|
12
13
|
function encode(str, loose = false) {
|
|
13
14
|
if (typeof str !== 'string') throw new TypeError('Input is not a string')
|
|
14
15
|
const strLength = str.length
|
|
15
16
|
if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
|
|
16
17
|
let res
|
|
17
|
-
if (strLength > 0x4_00) {
|
|
18
|
+
if (strLength > 0x4_00 && !isDeno) {
|
|
18
19
|
// Faster for large strings
|
|
19
20
|
const byteLength = Buffer.byteLength(str)
|
|
20
21
|
res = Buffer.allocUnsafe(byteLength)
|
|
@@ -35,11 +36,13 @@ function decode(arr, loose = false) {
|
|
|
35
36
|
assertUint8(arr)
|
|
36
37
|
const byteLength = arr.byteLength
|
|
37
38
|
if (byteLength === 0) return ''
|
|
38
|
-
if (byteLength > 0x6_00 && isAscii(arr)) {
|
|
39
|
+
if (byteLength > 0x6_00 && !(isDeno && loose) && isAscii(arr)) {
|
|
39
40
|
// On non-ascii strings, this loses ~10% * [relative position of the first non-ascii byte] (up to 10% total)
|
|
40
41
|
// On ascii strings, this wins 1.5x on loose = false and 1.3x on loose = true
|
|
41
42
|
// Only makes sense for large enough strings
|
|
42
|
-
|
|
43
|
+
const buf = Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength)
|
|
44
|
+
if (isDeno) return buf.toString() // Deno suffers from .latin1Slice
|
|
45
|
+
return buf.latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
|
|
43
46
|
}
|
|
44
47
|
|
|
45
48
|
return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr)
|