@exodus/bytes 1.14.1 → 1.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/single-byte.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { assertU8, E_STRING } from './fallback/_utils.js'
2
2
  import { nativeDecoderLatin1, nativeEncoder } from './fallback/platform.js'
3
- import { encodeAscii, encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
3
+ import { encodeAsciiPrefix, encodeLatin1 } from './fallback/latin1.js'
4
4
  import { assertEncoding, encodingDecoder, encodeMap, E_STRICT } from './fallback/single-byte.js'
5
5
 
6
6
  const { TextDecoder, btoa } = globalThis
@@ -90,37 +90,38 @@ function encode(s, m) {
90
90
  // fromBase64+btoa path is faster on everything where fromBase64 is fast
91
91
  const useLatin1btoa = Uint8Array.fromBase64 && btoa
92
92
 
93
+ export function latin1fromString(s) {
94
+ if (typeof s !== 'string') throw new TypeError(E_STRING)
95
+ // max limit is to not produce base64 strings that are too long
96
+ if (useLatin1btoa && s.length >= 1024 && s.length < 1e8) {
97
+ try {
98
+ return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
99
+ } catch {
100
+ throw new TypeError(E_STRICT)
101
+ }
102
+ }
103
+
104
+ if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
105
+ return encodeLatin1(s)
106
+ }
107
+
93
108
  export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
94
109
  // TODO: replacement, truncate (replacement will need varying length)
95
110
  if (mode !== 'fatal') throw new Error('Unsupported mode')
111
+ if (encoding === 'iso-8859-1') return latin1fromString
96
112
  const m = encodeMap(encoding) // asserts
97
- const isLatin1 = encoding === 'iso-8859-1'
98
113
 
99
114
  // No single-byte encoder produces surrogate pairs, so any surrogate is invalid
100
115
  // This needs special treatment only to decide how many replacement chars to output, one or two
101
116
  // Not much use in running isWellFormed, most likely cause of error is unmapped chars, not surrogate pairs
102
117
  return (s) => {
103
118
  if (typeof s !== 'string') throw new TypeError(E_STRING)
104
- if (isLatin1) {
105
- // max limit is to not produce base64 strings that are too long
106
- if (useLatin1btoa && s.length >= 1024 && s.length < 1e8) {
107
- try {
108
- return Uint8Array.fromBase64(btoa(s)) // fails on non-latin1
109
- } catch {
110
- throw new TypeError(E_STRICT)
111
- }
112
- }
113
-
114
- if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
115
- return encodeLatin1(s)
116
- }
117
119
 
118
120
  // Instead of an ASCII regex check, encode optimistically - this is faster
119
121
  // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
120
122
  if (nativeEncoder && !NON_LATIN.test(s)) {
121
- try {
122
- return encodeAscii(s, E_STRICT)
123
- } catch {}
123
+ const u8 = nativeEncoder.encode(s)
124
+ if (u8.length === s.length) return u8
124
125
  }
125
126
 
126
127
  const res = encode(s, m)
@@ -130,6 +131,5 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
130
131
  }
131
132
 
132
133
  export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
133
- export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
134
134
  export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
135
135
  export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')
@@ -39,7 +39,7 @@ export function createSinglebyteDecoder(encoding, loose = false) {
39
39
  return (arr) => {
40
40
  assertU8(arr)
41
41
  if (arr.byteLength === 0) return ''
42
- if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice() // .latin1Slice is faster than .asciiSlice
42
+ if (isLatin1 || isAscii(arr)) return toBuf(arr).latin1Slice(0, arr.byteLength) // .latin1Slice is faster than .asciiSlice
43
43
 
44
44
  // Node.js TextDecoder is broken, so we can't use it. It's also slow anyway
45
45
 
@@ -87,25 +87,33 @@ function encode(s, m) {
87
87
  return new Uint8Array(x)
88
88
  }
89
89
 
90
+ export function latin1fromString(s) {
91
+ if (typeof s !== 'string') throw new TypeError(E_STRING)
92
+ if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
93
+ const ab = new ArrayBuffer(s.length)
94
+ Buffer.from(ab).latin1Write(s)
95
+ return new Uint8Array(ab)
96
+ }
97
+
90
98
  export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
91
99
  // TODO: replacement, truncate (replacement will need varying length)
92
100
  if (mode !== 'fatal') throw new Error('Unsupported mode')
101
+ if (encoding === 'iso-8859-1') return latin1fromString
93
102
  const m = encodeMap(encoding) // asserts
94
- const isLatin1 = encoding === 'iso-8859-1'
95
103
 
96
104
  return (s) => {
97
105
  if (typeof s !== 'string') throw new TypeError(E_STRING)
98
- if (isLatin1) {
99
- if (NON_LATIN.test(s)) throw new TypeError(E_STRICT)
100
- const b = Buffer.from(s, 'latin1')
101
- return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
102
- }
103
106
 
104
107
  // Instead of an ASCII regex check, encode optimistically - this is faster
105
108
  // Check for 8-bit string with a regex though, this is instant on 8-bit strings so doesn't hurt the ASCII fast path
106
109
  if (!NON_LATIN.test(s)) {
107
- const b = Buffer.from(s, 'utf8') // ascii/latin1 coerces, we need to check
108
- if (b.length === s.length) return new Uint8Array(b.buffer, b.byteOffset, b.byteLength)
110
+ const byteLength = Buffer.byteLength(s)
111
+ // ascii/latin1 coerces, we need to check
112
+ if (byteLength === s.length) {
113
+ const ab = new ArrayBuffer(byteLength)
114
+ Buffer.from(ab).latin1Write(s)
115
+ return new Uint8Array(ab)
116
+ }
109
117
  }
110
118
 
111
119
  const res = encode(s, m)
@@ -115,6 +123,5 @@ export function createSinglebyteEncoder(encoding, { mode = 'fatal' } = {}) {
115
123
  }
116
124
 
117
125
  export const latin1toString = /* @__PURE__ */ createSinglebyteDecoder('iso-8859-1')
118
- export const latin1fromString = /* @__PURE__ */ createSinglebyteEncoder('iso-8859-1')
119
126
  export const windows1252toString = /* @__PURE__ */ createSinglebyteDecoder('windows-1252')
120
127
  export const windows1252fromString = /* @__PURE__ */ createSinglebyteEncoder('windows-1252')
package/utf16.node.js CHANGED
@@ -21,7 +21,8 @@ function encode(str, loose = false, format = 'uint16') {
21
21
  throw new TypeError(E_STRICT_UNICODE)
22
22
  }
23
23
 
24
- const ble = Buffer.from(str, 'utf-16le')
24
+ const ble = Buffer.allocUnsafeSlow(str.length * 2) // non-pooled
25
+ ble.ucs2Write(str)
25
26
 
26
27
  if (format === 'uint8-le') return to8(ble)
27
28
  if (format === 'uint8-be') return to8(ble.swap16())
package/utf8.d.ts CHANGED
@@ -35,8 +35,9 @@ import type { OutputFormat, Uint8ArrayBuffer } from './array.js';
35
35
  * @returns The encoded bytes
36
36
  */
37
37
  export function utf8fromString(string: string, format?: 'uint8'): Uint8ArrayBuffer;
38
+ export function utf8fromString(string: string, format: 'arraybuffer'): ArrayBuffer;
38
39
  export function utf8fromString(string: string, format: 'buffer'): Buffer;
39
- export function utf8fromString(string: string, format?: OutputFormat): Uint8ArrayBuffer | Buffer;
40
+ export function utf8fromString(string: string, format?: OutputFormat): Uint8ArrayBuffer | ArrayBuffer | Buffer;
40
41
 
41
42
  /**
42
43
  * Encode a string to UTF-8 bytes (loose mode)
@@ -59,11 +60,12 @@ export function utf8fromString(string: string, format?: OutputFormat): Uint8Arra
59
60
  * @returns The encoded bytes
60
61
  */
61
62
  export function utf8fromStringLoose(string: string, format?: 'uint8'): Uint8ArrayBuffer;
63
+ export function utf8fromStringLoose(string: string, format: 'arraybuffer'): ArrayBuffer;
62
64
  export function utf8fromStringLoose(string: string, format: 'buffer'): Buffer;
63
65
  export function utf8fromStringLoose(
64
66
  string: string,
65
67
  format?: OutputFormat
66
- ): Uint8ArrayBuffer | Buffer;
68
+ ): Uint8ArrayBuffer | ArrayBuffer | Buffer;
67
69
 
68
70
  /**
69
71
  * Decode UTF-8 bytes to a string (strict mode)
package/utf8.js CHANGED
@@ -1,5 +1,4 @@
1
- import { typedView } from './array.js'
2
- import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
1
+ import { assertU8, fromUint8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
3
2
  import { nativeDecoder, nativeEncoder } from './fallback/platform.js'
4
3
  import * as js from './fallback/utf8.auto.js'
5
4
 
@@ -60,7 +59,7 @@ function decode(arr, loose = false) {
60
59
  return js.decodeFast(arr, loose)
61
60
  }
62
61
 
63
- export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
64
- export const utf8fromStringLoose = (str, format = 'uint8') => typedView(encode(str, true), format)
62
+ export const utf8fromString = (str, format = 'uint8') => fromUint8(encode(str, false), format)
63
+ export const utf8fromStringLoose = (str, format = 'uint8') => fromUint8(encode(str, true), format)
65
64
  export const utf8toString = (arr) => decode(arr, false)
66
65
  export const utf8toStringLoose = (arr) => decode(arr, true)
package/utf8.node.js CHANGED
@@ -1,5 +1,4 @@
1
- import { typedView } from './array.js'
2
- import { assertU8, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
1
+ import { assertU8, fromBuffer, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
3
2
  import { E_STRICT } from './fallback/utf8.js'
4
3
  import { isAscii } from 'node:buffer'
5
4
 
@@ -16,7 +15,7 @@ try {
16
15
  // Without ICU, Node.js doesn't support fatal option for utf-8
17
16
  }
18
17
 
19
- function encode(str, loose = false) {
18
+ function encode(str, loose, format) {
20
19
  if (typeof str !== 'string') throw new TypeError(E_STRING)
21
20
  const strLength = str.length
22
21
  if (strLength === 0) return new Uint8Array() // faster than Uint8Array.of
@@ -24,18 +23,19 @@ function encode(str, loose = false) {
24
23
  if (strLength > 0x4_00 && !isDeno) {
25
24
  // Faster for large strings
26
25
  const byteLength = Buffer.byteLength(str)
27
- res = Buffer.allocUnsafe(byteLength)
28
- const ascii = byteLength === strLength
29
- const written = ascii ? res.latin1Write(str) : res.utf8Write(str)
26
+ res = format === 'buffer' ? Buffer.allocUnsafe(byteLength) : Buffer.allocUnsafeSlow(byteLength)
27
+ const written = byteLength === strLength ? res.latin1Write(str) : res.utf8Write(str)
30
28
  if (written !== byteLength) throw new Error('Failed to write all bytes') // safeguard just in case
31
- if (ascii || loose) return res // no further checks needed
32
29
  } else {
33
30
  res = Buffer.from(str)
34
- if (res.length === strLength || loose) return res
35
31
  }
36
32
 
37
- if (!isWellFormed.call(str)) throw new TypeError(E_STRICT_UNICODE)
38
- return res
33
+ // Loose and ascii do not need the check
34
+ if (!loose && res.length !== strLength && !isWellFormed.call(str)) {
35
+ throw new TypeError(E_STRICT_UNICODE)
36
+ }
37
+
38
+ return fromBuffer(res, format)
39
39
  }
40
40
 
41
41
  function decode(arr, loose = false) {
@@ -61,7 +61,7 @@ function decode(arr, loose = false) {
61
61
  return str
62
62
  }
63
63
 
64
- export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
65
- export const utf8fromStringLoose = (str, format = 'uint8') => typedView(encode(str, true), format)
64
+ export const utf8fromString = (str, format = 'uint8') => encode(str, false, format)
65
+ export const utf8fromStringLoose = (str, format = 'uint8') => encode(str, true, format)
66
66
  export const utf8toString = (arr) => decode(arr, false)
67
67
  export const utf8toStringLoose = (arr) => decode(arr, true)