@qevm/strings 5.7.0 → 5.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src.ts/utf8.ts CHANGED
@@ -2,67 +2,92 @@
2
2
 
3
3
  import { arrayify, BytesLike } from "@qevm/bytes";
4
4
 
5
- import { Logger } from "@ethersproject/logger";
5
+ import { Logger } from "@qevm/logger";
6
6
  import { version } from "./_version";
7
7
  const logger = new Logger(version);
8
8
 
9
9
  ///////////////////////////////
10
10
 
11
11
  export enum UnicodeNormalizationForm {
12
- current = "",
13
- NFC = "NFC",
14
- NFD = "NFD",
15
- NFKC = "NFKC",
16
- NFKD = "NFKD"
17
- };
12
+ current = "",
13
+ NFC = "NFC",
14
+ NFD = "NFD",
15
+ NFKC = "NFKC",
16
+ NFKD = "NFKD",
17
+ }
18
18
 
19
19
  export enum Utf8ErrorReason {
20
20
  // A continuation byte was present where there was nothing to continue
21
21
  // - offset = the index the codepoint began in
22
- UNEXPECTED_CONTINUE = "unexpected continuation byte",
22
+ UNEXPECTED_CONTINUE = "unexpected continuation byte",
23
23
 
24
24
  // An invalid (non-continuation) byte to start a UTF-8 codepoint was found
25
25
  // - offset = the index the codepoint began in
26
- BAD_PREFIX = "bad codepoint prefix",
26
+ BAD_PREFIX = "bad codepoint prefix",
27
27
 
28
28
  // The string is too short to process the expected codepoint
29
29
  // - offset = the index the codepoint began in
30
- OVERRUN = "string overrun",
30
+ OVERRUN = "string overrun",
31
31
 
32
32
  // A missing continuation byte was expected but not found
33
33
  // - offset = the index the continuation byte was expected at
34
- MISSING_CONTINUE = "missing continuation byte",
34
+ MISSING_CONTINUE = "missing continuation byte",
35
35
 
36
36
  // The computed code point is outside the range for UTF-8
37
37
  // - offset = start of this codepoint
38
38
  // - badCodepoint = the computed codepoint; outside the UTF-8 range
39
- OUT_OF_RANGE = "out of UTF-8 range",
39
+ OUT_OF_RANGE = "out of UTF-8 range",
40
40
 
41
41
  // UTF-8 strings may not contain UTF-16 surrogate pairs
42
42
  // - offset = start of this codepoint
43
43
  // - badCodepoint = the computed codepoint; inside the UTF-16 surrogate range
44
- UTF16_SURROGATE = "UTF-16 surrogate",
44
+ UTF16_SURROGATE = "UTF-16 surrogate",
45
45
 
46
46
  // The string is an overlong representation
47
47
  // - offset = start of this codepoint
48
48
  // - badCodepoint = the computed codepoint; already bounds checked
49
- OVERLONG = "overlong representation",
50
- };
51
-
52
-
53
- export type Utf8ErrorFunc = (reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number) => number;
54
-
55
- function errorFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number): number {
56
- return logger.throwArgumentError(`invalid codepoint at offset ${ offset }; ${ reason }`, "bytes", bytes);
49
+ OVERLONG = "overlong representation",
57
50
  }
58
51
 
59
- function ignoreFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number): number {
52
+ export type Utf8ErrorFunc = (
53
+ reason: Utf8ErrorReason,
54
+ offset: number,
55
+ bytes: ArrayLike<number>,
56
+ output: Array<number>,
57
+ badCodepoint?: number,
58
+ ) => number;
59
+
60
+ function errorFunc(
61
+ reason: Utf8ErrorReason,
62
+ offset: number,
63
+ bytes: ArrayLike<number>,
64
+ output: Array<number>,
65
+ badCodepoint?: number,
66
+ ): number {
67
+ return logger.throwArgumentError(
68
+ `invalid codepoint at offset ${offset}; ${reason}`,
69
+ "bytes",
70
+ bytes,
71
+ );
72
+ }
60
73
 
74
+ function ignoreFunc(
75
+ reason: Utf8ErrorReason,
76
+ offset: number,
77
+ bytes: ArrayLike<number>,
78
+ output: Array<number>,
79
+ badCodepoint?: number,
80
+ ): number {
61
81
  // If there is an invalid prefix (including stray continuation), skip any additional continuation bytes
62
- if (reason === Utf8ErrorReason.BAD_PREFIX || reason === Utf8ErrorReason.UNEXPECTED_CONTINUE) {
82
+ if (
83
+ reason === Utf8ErrorReason.BAD_PREFIX ||
84
+ reason === Utf8ErrorReason.UNEXPECTED_CONTINUE
85
+ ) {
63
86
  let i = 0;
64
87
  for (let o = offset + 1; o < bytes.length; o++) {
65
- if (bytes[o] >> 6 !== 0x02) { break; }
88
+ if (bytes[o] >> 6 !== 0x02) {
89
+ break;
90
+ }
66
91
  i++;
67
92
  }
68
93
  return i;
@@ -78,8 +103,13 @@ function ignoreFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<nu
78
103
  return 0;
79
104
  }
80
105
 
81
- function replaceFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number): number {
82
-
106
+ function replaceFunc(
107
+ reason: Utf8ErrorReason,
108
+ offset: number,
109
+ bytes: ArrayLike<number>,
110
+ output: Array<number>,
111
+ badCodepoint?: number,
112
+ ): number {
83
113
  // Overlong representations are otherwise "valid" code points; just non-distinguished
84
114
  if (reason === Utf8ErrorReason.OVERLONG) {
85
115
  output.push(badCodepoint);
@@ -94,15 +124,20 @@ function replaceFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<n
94
124
  }
95
125
 
96
126
  // Common error handling strategies
97
- export const Utf8ErrorFuncs: { [ name: string ]: Utf8ErrorFunc } = Object.freeze({
127
+ export const Utf8ErrorFuncs: { [name: string]: Utf8ErrorFunc } = Object.freeze({
98
128
  error: errorFunc,
99
129
  ignore: ignoreFunc,
100
- replace: replaceFunc
130
+ replace: replaceFunc,
101
131
  });
102
132
 
103
133
  // http://stackoverflow.com/questions/13356493/decode-utf-8-with-javascript#13691499
104
- function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<number> {
105
- if (onError == null) { onError = Utf8ErrorFuncs.error; }
134
+ function getUtf8CodePoints(
135
+ bytes: BytesLike,
136
+ onError?: Utf8ErrorFunc,
137
+ ): Array<number> {
138
+ if (onError == null) {
139
+ onError = Utf8ErrorFuncs.error;
140
+ }
106
141
 
107
142
  bytes = arrayify(bytes);
108
143
 
@@ -110,8 +145,7 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
110
145
  let i = 0;
111
146
 
112
147
  // Invalid bytes are ignored
113
- while(i < bytes.length) {
114
-
148
+ while (i < bytes.length) {
115
149
  const c = bytes[i++];
116
150
 
117
151
  // 0xxx xxxx
@@ -129,19 +163,23 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
129
163
  extraLength = 1;
130
164
  overlongMask = 0x7f;
131
165
 
132
- // 1110 xxxx 10xx xxxx 10xx xxxx
166
+ // 1110 xxxx 10xx xxxx 10xx xxxx
133
167
  } else if ((c & 0xf0) === 0xe0) {
134
168
  extraLength = 2;
135
169
  overlongMask = 0x7ff;
136
170
 
137
- // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
171
+ // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
138
172
  } else if ((c & 0xf8) === 0xf0) {
139
173
  extraLength = 3;
140
174
  overlongMask = 0xffff;
141
-
142
175
  } else {
143
176
  if ((c & 0xc0) === 0x80) {
144
- i += onError(Utf8ErrorReason.UNEXPECTED_CONTINUE, i - 1, bytes, result);
177
+ i += onError(
178
+ Utf8ErrorReason.UNEXPECTED_CONTINUE,
179
+ i - 1,
180
+ bytes,
181
+ result,
182
+ );
145
183
  } else {
146
184
  i += onError(Utf8ErrorReason.BAD_PREFIX, i - 1, bytes, result);
147
185
  }
@@ -162,33 +200,58 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
162
200
 
163
201
  // Invalid continuation byte
164
202
  if ((nextChar & 0xc0) != 0x80) {
165
- i += onError(Utf8ErrorReason.MISSING_CONTINUE, i, bytes, result);
203
+ i += onError(
204
+ Utf8ErrorReason.MISSING_CONTINUE,
205
+ i,
206
+ bytes,
207
+ result,
208
+ );
166
209
  res = null;
167
210
  break;
168
- };
211
+ }
169
212
 
170
213
  res = (res << 6) | (nextChar & 0x3f);
171
214
  i++;
172
215
  }
173
216
 
174
217
  // See above loop for invalid continuation byte
175
- if (res === null) { continue; }
218
+ if (res === null) {
219
+ continue;
220
+ }
176
221
 
177
222
  // Maximum code point
178
223
  if (res > 0x10ffff) {
179
- i += onError(Utf8ErrorReason.OUT_OF_RANGE, i - 1 - extraLength, bytes, result, res);
224
+ i += onError(
225
+ Utf8ErrorReason.OUT_OF_RANGE,
226
+ i - 1 - extraLength,
227
+ bytes,
228
+ result,
229
+ res,
230
+ );
180
231
  continue;
181
232
  }
182
233
 
183
234
  // Reserved for UTF-16 surrogate halves
184
235
  if (res >= 0xd800 && res <= 0xdfff) {
185
- i += onError(Utf8ErrorReason.UTF16_SURROGATE, i - 1 - extraLength, bytes, result, res);
236
+ i += onError(
237
+ Utf8ErrorReason.UTF16_SURROGATE,
238
+ i - 1 - extraLength,
239
+ bytes,
240
+ result,
241
+ res,
242
+ );
186
243
  continue;
187
244
  }
188
245
 
189
246
  // Check for overlong sequences (more bytes than needed)
190
247
  if (res <= overlongMask) {
191
- i += onError(Utf8ErrorReason.OVERLONG, i - 1 - extraLength, bytes, result, res);
248
+ i += onError(
249
+ Utf8ErrorReason.OVERLONG,
250
+ i - 1 - extraLength,
251
+ bytes,
252
+ result,
253
+ res,
254
+ );
192
255
  continue;
193
256
  }
194
257
 
@@ -199,8 +262,10 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
199
262
  }
200
263
 
201
264
  // http://stackoverflow.com/questions/18729405/how-to-convert-utf8-string-to-byte-array
202
- export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = UnicodeNormalizationForm.current): Uint8Array {
203
-
265
+ export function toUtf8Bytes(
266
+ str: string,
267
+ form: UnicodeNormalizationForm = UnicodeNormalizationForm.current,
268
+ ): Uint8Array {
204
269
  if (form != UnicodeNormalizationForm.current) {
205
270
  logger.checkNormalize();
206
271
  str = str.normalize(form);
@@ -212,11 +277,9 @@ export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = Unicod
212
277
 
213
278
  if (c < 0x80) {
214
279
  result.push(c);
215
-
216
280
  } else if (c < 0x800) {
217
281
  result.push((c >> 6) | 0xc0);
218
282
  result.push((c & 0x3f) | 0x80);
219
-
220
283
  } else if ((c & 0xfc00) == 0xd800) {
221
284
  i++;
222
285
  const c2 = str.charCodeAt(i);
@@ -231,7 +294,6 @@ export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = Unicod
231
294
  result.push(((pair >> 12) & 0x3f) | 0x80);
232
295
  result.push(((pair >> 6) & 0x3f) | 0x80);
233
296
  result.push((pair & 0x3f) | 0x80);
234
-
235
297
  } else {
236
298
  result.push((c >> 12) | 0xe0);
237
299
  result.push(((c >> 6) & 0x3f) | 0x80);
@@ -240,56 +302,82 @@ export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = Unicod
240
302
  }
241
303
 
242
304
  return arrayify(result);
243
- };
305
+ }
244
306
 
245
307
  function escapeChar(value: number) {
246
- const hex = ("0000" + value.toString(16));
308
+ const hex = "0000" + value.toString(16);
247
309
  return "\\u" + hex.substring(hex.length - 4);
248
310
  }
249
311
 
250
- export function _toEscapedUtf8String(bytes: BytesLike, onError?: Utf8ErrorFunc): string {
251
- return '"' + getUtf8CodePoints(bytes, onError).map((codePoint) => {
252
- if (codePoint < 256) {
253
- switch (codePoint) {
254
- case 8: return "\\b";
255
- case 9: return "\\t";
256
- case 10: return "\\n"
257
- case 13: return "\\r";
258
- case 34: return "\\\"";
259
- case 92: return "\\\\";
260
- }
261
-
262
- if (codePoint >= 32 && codePoint < 127) {
263
- return String.fromCharCode(codePoint);
264
- }
265
- }
266
-
267
- if (codePoint <= 0xffff) {
268
- return escapeChar(codePoint);
269
- }
270
-
271
- codePoint -= 0x10000;
272
- return escapeChar(((codePoint >> 10) & 0x3ff) + 0xd800) + escapeChar((codePoint & 0x3ff) + 0xdc00);
273
- }).join("") + '"';
312
+ export function _toEscapedUtf8String(
313
+ bytes: BytesLike,
314
+ onError?: Utf8ErrorFunc,
315
+ ): string {
316
+ return (
317
+ '"' +
318
+ getUtf8CodePoints(bytes, onError)
319
+ .map((codePoint) => {
320
+ if (codePoint < 256) {
321
+ switch (codePoint) {
322
+ case 8:
323
+ return "\\b";
324
+ case 9:
325
+ return "\\t";
326
+ case 10:
327
+ return "\\n";
328
+ case 13:
329
+ return "\\r";
330
+ case 34:
331
+ return '\\"';
332
+ case 92:
333
+ return "\\\\";
334
+ }
335
+
336
+ if (codePoint >= 32 && codePoint < 127) {
337
+ return String.fromCharCode(codePoint);
338
+ }
339
+ }
340
+
341
+ if (codePoint <= 0xffff) {
342
+ return escapeChar(codePoint);
343
+ }
344
+
345
+ codePoint -= 0x10000;
346
+ return (
347
+ escapeChar(((codePoint >> 10) & 0x3ff) + 0xd800) +
348
+ escapeChar((codePoint & 0x3ff) + 0xdc00)
349
+ );
350
+ })
351
+ .join("") +
352
+ '"'
353
+ );
274
354
  }
275
355
 
276
356
  export function _toUtf8String(codePoints: Array<number>): string {
277
- return codePoints.map((codePoint) => {
278
- if (codePoint <= 0xffff) {
279
- return String.fromCharCode(codePoint);
280
- }
281
- codePoint -= 0x10000;
282
- return String.fromCharCode(
283
- (((codePoint >> 10) & 0x3ff) + 0xd800),
284
- ((codePoint & 0x3ff) + 0xdc00)
285
- );
286
- }).join("");
357
+ return codePoints
358
+ .map((codePoint) => {
359
+ if (codePoint <= 0xffff) {
360
+ return String.fromCharCode(codePoint);
361
+ }
362
+ codePoint -= 0x10000;
363
+ return String.fromCharCode(
364
+ ((codePoint >> 10) & 0x3ff) + 0xd800,
365
+ (codePoint & 0x3ff) + 0xdc00,
366
+ );
367
+ })
368
+ .join("");
287
369
  }
288
370
 
289
- export function toUtf8String(bytes: BytesLike, onError?: Utf8ErrorFunc): string {
371
+ export function toUtf8String(
372
+ bytes: BytesLike,
373
+ onError?: Utf8ErrorFunc,
374
+ ): string {
290
375
  return _toUtf8String(getUtf8CodePoints(bytes, onError));
291
376
  }
292
377
 
293
- export function toUtf8CodePoints(str: string, form: UnicodeNormalizationForm = UnicodeNormalizationForm.current): Array<number> {
378
+ export function toUtf8CodePoints(
379
+ str: string,
380
+ form: UnicodeNormalizationForm = UnicodeNormalizationForm.current,
381
+ ): Array<number> {
294
382
  return getUtf8CodePoints(toUtf8Bytes(str, form));
295
383
  }