@qevm/strings 5.7.0 → 5.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -4
- package/lib/_version.d.ts +1 -1
- package/lib/_version.js +1 -1
- package/lib/bytes32.d.ts.map +1 -1
- package/lib/bytes32.js +6 -6
- package/lib/bytes32.js.map +1 -1
- package/lib/idna.d.ts.map +1 -1
- package/lib/idna.js +48 -38
- package/lib/idna.js.map +1 -1
- package/lib/index.d.ts +1 -1
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +3 -3
- package/lib/index.js.map +1 -1
- package/lib/utf8.d.ts.map +1 -1
- package/lib/utf8.js +60 -52
- package/lib/utf8.js.map +1 -1
- package/package.json +31 -27
- package/src.ts/_version.ts +1 -1
- package/src.ts/bytes32.ts +14 -9
- package/src.ts/idna.ts +112 -58
- package/src.ts/index.ts +12 -7
- package/src.ts/utf8.ts +173 -85
package/src.ts/utf8.ts
CHANGED
|
@@ -2,67 +2,92 @@
|
|
|
2
2
|
|
|
3
3
|
import { arrayify, BytesLike } from "@qevm/bytes";
|
|
4
4
|
|
|
5
|
-
import { Logger } from "@
|
|
5
|
+
import { Logger } from "@qevm/logger";
|
|
6
6
|
import { version } from "./_version";
|
|
7
7
|
const logger = new Logger(version);
|
|
8
8
|
|
|
9
9
|
///////////////////////////////
|
|
10
10
|
|
|
11
11
|
export enum UnicodeNormalizationForm {
|
|
12
|
-
current
|
|
13
|
-
NFC
|
|
14
|
-
NFD
|
|
15
|
-
NFKC
|
|
16
|
-
NFKD
|
|
17
|
-
}
|
|
12
|
+
current = "",
|
|
13
|
+
NFC = "NFC",
|
|
14
|
+
NFD = "NFD",
|
|
15
|
+
NFKC = "NFKC",
|
|
16
|
+
NFKD = "NFKD",
|
|
17
|
+
}
|
|
18
18
|
|
|
19
19
|
export enum Utf8ErrorReason {
|
|
20
20
|
// A continuation byte was present where there was nothing to continue
|
|
21
21
|
// - offset = the index the codepoint began in
|
|
22
|
-
UNEXPECTED_CONTINUE
|
|
22
|
+
UNEXPECTED_CONTINUE = "unexpected continuation byte",
|
|
23
23
|
|
|
24
24
|
// An invalid (non-continuation) byte to start a UTF-8 codepoint was found
|
|
25
25
|
// - offset = the index the codepoint began in
|
|
26
|
-
BAD_PREFIX
|
|
26
|
+
BAD_PREFIX = "bad codepoint prefix",
|
|
27
27
|
|
|
28
28
|
// The string is too short to process the expected codepoint
|
|
29
29
|
// - offset = the index the codepoint began in
|
|
30
|
-
OVERRUN
|
|
30
|
+
OVERRUN = "string overrun",
|
|
31
31
|
|
|
32
32
|
// A missing continuation byte was expected but not found
|
|
33
33
|
// - offset = the index the continuation byte was expected at
|
|
34
|
-
MISSING_CONTINUE
|
|
34
|
+
MISSING_CONTINUE = "missing continuation byte",
|
|
35
35
|
|
|
36
36
|
// The computed code point is outside the range for UTF-8
|
|
37
37
|
// - offset = start of this codepoint
|
|
38
38
|
// - badCodepoint = the computed codepoint; outside the UTF-8 range
|
|
39
|
-
OUT_OF_RANGE
|
|
39
|
+
OUT_OF_RANGE = "out of UTF-8 range",
|
|
40
40
|
|
|
41
41
|
// UTF-8 strings may not contain UTF-16 surrogate pairs
|
|
42
42
|
// - offset = start of this codepoint
|
|
43
43
|
// - badCodepoint = the computed codepoint; inside the UTF-16 surrogate range
|
|
44
|
-
UTF16_SURROGATE
|
|
44
|
+
UTF16_SURROGATE = "UTF-16 surrogate",
|
|
45
45
|
|
|
46
46
|
// The string is an overlong representation
|
|
47
47
|
// - offset = start of this codepoint
|
|
48
48
|
// - badCodepoint = the computed codepoint; already bounds checked
|
|
49
|
-
OVERLONG
|
|
50
|
-
};
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
export type Utf8ErrorFunc = (reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number) => number;
|
|
54
|
-
|
|
55
|
-
function errorFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<number>, output: Array<number>, badCodepoint?: number): number {
|
|
56
|
-
return logger.throwArgumentError(`invalid codepoint at offset ${ offset }; ${ reason }`, "bytes", bytes);
|
|
49
|
+
OVERLONG = "overlong representation",
|
|
57
50
|
}
|
|
58
51
|
|
|
59
|
-
|
|
52
|
+
export type Utf8ErrorFunc = (
|
|
53
|
+
reason: Utf8ErrorReason,
|
|
54
|
+
offset: number,
|
|
55
|
+
bytes: ArrayLike<number>,
|
|
56
|
+
output: Array<number>,
|
|
57
|
+
badCodepoint?: number,
|
|
58
|
+
) => number;
|
|
59
|
+
|
|
60
|
+
function errorFunc(
|
|
61
|
+
reason: Utf8ErrorReason,
|
|
62
|
+
offset: number,
|
|
63
|
+
bytes: ArrayLike<number>,
|
|
64
|
+
output: Array<number>,
|
|
65
|
+
badCodepoint?: number,
|
|
66
|
+
): number {
|
|
67
|
+
return logger.throwArgumentError(
|
|
68
|
+
`invalid codepoint at offset ${offset}; ${reason}`,
|
|
69
|
+
"bytes",
|
|
70
|
+
bytes,
|
|
71
|
+
);
|
|
72
|
+
}
|
|
60
73
|
|
|
74
|
+
function ignoreFunc(
|
|
75
|
+
reason: Utf8ErrorReason,
|
|
76
|
+
offset: number,
|
|
77
|
+
bytes: ArrayLike<number>,
|
|
78
|
+
output: Array<number>,
|
|
79
|
+
badCodepoint?: number,
|
|
80
|
+
): number {
|
|
61
81
|
// If there is an invalid prefix (including stray continuation), skip any additional continuation bytes
|
|
62
|
-
if (
|
|
82
|
+
if (
|
|
83
|
+
reason === Utf8ErrorReason.BAD_PREFIX ||
|
|
84
|
+
reason === Utf8ErrorReason.UNEXPECTED_CONTINUE
|
|
85
|
+
) {
|
|
63
86
|
let i = 0;
|
|
64
87
|
for (let o = offset + 1; o < bytes.length; o++) {
|
|
65
|
-
if (bytes[o] >> 6 !== 0x02) {
|
|
88
|
+
if (bytes[o] >> 6 !== 0x02) {
|
|
89
|
+
break;
|
|
90
|
+
}
|
|
66
91
|
i++;
|
|
67
92
|
}
|
|
68
93
|
return i;
|
|
@@ -78,8 +103,13 @@ function ignoreFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<nu
|
|
|
78
103
|
return 0;
|
|
79
104
|
}
|
|
80
105
|
|
|
81
|
-
function replaceFunc(
|
|
82
|
-
|
|
106
|
+
function replaceFunc(
|
|
107
|
+
reason: Utf8ErrorReason,
|
|
108
|
+
offset: number,
|
|
109
|
+
bytes: ArrayLike<number>,
|
|
110
|
+
output: Array<number>,
|
|
111
|
+
badCodepoint?: number,
|
|
112
|
+
): number {
|
|
83
113
|
// Overlong representations are otherwise "valid" code points; just non-distinguished
|
|
84
114
|
if (reason === Utf8ErrorReason.OVERLONG) {
|
|
85
115
|
output.push(badCodepoint);
|
|
@@ -94,15 +124,20 @@ function replaceFunc(reason: Utf8ErrorReason, offset: number, bytes: ArrayLike<n
|
|
|
94
124
|
}
|
|
95
125
|
|
|
96
126
|
// Common error handling strategies
|
|
97
|
-
export const Utf8ErrorFuncs: { [
|
|
127
|
+
export const Utf8ErrorFuncs: { [name: string]: Utf8ErrorFunc } = Object.freeze({
|
|
98
128
|
error: errorFunc,
|
|
99
129
|
ignore: ignoreFunc,
|
|
100
|
-
replace: replaceFunc
|
|
130
|
+
replace: replaceFunc,
|
|
101
131
|
});
|
|
102
132
|
|
|
103
133
|
// http://stackoverflow.com/questions/13356493/decode-utf-8-with-javascript#13691499
|
|
104
|
-
function getUtf8CodePoints(
|
|
105
|
-
|
|
134
|
+
function getUtf8CodePoints(
|
|
135
|
+
bytes: BytesLike,
|
|
136
|
+
onError?: Utf8ErrorFunc,
|
|
137
|
+
): Array<number> {
|
|
138
|
+
if (onError == null) {
|
|
139
|
+
onError = Utf8ErrorFuncs.error;
|
|
140
|
+
}
|
|
106
141
|
|
|
107
142
|
bytes = arrayify(bytes);
|
|
108
143
|
|
|
@@ -110,8 +145,7 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
|
|
|
110
145
|
let i = 0;
|
|
111
146
|
|
|
112
147
|
// Invalid bytes are ignored
|
|
113
|
-
while(i < bytes.length) {
|
|
114
|
-
|
|
148
|
+
while (i < bytes.length) {
|
|
115
149
|
const c = bytes[i++];
|
|
116
150
|
|
|
117
151
|
// 0xxx xxxx
|
|
@@ -129,19 +163,23 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
|
|
|
129
163
|
extraLength = 1;
|
|
130
164
|
overlongMask = 0x7f;
|
|
131
165
|
|
|
132
|
-
|
|
166
|
+
// 1110 xxxx 10xx xxxx 10xx xxxx
|
|
133
167
|
} else if ((c & 0xf0) === 0xe0) {
|
|
134
168
|
extraLength = 2;
|
|
135
169
|
overlongMask = 0x7ff;
|
|
136
170
|
|
|
137
|
-
|
|
171
|
+
// 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
|
|
138
172
|
} else if ((c & 0xf8) === 0xf0) {
|
|
139
173
|
extraLength = 3;
|
|
140
174
|
overlongMask = 0xffff;
|
|
141
|
-
|
|
142
175
|
} else {
|
|
143
176
|
if ((c & 0xc0) === 0x80) {
|
|
144
|
-
i += onError(
|
|
177
|
+
i += onError(
|
|
178
|
+
Utf8ErrorReason.UNEXPECTED_CONTINUE,
|
|
179
|
+
i - 1,
|
|
180
|
+
bytes,
|
|
181
|
+
result,
|
|
182
|
+
);
|
|
145
183
|
} else {
|
|
146
184
|
i += onError(Utf8ErrorReason.BAD_PREFIX, i - 1, bytes, result);
|
|
147
185
|
}
|
|
@@ -162,33 +200,58 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
|
|
|
162
200
|
|
|
163
201
|
// Invalid continuation byte
|
|
164
202
|
if ((nextChar & 0xc0) != 0x80) {
|
|
165
|
-
i += onError(
|
|
203
|
+
i += onError(
|
|
204
|
+
Utf8ErrorReason.MISSING_CONTINUE,
|
|
205
|
+
i,
|
|
206
|
+
bytes,
|
|
207
|
+
result,
|
|
208
|
+
);
|
|
166
209
|
res = null;
|
|
167
210
|
break;
|
|
168
|
-
}
|
|
211
|
+
}
|
|
169
212
|
|
|
170
213
|
res = (res << 6) | (nextChar & 0x3f);
|
|
171
214
|
i++;
|
|
172
215
|
}
|
|
173
216
|
|
|
174
217
|
// See above loop for invalid continuation byte
|
|
175
|
-
if (res === null) {
|
|
218
|
+
if (res === null) {
|
|
219
|
+
continue;
|
|
220
|
+
}
|
|
176
221
|
|
|
177
222
|
// Maximum code point
|
|
178
223
|
if (res > 0x10ffff) {
|
|
179
|
-
i += onError(
|
|
224
|
+
i += onError(
|
|
225
|
+
Utf8ErrorReason.OUT_OF_RANGE,
|
|
226
|
+
i - 1 - extraLength,
|
|
227
|
+
bytes,
|
|
228
|
+
result,
|
|
229
|
+
res,
|
|
230
|
+
);
|
|
180
231
|
continue;
|
|
181
232
|
}
|
|
182
233
|
|
|
183
234
|
// Reserved for UTF-16 surrogate halves
|
|
184
235
|
if (res >= 0xd800 && res <= 0xdfff) {
|
|
185
|
-
i += onError(
|
|
236
|
+
i += onError(
|
|
237
|
+
Utf8ErrorReason.UTF16_SURROGATE,
|
|
238
|
+
i - 1 - extraLength,
|
|
239
|
+
bytes,
|
|
240
|
+
result,
|
|
241
|
+
res,
|
|
242
|
+
);
|
|
186
243
|
continue;
|
|
187
244
|
}
|
|
188
245
|
|
|
189
246
|
// Check for overlong sequences (more bytes than needed)
|
|
190
247
|
if (res <= overlongMask) {
|
|
191
|
-
i += onError(
|
|
248
|
+
i += onError(
|
|
249
|
+
Utf8ErrorReason.OVERLONG,
|
|
250
|
+
i - 1 - extraLength,
|
|
251
|
+
bytes,
|
|
252
|
+
result,
|
|
253
|
+
res,
|
|
254
|
+
);
|
|
192
255
|
continue;
|
|
193
256
|
}
|
|
194
257
|
|
|
@@ -199,8 +262,10 @@ function getUtf8CodePoints(bytes: BytesLike, onError?: Utf8ErrorFunc): Array<num
|
|
|
199
262
|
}
|
|
200
263
|
|
|
201
264
|
// http://stackoverflow.com/questions/18729405/how-to-convert-utf8-string-to-byte-array
|
|
202
|
-
export function toUtf8Bytes(
|
|
203
|
-
|
|
265
|
+
export function toUtf8Bytes(
|
|
266
|
+
str: string,
|
|
267
|
+
form: UnicodeNormalizationForm = UnicodeNormalizationForm.current,
|
|
268
|
+
): Uint8Array {
|
|
204
269
|
if (form != UnicodeNormalizationForm.current) {
|
|
205
270
|
logger.checkNormalize();
|
|
206
271
|
str = str.normalize(form);
|
|
@@ -212,11 +277,9 @@ export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = Unicod
|
|
|
212
277
|
|
|
213
278
|
if (c < 0x80) {
|
|
214
279
|
result.push(c);
|
|
215
|
-
|
|
216
280
|
} else if (c < 0x800) {
|
|
217
281
|
result.push((c >> 6) | 0xc0);
|
|
218
282
|
result.push((c & 0x3f) | 0x80);
|
|
219
|
-
|
|
220
283
|
} else if ((c & 0xfc00) == 0xd800) {
|
|
221
284
|
i++;
|
|
222
285
|
const c2 = str.charCodeAt(i);
|
|
@@ -231,7 +294,6 @@ export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = Unicod
|
|
|
231
294
|
result.push(((pair >> 12) & 0x3f) | 0x80);
|
|
232
295
|
result.push(((pair >> 6) & 0x3f) | 0x80);
|
|
233
296
|
result.push((pair & 0x3f) | 0x80);
|
|
234
|
-
|
|
235
297
|
} else {
|
|
236
298
|
result.push((c >> 12) | 0xe0);
|
|
237
299
|
result.push(((c >> 6) & 0x3f) | 0x80);
|
|
@@ -240,56 +302,82 @@ export function toUtf8Bytes(str: string, form: UnicodeNormalizationForm = Unicod
|
|
|
240
302
|
}
|
|
241
303
|
|
|
242
304
|
return arrayify(result);
|
|
243
|
-
}
|
|
305
|
+
}
|
|
244
306
|
|
|
245
307
|
function escapeChar(value: number) {
|
|
246
|
-
const hex =
|
|
308
|
+
const hex = "0000" + value.toString(16);
|
|
247
309
|
return "\\u" + hex.substring(hex.length - 4);
|
|
248
310
|
}
|
|
249
311
|
|
|
250
|
-
export function _toEscapedUtf8String(
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
312
|
+
export function _toEscapedUtf8String(
|
|
313
|
+
bytes: BytesLike,
|
|
314
|
+
onError?: Utf8ErrorFunc,
|
|
315
|
+
): string {
|
|
316
|
+
return (
|
|
317
|
+
'"' +
|
|
318
|
+
getUtf8CodePoints(bytes, onError)
|
|
319
|
+
.map((codePoint) => {
|
|
320
|
+
if (codePoint < 256) {
|
|
321
|
+
switch (codePoint) {
|
|
322
|
+
case 8:
|
|
323
|
+
return "\\b";
|
|
324
|
+
case 9:
|
|
325
|
+
return "\\t";
|
|
326
|
+
case 10:
|
|
327
|
+
return "\\n";
|
|
328
|
+
case 13:
|
|
329
|
+
return "\\r";
|
|
330
|
+
case 34:
|
|
331
|
+
return '\\"';
|
|
332
|
+
case 92:
|
|
333
|
+
return "\\\\";
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
if (codePoint >= 32 && codePoint < 127) {
|
|
337
|
+
return String.fromCharCode(codePoint);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
if (codePoint <= 0xffff) {
|
|
342
|
+
return escapeChar(codePoint);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
codePoint -= 0x10000;
|
|
346
|
+
return (
|
|
347
|
+
escapeChar(((codePoint >> 10) & 0x3ff) + 0xd800) +
|
|
348
|
+
escapeChar((codePoint & 0x3ff) + 0xdc00)
|
|
349
|
+
);
|
|
350
|
+
})
|
|
351
|
+
.join("") +
|
|
352
|
+
'"'
|
|
353
|
+
);
|
|
274
354
|
}
|
|
275
355
|
|
|
276
356
|
export function _toUtf8String(codePoints: Array<number>): string {
|
|
277
|
-
return codePoints
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
357
|
+
return codePoints
|
|
358
|
+
.map((codePoint) => {
|
|
359
|
+
if (codePoint <= 0xffff) {
|
|
360
|
+
return String.fromCharCode(codePoint);
|
|
361
|
+
}
|
|
362
|
+
codePoint -= 0x10000;
|
|
363
|
+
return String.fromCharCode(
|
|
364
|
+
((codePoint >> 10) & 0x3ff) + 0xd800,
|
|
365
|
+
(codePoint & 0x3ff) + 0xdc00,
|
|
366
|
+
);
|
|
367
|
+
})
|
|
368
|
+
.join("");
|
|
287
369
|
}
|
|
288
370
|
|
|
289
|
-
export function toUtf8String(
|
|
371
|
+
export function toUtf8String(
|
|
372
|
+
bytes: BytesLike,
|
|
373
|
+
onError?: Utf8ErrorFunc,
|
|
374
|
+
): string {
|
|
290
375
|
return _toUtf8String(getUtf8CodePoints(bytes, onError));
|
|
291
376
|
}
|
|
292
377
|
|
|
293
|
-
export function toUtf8CodePoints(
|
|
378
|
+
export function toUtf8CodePoints(
|
|
379
|
+
str: string,
|
|
380
|
+
form: UnicodeNormalizationForm = UnicodeNormalizationForm.current,
|
|
381
|
+
): Array<number> {
|
|
294
382
|
return getUtf8CodePoints(toUtf8Bytes(str, form));
|
|
295
383
|
}
|