@keymanapp/common-types 18.0.41-alpha → 18.0.46-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/src/consts/virtual-key-constants.d.ts +191 -191
- package/build/src/consts/virtual-key-constants.js +221 -221
- package/build/src/consts/virtual-key-constants.js.map +1 -1
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-reader.d.ts +5 -5
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-reader.js +72 -72
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-reader.js.map +1 -1
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-writer.d.ts +28 -28
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-writer.js +120 -120
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-writer.js.map +1 -1
- package/build/src/keyman-touch-layout/keyman-touch-layout-file.d.ts +95 -95
- package/build/src/keyman-touch-layout/keyman-touch-layout-file.js +52 -52
- package/build/src/kmx/element-string.d.ts +23 -23
- package/build/src/kmx/element-string.js +125 -125
- package/build/src/kmx/element-string.js.map +1 -1
- package/build/src/kmx/keyman-targets.d.ts +20 -20
- package/build/src/kmx/keyman-targets.js +79 -79
- package/build/src/kmx/keyman-targets.js.map +1 -1
- package/build/src/kmx/kmx-builder.d.ts +32 -32
- package/build/src/kmx/kmx-builder.js +189 -189
- package/build/src/kmx/kmx-builder.js.map +1 -1
- package/build/src/kmx/kmx-file-reader.d.ts +13 -13
- package/build/src/kmx/kmx-file-reader.js +134 -134
- package/build/src/kmx/kmx-file-reader.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-disp.d.ts +18 -18
- package/build/src/kmx/kmx-plus-builder/build-disp.js +27 -27
- package/build/src/kmx/kmx-plus-builder/build-disp.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-elem.d.ts +28 -28
- package/build/src/kmx/kmx-plus-builder/build-elem.js +89 -89
- package/build/src/kmx/kmx-plus-builder/build-elem.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-keys.d.ts +56 -56
- package/build/src/kmx/kmx-plus-builder/build-keys.js +107 -107
- package/build/src/kmx/kmx-plus-builder/build-keys.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-layr.d.ts +53 -53
- package/build/src/kmx/kmx-plus-builder/build-layr.js +101 -101
- package/build/src/kmx/kmx-plus-builder/build-layr.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-list.d.ts +35 -35
- package/build/src/kmx/kmx-plus-builder/build-list.js +66 -66
- package/build/src/kmx/kmx-plus-builder/build-list.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-loca.d.ts +11 -11
- package/build/src/kmx/kmx-plus-builder/build-loca.js +21 -21
- package/build/src/kmx/kmx-plus-builder/build-loca.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-meta.d.ts +16 -16
- package/build/src/kmx/kmx-plus-builder/build-meta.js +20 -20
- package/build/src/kmx/kmx-plus-builder/build-sect.d.ts +15 -15
- package/build/src/kmx/kmx-plus-builder/build-sect.js +15 -15
- package/build/src/kmx/kmx-plus-builder/build-sect.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-strs.d.ts +22 -22
- package/build/src/kmx/kmx-plus-builder/build-strs.js +46 -46
- package/build/src/kmx/kmx-plus-builder/build-strs.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-tran.d.ts +32 -32
- package/build/src/kmx/kmx-plus-builder/build-tran.js +69 -69
- package/build/src/kmx/kmx-plus-builder/build-tran.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-uset.d.ts +30 -30
- package/build/src/kmx/kmx-plus-builder/build-uset.js +60 -60
- package/build/src/kmx/kmx-plus-builder/build-uset.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-vars.d.ts +21 -21
- package/build/src/kmx/kmx-plus-builder/build-vars.js +48 -48
- package/build/src/kmx/kmx-plus-builder/build-vars.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/builder-section.d.ts +9 -9
- package/build/src/kmx/kmx-plus-builder/builder-section.js +2 -2
- package/build/src/kmx/kmx-plus-builder/kmx-plus-builder.d.ts +42 -42
- package/build/src/kmx/kmx-plus-builder/kmx-plus-builder.js +137 -137
- package/build/src/kmx/kmx-plus-builder/kmx-plus-builder.js.map +1 -1
- package/build/src/kmx/kmx-plus.d.ts +322 -322
- package/build/src/kmx/kmx-plus.js +830 -830
- package/build/src/kmx/kmx-plus.js.map +1 -1
- package/build/src/kmx/kmx.d.ts +233 -233
- package/build/src/kmx/kmx.js +302 -302
- package/build/src/kmx/kmx.js.map +1 -1
- package/build/src/kmx/string-list.d.ts +35 -35
- package/build/src/kmx/string-list.js +88 -88
- package/build/src/kmx/string-list.js.map +1 -1
- package/build/src/kpj/keyman-developer-project.d.ts +78 -78
- package/build/src/kpj/keyman-developer-project.js +195 -195
- package/build/src/kpj/keyman-developer-project.js.map +1 -1
- package/build/src/kpj/kpj-file-reader.d.ts +18 -18
- package/build/src/kpj/kpj-file-reader.js +119 -119
- package/build/src/kpj/kpj-file-reader.js.map +1 -1
- package/build/src/kpj/kpj-file.d.ts +39 -39
- package/build/src/kpj/kpj-file.js +10 -10
- package/build/src/kvk/kvk-file-reader.d.ts +4 -4
- package/build/src/kvk/kvk-file-reader.js +37 -37
- package/build/src/kvk/kvk-file-reader.js.map +1 -1
- package/build/src/kvk/kvk-file-writer.d.ts +19 -19
- package/build/src/kvk/kvk-file-writer.js +77 -77
- package/build/src/kvk/kvk-file-writer.js.map +1 -1
- package/build/src/kvk/kvk-file.d.ts +66 -66
- package/build/src/kvk/kvk-file.d.ts.map +1 -1
- package/build/src/kvk/kvk-file.js +90 -90
- package/build/src/kvk/kvk-file.js.map +1 -1
- package/build/src/kvk/kvks-file-reader.d.ts +23 -23
- package/build/src/kvk/kvks-file-reader.js +175 -175
- package/build/src/kvk/kvks-file-reader.js.map +1 -1
- package/build/src/kvk/kvks-file-writer.d.ts +6 -6
- package/build/src/kvk/kvks-file-writer.js +106 -106
- package/build/src/kvk/kvks-file-writer.js.map +1 -1
- package/build/src/kvk/kvks-file.d.ts +43 -43
- package/build/src/kvk/kvks-file.js +9 -9
- package/build/src/kvk/visual-keyboard.d.ts +44 -44
- package/build/src/kvk/visual-keyboard.js +75 -75
- package/build/src/kvk/visual-keyboard.js.map +1 -1
- package/build/src/ldml-keyboard/ldml-keyboard-testdata-xml.d.ts +60 -60
- package/build/src/ldml-keyboard/ldml-keyboard-testdata-xml.js +19 -19
- package/build/src/ldml-keyboard/ldml-keyboard-xml-reader.d.ts +95 -95
- package/build/src/ldml-keyboard/ldml-keyboard-xml-reader.js +409 -409
- package/build/src/ldml-keyboard/ldml-keyboard-xml-reader.js.map +1 -1
- package/build/src/ldml-keyboard/ldml-keyboard-xml.d.ts +170 -170
- package/build/src/ldml-keyboard/ldml-keyboard-xml.js +55 -55
- package/build/src/ldml-keyboard/pattern-parser.d.ts +201 -201
- package/build/src/ldml-keyboard/pattern-parser.js +490 -490
- package/build/src/ldml-keyboard/pattern-parser.js.map +1 -1
- package/build/src/ldml-keyboard/unicodeset-parser-api.d.ts +37 -37
- package/build/src/ldml-keyboard/unicodeset-parser-api.js +27 -27
- package/build/src/main.d.ts +33 -33
- package/build/src/main.js +32 -32
- package/build/src/package/kmp-json-file.d.ts +115 -115
- package/build/src/package/kmp-json-file.js +1 -1
- package/build/src/package/kps-file.d.ts +175 -175
- package/build/src/package/kps-file.js +15 -15
- package/build/src/schema-validators.d.ts +13 -13
- package/build/src/schema-validators.js +25 -25
- package/build/src/schemas/displaymap.schema.d.ts +57 -57
- package/build/src/schemas/displaymap.schema.js +35 -35
- package/build/src/schemas/displaymap.schema.validator.d.mts +2 -2
- package/build/src/schemas/displaymap.schema.validator.mjs +266 -266
- package/build/src/schemas/displaymap.schema.validator.mjs.map +1 -1
- package/build/src/schemas/keyboard_info.schema.d.ts +209 -209
- package/build/src/schemas/keyboard_info.schema.js +98 -98
- package/build/src/schemas/keyboard_info.schema.validator.d.mts +2 -2
- package/build/src/schemas/keyboard_info.schema.validator.mjs +1160 -1160
- package/build/src/schemas/keyboard_info.schema.validator.mjs.map +1 -1
- package/build/src/schemas/keyman-touch-layout.clean.spec.d.ts +227 -227
- package/build/src/schemas/keyman-touch-layout.clean.spec.js +151 -151
- package/build/src/schemas/keyman-touch-layout.clean.spec.validator.d.mts +2 -2
- package/build/src/schemas/keyman-touch-layout.clean.spec.validator.mjs +1094 -1094
- package/build/src/schemas/keyman-touch-layout.clean.spec.validator.mjs.map +1 -1
- package/build/src/schemas/keyman-touch-layout.spec.d.ts +229 -229
- package/build/src/schemas/keyman-touch-layout.spec.js +140 -140
- package/build/src/schemas/keyman-touch-layout.spec.validator.d.mts +2 -2
- package/build/src/schemas/keyman-touch-layout.spec.validator.mjs +1436 -1436
- package/build/src/schemas/keyman-touch-layout.spec.validator.mjs.map +1 -1
- package/build/src/schemas/kmp.schema.d.ts +324 -324
- package/build/src/schemas/kmp.schema.js +353 -353
- package/build/src/schemas/kmp.schema.validator.d.mts +2 -2
- package/build/src/schemas/kmp.schema.validator.mjs +1627 -1627
- package/build/src/schemas/kmp.schema.validator.mjs.map +1 -1
- package/build/src/schemas/kpj-9.0.schema.d.ts +152 -152
- package/build/src/schemas/kpj-9.0.schema.js +131 -131
- package/build/src/schemas/kpj-9.0.schema.validator.d.mts +2 -2
- package/build/src/schemas/kpj-9.0.schema.validator.mjs +655 -655
- package/build/src/schemas/kpj-9.0.schema.validator.mjs.map +1 -1
- package/build/src/schemas/kpj.schema.d.ts +122 -122
- package/build/src/schemas/kpj.schema.js +129 -129
- package/build/src/schemas/kpj.schema.validator.d.mts +2 -2
- package/build/src/schemas/kpj.schema.validator.mjs +515 -515
- package/build/src/schemas/kpj.schema.validator.mjs.map +1 -1
- package/build/src/schemas/kvks.schema.d.ts +160 -160
- package/build/src/schemas/kvks.schema.js +182 -182
- package/build/src/schemas/kvks.schema.validator.d.mts +2 -2
- package/build/src/schemas/kvks.schema.validator.mjs +625 -625
- package/build/src/schemas/kvks.schema.validator.mjs.map +1 -1
- package/build/src/schemas/ldml-keyboard3.schema.d.ts +635 -635
- package/build/src/schemas/ldml-keyboard3.schema.js +696 -696
- package/build/src/schemas/ldml-keyboard3.schema.validator.d.mts +2 -2
- package/build/src/schemas/ldml-keyboard3.schema.validator.mjs +3117 -3117
- package/build/src/schemas/ldml-keyboard3.schema.validator.mjs.map +1 -1
- package/build/src/schemas/ldml-keyboardtest3.schema.d.ts +192 -192
- package/build/src/schemas/ldml-keyboardtest3.schema.js +225 -225
- package/build/src/schemas/ldml-keyboardtest3.schema.validator.d.mts +2 -2
- package/build/src/schemas/ldml-keyboardtest3.schema.validator.mjs +744 -744
- package/build/src/schemas/ldml-keyboardtest3.schema.validator.mjs.map +1 -1
- package/build/src/schemas.d.ts +2300 -2300
- package/build/src/schemas.js +23 -23
- package/build/src/util/common-events.d.ts +44 -44
- package/build/src/util/common-events.js +26 -26
- package/build/src/util/compiler-interfaces.d.ts +371 -371
- package/build/src/util/compiler-interfaces.d.ts.map +1 -1
- package/build/src/util/compiler-interfaces.js +380 -380
- package/build/src/util/compiler-interfaces.js.map +1 -1
- package/build/src/util/errors.d.ts +5 -5
- package/build/src/util/errors.js +5 -5
- package/build/src/util/file-types.d.ts +93 -93
- package/build/src/util/file-types.d.ts.map +1 -1
- package/build/src/util/file-types.js +142 -142
- package/build/src/util/file-types.js.map +1 -1
- package/build/src/util/util.d.ts +98 -98
- package/build/src/util/util.js +368 -368
- package/build/src/util/util.js.map +1 -1
- package/package.json +3 -3
package/build/src/util/util.js
CHANGED
|
@@ -1,370 +1,370 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* xml2js will not place single-entry objects into arrays. Easiest way to fix
|
|
3
|
-
* this is to box them ourselves as needed. Ensures that o.x is an array.
|
|
4
|
-
*
|
|
5
|
-
* @param o Object with property to box
|
|
6
|
-
* @param x Name of element to box
|
|
7
|
-
*/
|
|
1
|
+
/**
|
|
2
|
+
* xml2js will not place single-entry objects into arrays. Easiest way to fix
|
|
3
|
+
* this is to box them ourselves as needed. Ensures that o.x is an array.
|
|
4
|
+
*
|
|
5
|
+
* @param o Object with property to box
|
|
6
|
+
* @param x Name of element to box
|
|
7
|
+
*/
|
|
8
8
|
|
|
9
|
-
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="
|
|
10
|
-
export function boxXmlArray(o, x) {
|
|
11
|
-
if (typeof o == 'object' && !Array.isArray(o[x])) {
|
|
12
|
-
if (o[x] === null || o[x] === undefined) {
|
|
13
|
-
o[x] = [];
|
|
14
|
-
}
|
|
15
|
-
else {
|
|
16
|
-
o[x] = [o[x]];
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
// TODO-LDML: #7569 the below regex works, but captures more than it should
|
|
21
|
-
// (it would include \u{fffffffffffffffff } which
|
|
22
|
-
// is overlong and has a space at the end.) The second regex does not work yet.
|
|
23
|
-
export const MATCH_HEX_ESCAPE = /\\u{([0-9a-fA-F ]{1,})}/g;
|
|
24
|
-
// const MATCH_HEX_ESCAPE = /\\u{((?:(?:[0-9a-fA-F]{1,5})|(?:10[0-9a-fA-F]{4})(?: (?!}))?)+)}/g;
|
|
25
|
-
/** regex for single quad escape such as \u0127 or \U00000000 */
|
|
26
|
-
export const CONTAINS_QUAD_ESCAPE = /(?:\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8}))/;
|
|
27
|
-
/** regex for single quad escape such as \u0127 */
|
|
28
|
-
export const MATCH_QUAD_ESCAPE = new RegExp(CONTAINS_QUAD_ESCAPE, 'g');
|
|
29
|
-
export class UnescapeError extends Error {
|
|
30
|
-
}
|
|
31
|
-
/**
|
|
32
|
-
* Unescape one codepoint
|
|
33
|
-
* @param hex one codepoint in hex, such as '0127'
|
|
34
|
-
* @returns the unescaped codepoint
|
|
35
|
-
*/
|
|
36
|
-
export function unescapeOne(hex) {
|
|
37
|
-
const codepoint = Number.parseInt(hex, 16);
|
|
38
|
-
return String.fromCodePoint(codepoint);
|
|
39
|
-
}
|
|
40
|
-
/**
|
|
41
|
-
* Unescape one single quad string such as \u0127 / \U00000000
|
|
42
|
-
* Throws exception if the string doesn't match MATCH_QUAD_ESCAPE
|
|
43
|
-
* Note this does not attempt to handle or reject surrogates.
|
|
44
|
-
* So, `\\uD838\\uDD09` will work but other combinations may not.
|
|
45
|
-
* @param s input string
|
|
46
|
-
* @returns output
|
|
47
|
-
*/
|
|
48
|
-
export function unescapeOneQuadString(s) {
|
|
49
|
-
if (!s || !s.match(MATCH_QUAD_ESCAPE)) {
|
|
50
|
-
throw new UnescapeError(`Not a quad escape: ${s}`);
|
|
51
|
-
}
|
|
52
|
-
function processMatch(str, m16, m32) {
|
|
53
|
-
return unescapeOne(m16 || m32); // either \u or \U
|
|
54
|
-
}
|
|
55
|
-
s = s.replace(MATCH_QUAD_ESCAPE, processMatch);
|
|
56
|
-
return s;
|
|
57
|
-
}
|
|
58
|
-
/** unscape multiple occurences of \u0127 style strings */
|
|
59
|
-
export function unescapeQuadString(s) {
|
|
60
|
-
s = s.replaceAll(MATCH_QUAD_ESCAPE, (quad) => unescapeOneQuadString(quad));
|
|
61
|
-
return s;
|
|
62
|
-
}
|
|
63
|
-
/**
|
|
64
|
-
* Unescapes a string according to UTS#18§1.1, see <https://www.unicode.org/reports/tr18/#Hex_notation>
|
|
65
|
-
* @param s escaped string
|
|
66
|
-
* @returns
|
|
67
|
-
*/
|
|
68
|
-
export function unescapeString(s) {
|
|
69
|
-
if (!s) {
|
|
70
|
-
return s;
|
|
71
|
-
}
|
|
72
|
-
try {
|
|
73
|
-
/**
|
|
74
|
-
* process one regex match
|
|
75
|
-
* @param str ignored
|
|
76
|
-
* @param matched the entire match such as '0127' or '22 22'
|
|
77
|
-
* @returns the unescaped match
|
|
78
|
-
*/
|
|
79
|
-
function processMatch(str, matched) {
|
|
80
|
-
const codepoints = matched.split(' ');
|
|
81
|
-
const unescaped = codepoints.map(unescapeOne);
|
|
82
|
-
return unescaped.join('');
|
|
83
|
-
}
|
|
84
|
-
s = s.replaceAll(MATCH_HEX_ESCAPE, processMatch);
|
|
85
|
-
}
|
|
86
|
-
catch (e) {
|
|
87
|
-
if (e instanceof RangeError) {
|
|
88
|
-
throw new UnescapeError(`Out of range while unescaping '${s}': ${e.message}`, { cause: e });
|
|
89
|
-
/* c8 ignore next 3 */
|
|
90
|
-
}
|
|
91
|
-
else {
|
|
92
|
-
throw e; // pass through some other error
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
return s;
|
|
96
|
-
}
|
|
97
|
-
/** 0000 … FFFF */
|
|
98
|
-
export function hexQuad(n) {
|
|
99
|
-
if (n < 0x0000 || n > 0xFFFF) {
|
|
100
|
-
throw RangeError(`${n} not in [0x0000,0xFFFF]`);
|
|
101
|
-
}
|
|
102
|
-
return n.toString(16).padStart(4, '0');
|
|
103
|
-
}
|
|
104
|
-
/** 00000000 … FFFFFFFF */
|
|
105
|
-
export function hexOcts(n) {
|
|
106
|
-
if (n < 0x0000 || n > 0xFFFFFFFF) {
|
|
107
|
-
throw RangeError(`${n} not in [0x00000000,0xFFFFFFFF]`);
|
|
108
|
-
}
|
|
109
|
-
return n.toString(16).padStart(8, '0');
|
|
110
|
-
}
|
|
111
|
-
/** escape one char for regex in \uXXXX form */
|
|
112
|
-
export function escapeRegexChar(ch) {
|
|
113
|
-
const code = ch.codePointAt(0);
|
|
114
|
-
if (code <= 0xFFFF) {
|
|
115
|
-
return '\\u' + hexQuad(code);
|
|
116
|
-
}
|
|
117
|
-
else {
|
|
118
|
-
return '\\U' + hexOcts(code);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
/** chars that must be escaped: syntax, C0 + C1 controls */
|
|
122
|
-
const REGEX_SYNTAX_CHAR = /^[\u0000-\u001F\u007F-\u009F{}\[\]\\?|.^$*()/+-]$/;
|
|
123
|
-
function escapeRegexCharIfSyntax(ch) {
|
|
124
|
-
// escape if syntax or not valid
|
|
125
|
-
if (REGEX_SYNTAX_CHAR.test(ch) || !isValidUnicode(ch.codePointAt(0))) {
|
|
126
|
-
return escapeRegexChar(ch);
|
|
127
|
-
}
|
|
128
|
-
else {
|
|
129
|
-
return ch; // leave unescaped
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
/**
|
|
133
|
-
* Unescape one codepoint to \u or \U format
|
|
134
|
-
* @param hex one codepoint in hex, such as '0127'
|
|
135
|
-
* @returns the unescaped codepoint
|
|
136
|
-
*/
|
|
137
|
-
function regexOne(hex) {
|
|
138
|
-
const unescaped = unescapeOne(hex);
|
|
139
|
-
// re-escape as 16 or 32 bit code units
|
|
140
|
-
return Array.from(unescaped).map(ch => escapeRegexCharIfSyntax(ch)).join('');
|
|
141
|
-
}
|
|
142
|
-
/**
|
|
143
|
-
* Escape a string (\uxxxx form) if there are any problematic codepoints
|
|
144
|
-
*/
|
|
145
|
-
export function escapeStringForRegex(s) {
|
|
146
|
-
return s.split('').map(ch => escapeRegexCharIfSyntax(ch)).join('');
|
|
147
|
-
}
|
|
148
|
-
/**
|
|
149
|
-
* Unescapes a string according to UTS#18§1.1, see <https://www.unicode.org/reports/tr18/#Hex_notation>
|
|
150
|
-
* @param s escaped string
|
|
151
|
-
* @returns
|
|
152
|
-
*/
|
|
153
|
-
export function unescapeStringToRegex(s) {
|
|
154
|
-
if (!s) {
|
|
155
|
-
return s;
|
|
156
|
-
}
|
|
157
|
-
try {
|
|
158
|
-
/**
|
|
159
|
-
* process one regex match
|
|
160
|
-
* @param str ignored
|
|
161
|
-
* @param matched the entire match such as '0127' or '22 22'
|
|
162
|
-
* @returns the unescaped match
|
|
163
|
-
*/
|
|
164
|
-
function processMatch(str, matched) {
|
|
165
|
-
const codepoints = matched.split(' ');
|
|
166
|
-
const unescaped = codepoints.map(regexOne);
|
|
167
|
-
return unescaped.join('');
|
|
168
|
-
}
|
|
169
|
-
s = s.replaceAll(MATCH_HEX_ESCAPE, processMatch);
|
|
170
|
-
}
|
|
171
|
-
catch (e) {
|
|
172
|
-
if (e instanceof RangeError) {
|
|
173
|
-
throw new UnescapeError(`Out of range while unescaping '${s}': ${e.message}`, { cause: e });
|
|
174
|
-
/* c8 ignore next 3 */
|
|
175
|
-
}
|
|
176
|
-
else {
|
|
177
|
-
throw e; // pass through some other error
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
return s;
|
|
181
|
-
}
|
|
182
|
-
/** True if this string *could* be a UTF-32 single char */
|
|
183
|
-
export function isOneChar(value) {
|
|
184
|
-
return [...value].length === 1;
|
|
185
|
-
}
|
|
186
|
-
export function toOneChar(value) {
|
|
187
|
-
if (!isOneChar(value)) {
|
|
188
|
-
throw Error(`Not a single char: ${value}`);
|
|
189
|
-
}
|
|
190
|
-
return value.codePointAt(0);
|
|
191
|
-
}
|
|
192
|
-
export function describeCodepoint(ch) {
|
|
193
|
-
let s;
|
|
194
|
-
const p = BadStringAnalyzer.getProblem(ch);
|
|
195
|
-
if (p != null) {
|
|
196
|
-
// for example: 'PUA (U+E010)'
|
|
197
|
-
s = p;
|
|
198
|
-
}
|
|
199
|
-
else {
|
|
200
|
-
// for example: '"a" (U+61)'
|
|
201
|
-
s = `"${String.fromCodePoint(ch)}"`;
|
|
202
|
-
}
|
|
203
|
-
return `${s} (U+${Number(ch).toString(16).toUpperCase()})`;
|
|
204
|
-
}
|
|
205
|
-
export var BadStringType;
|
|
206
|
-
(function (BadStringType) {
|
|
207
|
-
BadStringType["pua"] = "PUA";
|
|
208
|
-
BadStringType["unassigned"] = "Unassigned";
|
|
209
|
-
BadStringType["illegal"] = "Illegal";
|
|
210
|
-
BadStringType["denormalized"] = "Denormalized";
|
|
211
|
-
})(BadStringType || (BadStringType = {}));
|
|
212
|
-
;
|
|
213
|
-
// Following from kmx_xstring.h / .cpp
|
|
214
|
-
const Uni_LEAD_SURROGATE_START = 0xD800;
|
|
215
|
-
const Uni_LEAD_SURROGATE_END = 0xDBFF;
|
|
216
|
-
const Uni_TRAIL_SURROGATE_START = 0xDC00;
|
|
217
|
-
const Uni_TRAIL_SURROGATE_END = 0xDFFF;
|
|
218
|
-
const Uni_SURROGATE_START = Uni_LEAD_SURROGATE_START;
|
|
219
|
-
const Uni_SURROGATE_END = Uni_TRAIL_SURROGATE_END;
|
|
220
|
-
const Uni_FD_NONCHARACTER_START = 0xFDD0;
|
|
221
|
-
const Uni_FD_NONCHARACTER_END = 0xFDEF;
|
|
222
|
-
const Uni_FFFE_NONCHARACTER = 0xFFFE;
|
|
223
|
-
const Uni_PLANE_MASK = 0x1F0000;
|
|
224
|
-
const Uni_MAX_CODEPOINT = 0x10FFFF;
|
|
225
|
-
// plane 0, 15, and 16 PUA
|
|
226
|
-
const Uni_PUA_00_START = 0xE000;
|
|
227
|
-
const Uni_PUA_00_END = 0xF8FF;
|
|
228
|
-
const Uni_PUA_15_START = 0x0F0000;
|
|
229
|
-
const Uni_PUA_15_END = 0x0FFFFD;
|
|
230
|
-
const Uni_PUA_16_START = 0x100000;
|
|
231
|
-
const Uni_PUA_16_END = 0x10FFFD;
|
|
232
|
-
/**
|
|
233
|
-
* @brief True if a lead surrogate
|
|
234
|
-
* \def Uni_IsSurrogate1
|
|
235
|
-
*/
|
|
236
|
-
function Uni_IsSurrogate1(ch) {
|
|
237
|
-
return ((ch) >= Uni_LEAD_SURROGATE_START && (ch) <= Uni_LEAD_SURROGATE_END);
|
|
238
|
-
}
|
|
239
|
-
/**
|
|
240
|
-
* @brief True if a trail surrogate
|
|
241
|
-
* \def Uni_IsSurrogate2
|
|
242
|
-
*/
|
|
243
|
-
function Uni_IsSurrogate2(ch) {
|
|
244
|
-
return ((ch) >= Uni_TRAIL_SURROGATE_START && (ch) <= Uni_TRAIL_SURROGATE_END);
|
|
245
|
-
}
|
|
246
|
-
/**
|
|
247
|
-
* @brief True if any surrogate
|
|
248
|
-
* \def UniIsSurrogate
|
|
249
|
-
*/
|
|
250
|
-
function Uni_IsSurrogate(ch) {
|
|
251
|
-
return (Uni_IsSurrogate1(ch) || Uni_IsSurrogate2(ch));
|
|
252
|
-
}
|
|
253
|
-
function Uni_IsEndOfPlaneNonCharacter(ch) {
|
|
254
|
-
return (((ch) & Uni_FFFE_NONCHARACTER) == Uni_FFFE_NONCHARACTER); // matches FFFF or FFFE
|
|
255
|
-
}
|
|
256
|
-
function Uni_IsNoncharacter(ch) {
|
|
257
|
-
return (((ch) >= Uni_FD_NONCHARACTER_START && (ch) <= Uni_FD_NONCHARACTER_END) || Uni_IsEndOfPlaneNonCharacter(ch));
|
|
258
|
-
}
|
|
259
|
-
function Uni_InCodespace(ch) {
|
|
260
|
-
return (ch >= 0 && ch <= Uni_MAX_CODEPOINT);
|
|
261
|
-
}
|
|
262
|
-
;
|
|
263
|
-
function Uni_IsValid1(ch) {
|
|
264
|
-
return (Uni_InCodespace(ch) && !Uni_IsSurrogate(ch) && !Uni_IsNoncharacter(ch));
|
|
265
|
-
}
|
|
266
|
-
export function isValidUnicode(start, end) {
|
|
267
|
-
if (!end) {
|
|
268
|
-
// single char
|
|
269
|
-
return Uni_IsValid1(start);
|
|
270
|
-
}
|
|
271
|
-
else if (!Uni_IsValid1(end) || !Uni_IsValid1(start) || (end < start)) {
|
|
272
|
-
// start or end out of range, or inverted range
|
|
273
|
-
return false;
|
|
274
|
-
}
|
|
275
|
-
else if ((start <= Uni_SURROGATE_END) && (end >= Uni_SURROGATE_START)) {
|
|
276
|
-
// contains some of the surrogate range
|
|
277
|
-
return false;
|
|
278
|
-
}
|
|
279
|
-
else if ((start <= Uni_FD_NONCHARACTER_END) && (end >= Uni_FD_NONCHARACTER_START)) {
|
|
280
|
-
// contains some of the noncharacter range
|
|
281
|
-
return false;
|
|
282
|
-
}
|
|
283
|
-
else if ((start & Uni_PLANE_MASK) != (end & Uni_PLANE_MASK)) {
|
|
284
|
-
// start and end are on different planes, meaning that the U+__FFFE/U+__FFFF noncharacters
|
|
285
|
-
// are contained.
|
|
286
|
-
// As a reminder, we already checked that start/end are themselves valid,
|
|
287
|
-
// so we know that 'end' is not on a noncharacter at end of plane.
|
|
288
|
-
return false;
|
|
289
|
-
}
|
|
290
|
-
else {
|
|
291
|
-
return true;
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
export function isPUA(ch) {
|
|
295
|
-
return ((ch >= Uni_PUA_00_START && ch <= Uni_PUA_00_END) ||
|
|
296
|
-
(ch >= Uni_PUA_15_START && ch <= Uni_PUA_15_END) ||
|
|
297
|
-
(ch >= Uni_PUA_16_START && ch <= Uni_PUA_16_END));
|
|
298
|
-
}
|
|
299
|
-
class BadStringMap extends Map {
|
|
300
|
-
toString() {
|
|
301
|
-
if (!this.size) {
|
|
302
|
-
return "{}";
|
|
303
|
-
}
|
|
304
|
-
return Array.from(this.entries()).map(([t, s]) => `${t}: ${Array.from(s.values()).map(describeCodepoint).join(' ')}`).join(', ');
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
/** abstract class for analyzing and categorizing strings */
|
|
308
|
-
export class StringAnalyzer {
|
|
309
|
-
/** add a string for analysis */
|
|
310
|
-
add(s) {
|
|
311
|
-
for (const c of [...s]) {
|
|
312
|
-
const ch = c.codePointAt(0);
|
|
313
|
-
const problem = this.analyzeCodePoint(c, ch);
|
|
314
|
-
if (problem) {
|
|
315
|
-
this.addProblem(ch, problem);
|
|
316
|
-
}
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
/** internal interface for the result of an analysis */
|
|
320
|
-
addProblem(ch, type) {
|
|
321
|
-
if (!this.m.has(type)) {
|
|
322
|
-
this.m.set(type, new Set());
|
|
323
|
-
}
|
|
324
|
-
this.m.get(type).add(ch);
|
|
325
|
-
}
|
|
326
|
-
/** get the results of the analysis */
|
|
327
|
-
analyze() {
|
|
328
|
-
if (this.m.size == 0) {
|
|
329
|
-
return null;
|
|
330
|
-
}
|
|
331
|
-
else {
|
|
332
|
-
return this.m;
|
|
333
|
-
}
|
|
334
|
-
}
|
|
335
|
-
/** internal map */
|
|
336
|
-
m = new BadStringMap();
|
|
337
|
-
}
|
|
338
|
-
/** analyze a string looking for bad unicode */
|
|
339
|
-
export class BadStringAnalyzer extends StringAnalyzer {
|
|
340
|
-
/** analyze one codepoint */
|
|
341
|
-
analyzeCodePoint(c, ch) {
|
|
342
|
-
return BadStringAnalyzer.getProblem(ch);
|
|
343
|
-
}
|
|
344
|
-
/** export analyzer function */
|
|
345
|
-
static getProblem(ch) {
|
|
346
|
-
if (!isValidUnicode(ch)) {
|
|
347
|
-
return BadStringType.illegal;
|
|
348
|
-
}
|
|
349
|
-
else if (isPUA(ch)) {
|
|
350
|
-
return BadStringType.pua;
|
|
351
|
-
}
|
|
352
|
-
else { // TODO-LDML: unassigned
|
|
353
|
-
return null;
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
}
|
|
357
|
-
/** Analyzer that checks if something isn't NFD */
|
|
358
|
-
export class NFDAnalyzer extends StringAnalyzer {
|
|
359
|
-
analyzeCodePoint(c, ch) {
|
|
360
|
-
const nfd = c.normalize("NFD");
|
|
361
|
-
if (c !== nfd) {
|
|
362
|
-
return BadStringType.denormalized;
|
|
363
|
-
}
|
|
364
|
-
else {
|
|
365
|
-
return null;
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
}
|
|
9
|
+
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="5541fdef-16ab-5797-ab70-735266c3839e")}catch(e){}}();
|
|
10
|
+
export function boxXmlArray(o, x) {
|
|
11
|
+
if (typeof o == 'object' && !Array.isArray(o[x])) {
|
|
12
|
+
if (o[x] === null || o[x] === undefined) {
|
|
13
|
+
o[x] = [];
|
|
14
|
+
}
|
|
15
|
+
else {
|
|
16
|
+
o[x] = [o[x]];
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
// TODO-LDML: #7569 the below regex works, but captures more than it should
|
|
21
|
+
// (it would include \u{fffffffffffffffff } which
|
|
22
|
+
// is overlong and has a space at the end.) The second regex does not work yet.
|
|
23
|
+
export const MATCH_HEX_ESCAPE = /\\u{([0-9a-fA-F ]{1,})}/g;
|
|
24
|
+
// const MATCH_HEX_ESCAPE = /\\u{((?:(?:[0-9a-fA-F]{1,5})|(?:10[0-9a-fA-F]{4})(?: (?!}))?)+)}/g;
|
|
25
|
+
/** regex for single quad escape such as \u0127 or \U00000000 */
|
|
26
|
+
export const CONTAINS_QUAD_ESCAPE = /(?:\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{8}))/;
|
|
27
|
+
/** regex for single quad escape such as \u0127 */
|
|
28
|
+
export const MATCH_QUAD_ESCAPE = new RegExp(CONTAINS_QUAD_ESCAPE, 'g');
|
|
29
|
+
export class UnescapeError extends Error {
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Unescape one codepoint
|
|
33
|
+
* @param hex one codepoint in hex, such as '0127'
|
|
34
|
+
* @returns the unescaped codepoint
|
|
35
|
+
*/
|
|
36
|
+
export function unescapeOne(hex) {
|
|
37
|
+
const codepoint = Number.parseInt(hex, 16);
|
|
38
|
+
return String.fromCodePoint(codepoint);
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Unescape one single quad string such as \u0127 / \U00000000
|
|
42
|
+
* Throws exception if the string doesn't match MATCH_QUAD_ESCAPE
|
|
43
|
+
* Note this does not attempt to handle or reject surrogates.
|
|
44
|
+
* So, `\\uD838\\uDD09` will work but other combinations may not.
|
|
45
|
+
* @param s input string
|
|
46
|
+
* @returns output
|
|
47
|
+
*/
|
|
48
|
+
export function unescapeOneQuadString(s) {
|
|
49
|
+
if (!s || !s.match(MATCH_QUAD_ESCAPE)) {
|
|
50
|
+
throw new UnescapeError(`Not a quad escape: ${s}`);
|
|
51
|
+
}
|
|
52
|
+
function processMatch(str, m16, m32) {
|
|
53
|
+
return unescapeOne(m16 || m32); // either \u or \U
|
|
54
|
+
}
|
|
55
|
+
s = s.replace(MATCH_QUAD_ESCAPE, processMatch);
|
|
56
|
+
return s;
|
|
57
|
+
}
|
|
58
|
+
/** unscape multiple occurences of \u0127 style strings */
|
|
59
|
+
export function unescapeQuadString(s) {
|
|
60
|
+
s = s.replaceAll(MATCH_QUAD_ESCAPE, (quad) => unescapeOneQuadString(quad));
|
|
61
|
+
return s;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Unescapes a string according to UTS#18§1.1, see <https://www.unicode.org/reports/tr18/#Hex_notation>
|
|
65
|
+
* @param s escaped string
|
|
66
|
+
* @returns
|
|
67
|
+
*/
|
|
68
|
+
export function unescapeString(s) {
|
|
69
|
+
if (!s) {
|
|
70
|
+
return s;
|
|
71
|
+
}
|
|
72
|
+
try {
|
|
73
|
+
/**
|
|
74
|
+
* process one regex match
|
|
75
|
+
* @param str ignored
|
|
76
|
+
* @param matched the entire match such as '0127' or '22 22'
|
|
77
|
+
* @returns the unescaped match
|
|
78
|
+
*/
|
|
79
|
+
function processMatch(str, matched) {
|
|
80
|
+
const codepoints = matched.split(' ');
|
|
81
|
+
const unescaped = codepoints.map(unescapeOne);
|
|
82
|
+
return unescaped.join('');
|
|
83
|
+
}
|
|
84
|
+
s = s.replaceAll(MATCH_HEX_ESCAPE, processMatch);
|
|
85
|
+
}
|
|
86
|
+
catch (e) {
|
|
87
|
+
if (e instanceof RangeError) {
|
|
88
|
+
throw new UnescapeError(`Out of range while unescaping '${s}': ${e.message}`, { cause: e });
|
|
89
|
+
/* c8 ignore next 3 */
|
|
90
|
+
}
|
|
91
|
+
else {
|
|
92
|
+
throw e; // pass through some other error
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return s;
|
|
96
|
+
}
|
|
97
|
+
/** 0000 … FFFF */
|
|
98
|
+
export function hexQuad(n) {
|
|
99
|
+
if (n < 0x0000 || n > 0xFFFF) {
|
|
100
|
+
throw RangeError(`${n} not in [0x0000,0xFFFF]`);
|
|
101
|
+
}
|
|
102
|
+
return n.toString(16).padStart(4, '0');
|
|
103
|
+
}
|
|
104
|
+
/** 00000000 … FFFFFFFF */
|
|
105
|
+
export function hexOcts(n) {
|
|
106
|
+
if (n < 0x0000 || n > 0xFFFFFFFF) {
|
|
107
|
+
throw RangeError(`${n} not in [0x00000000,0xFFFFFFFF]`);
|
|
108
|
+
}
|
|
109
|
+
return n.toString(16).padStart(8, '0');
|
|
110
|
+
}
|
|
111
|
+
/** escape one char for regex in \uXXXX form */
|
|
112
|
+
export function escapeRegexChar(ch) {
|
|
113
|
+
const code = ch.codePointAt(0);
|
|
114
|
+
if (code <= 0xFFFF) {
|
|
115
|
+
return '\\u' + hexQuad(code);
|
|
116
|
+
}
|
|
117
|
+
else {
|
|
118
|
+
return '\\U' + hexOcts(code);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
/** single chars that must always be escaped: regex syntax, plus the C0 and C1 controls */
const REGEX_SYNTAX_CHAR = /^[\u0000-\u001F\u007F-\u009F{}\[\]\\?|.^$*()/+-]$/;
/**
 * Escape a char only when it needs it.
 * @param ch one char
 * @returns escapeRegexChar(ch) if ch matches REGEX_SYNTAX_CHAR or its first code
 *          point fails isValidUnicode; otherwise ch itself, unchanged
 */
function escapeRegexCharIfSyntax(ch) {
    const needsEscape = REGEX_SYNTAX_CHAR.test(ch) || !isValidUnicode(ch.codePointAt(0));
    return needsEscape ? escapeRegexChar(ch) : ch;
}
|
|
132
|
+
/**
 * Unescape one hex codepoint, then re-escape (\u or \U form) any resulting
 * char that escapeRegexCharIfSyntax decides cannot stay literal.
 * @param hex one codepoint in hex, such as '0127'
 * @returns the unescaped codepoint, re-escaped where required
 */
function regexOne(hex) {
    // Array.from walks the unescaped text by code point, mapping each one as it goes
    return Array.from(unescapeOne(hex), ch => escapeRegexCharIfSyntax(ch)).join('');
}
|
|
142
|
+
/**
 * Escape a string (\uxxxx form) if there are any problematic codepoints.
 * Each code point is passed to escapeRegexCharIfSyntax and the results are joined.
 * @param s string to escape
 * @returns s with each problematic code point replaced by its escape
 */
export function escapeStringForRegex(s) {
    // Iterate by code point, not by UTF-16 code unit: split('') would break a
    // supplementary character into two lone surrogates, each of which is then
    // judged on its own instead of as the single character it encodes.
    return Array.from(s, ch => escapeRegexCharIfSyntax(ch)).join('');
}
|
|
148
|
+
/**
 * Unescapes a string according to UTS#18§1.1, see <https://www.unicode.org/reports/tr18/#Hex_notation>
 * Every MATCH_HEX_ESCAPE hit is replaced by its codepoints run through regexOne,
 * so chars that are unsafe in a regex come back re-escaped.
 * @param s escaped string
 * @returns the converted string; a falsy s is returned as-is
 * @throws UnescapeError wrapping any RangeError raised during conversion
 */
export function unescapeStringToRegex(s) {
    if (!s) {
        return s;
    }
    /**
     * process one regex match
     * @param _whole ignored
     * @param matched the entire match such as '0127' or '22 22'
     * @returns the unescaped match
     */
    const expandMatch = (_whole, matched) => matched.split(' ').map(regexOne).join('');
    try {
        return s.replaceAll(MATCH_HEX_ESCAPE, expandMatch);
    }
    catch (err) {
        /* c8 ignore next 3 */
        if (!(err instanceof RangeError)) {
            throw err; // pass through some other error
        }
        throw new UnescapeError(`Out of range while unescaping '${s}': ${err.message}`, { cause: err });
    }
}
|
|
182
|
+
/**
 * True if this string *could* be a UTF-32 single char,
 * i.e. iterating it yields exactly one element.
 * @param value string to test
 */
export function isOneChar(value) {
    let seen = 0;
    for (const _ch of value) {
        seen += 1;
        if (seen > 1) {
            return false; // a second element already rules it out
        }
    }
    return seen === 1;
}
|
|
186
|
+
/**
 * Convert a single-char string to its code point.
 * @param value string that must iterate to exactly one element
 * @returns the code point at index 0
 * @throws Error if value is not a single char
 */
export function toOneChar(value) {
    // same single-char test as isOneChar, written inline
    const chars = [...value];
    if (chars.length !== 1) {
        throw Error(`Not a single char: ${value}`);
    }
    return value.codePointAt(0);
}
|
|
192
|
+
/**
 * Build a short human-readable description of a code point.
 * @param ch code point (number)
 * @returns the problem reported by BadStringAnalyzer.getProblem, or else the
 *          quoted character, followed by the uppercase hex value.
 *          For example: 'PUA (U+E010)' or '"a" (U+61)'
 */
export function describeCodepoint(ch) {
    // only materialise the character when no problem was reported for it
    const label = BadStringAnalyzer.getProblem(ch) ?? `"${String.fromCodePoint(ch)}"`;
    const hex = Number(ch).toString(16).toUpperCase();
    return `${label} (U+${hex})`;
}
|
|
205
|
+
/** Problem categories the analyzers can report; each value is the display label. */
export var BadStringType;
BadStringType ||= {};
Object.assign(BadStringType, {
    pua: "PUA",
    unassigned: "Unassigned",
    illegal: "Illegal",
    denormalized: "Denormalized",
});
|
|
212
|
+
;
|
|
213
|
+
// Following from kmx_xstring.h / .cpp

// surrogates: lead range, trail range, and the span covering both
const Uni_LEAD_SURROGATE_START = 0xD800;
const Uni_LEAD_SURROGATE_END = 0xDBFF;
const Uni_TRAIL_SURROGATE_START = 0xDC00;
const Uni_TRAIL_SURROGATE_END = 0xDFFF;
const Uni_SURROGATE_START = Uni_LEAD_SURROGATE_START;
const Uni_SURROGATE_END = Uni_TRAIL_SURROGATE_END;

// noncharacters: the U+FDD0..U+FDEF block and the xxFFFE/xxFFFF pattern
const Uni_FD_NONCHARACTER_START = 0xFDD0;
const Uni_FD_NONCHARACTER_END = 0xFDEF;
const Uni_FFFE_NONCHARACTER = 0xFFFE;

// bits above the low 16 select the plane; the codespace ends at U+10FFFF
const Uni_PLANE_MASK = 0x1F0000;
const Uni_MAX_CODEPOINT = 0x10FFFF;

// plane 0, 15, and 16 PUA
const Uni_PUA_00_START = 0xE000;
const Uni_PUA_00_END = 0xF8FF;
const Uni_PUA_15_START = 0x0F0000;
const Uni_PUA_15_END = 0x0FFFFD;
const Uni_PUA_16_START = 0x100000;
const Uni_PUA_16_END = 0x10FFFD;
|
|
232
|
+
/**
 * @brief True if ch lies in the lead surrogate range
 *        [Uni_LEAD_SURROGATE_START, Uni_LEAD_SURROGATE_END]
 * \def Uni_IsSurrogate1
 */
function Uni_IsSurrogate1(ch) {
    const atOrAboveStart = ch >= Uni_LEAD_SURROGATE_START;
    return atOrAboveStart && ch <= Uni_LEAD_SURROGATE_END;
}
|
|
239
|
+
/**
 * @brief True if ch lies in the trail surrogate range
 *        [Uni_TRAIL_SURROGATE_START, Uni_TRAIL_SURROGATE_END]
 * \def Uni_IsSurrogate2
 */
function Uni_IsSurrogate2(ch) {
    const atOrAboveStart = ch >= Uni_TRAIL_SURROGATE_START;
    return atOrAboveStart && ch <= Uni_TRAIL_SURROGATE_END;
}
|
|
246
|
+
/**
 * @brief True if any surrogate, lead or trail
 * \def UniIsSurrogate
 */
function Uni_IsSurrogate(ch) {
    if (Uni_IsSurrogate1(ch)) {
        return true;
    }
    return Uni_IsSurrogate2(ch);
}
|
|
253
|
+
/**
 * True if the low 16 bits of ch are FFFE or FFFF.
 * Uni_FFFE_NONCHARACTER has bit 0 clear, so masking with it accepts both values.
 */
function Uni_IsEndOfPlaneNonCharacter(ch) {
    const masked = ch & Uni_FFFE_NONCHARACTER;
    return masked === Uni_FFFE_NONCHARACTER; // matches FFFF or FFFE
}
|
|
256
|
+
/**
 * True if ch is a noncharacter: either inside
 * [Uni_FD_NONCHARACTER_START, Uni_FD_NONCHARACTER_END] or accepted by
 * Uni_IsEndOfPlaneNonCharacter.
 */
function Uni_IsNoncharacter(ch) {
    const inFdRange = ch >= Uni_FD_NONCHARACTER_START && ch <= Uni_FD_NONCHARACTER_END;
    if (inFdRange) {
        return true;
    }
    return Uni_IsEndOfPlaneNonCharacter(ch);
}
|
|
259
|
+
/** True if ch lies within [0, Uni_MAX_CODEPOINT]. */
function Uni_InCodespace(ch) {
    const nonNegative = ch >= 0;
    return nonNegative && ch <= Uni_MAX_CODEPOINT;
}
|
|
262
|
+
;
|
|
263
|
+
/**
 * True if a single code point is valid: inside the codespace, not a surrogate,
 * and not a noncharacter.
 */
function Uni_IsValid1(ch) {
    if (!Uni_InCodespace(ch)) {
        return false;
    }
    if (Uni_IsSurrogate(ch)) {
        return false;
    }
    return !Uni_IsNoncharacter(ch);
}
|
|
266
|
+
/**
 * Validate a single code point, or an inclusive range of code points.
 * @param start the code point, or the first code point of the range
 * @param end   optional last code point of the range; omit (or pass
 *              null/undefined) to validate start alone
 * @returns true if start alone is valid, or if every code point in
 *          [start, end] is valid
 */
export function isValidUnicode(start, end) {
    // Test for "no end supplied" explicitly. A truthiness test would also treat
    // end === 0 as absent and report an inverted range such as (5, 0) as valid.
    if (end == null) {
        // single char
        return Uni_IsValid1(start);
    }
    if (!Uni_IsValid1(end) || !Uni_IsValid1(start) || (end < start)) {
        // start or end out of range, or inverted range
        return false;
    }
    if ((start <= Uni_SURROGATE_END) && (end >= Uni_SURROGATE_START)) {
        // contains some of the surrogate range
        return false;
    }
    if ((start <= Uni_FD_NONCHARACTER_END) && (end >= Uni_FD_NONCHARACTER_START)) {
        // contains some of the noncharacter range
        return false;
    }
    // If start and end are on different planes, the U+__FFFE/U+__FFFF
    // noncharacters are contained in the range.
    // As a reminder, we already checked that start/end are themselves valid,
    // so we know that 'end' is not on a noncharacter at end of plane.
    return (start & Uni_PLANE_MASK) === (end & Uni_PLANE_MASK);
}
|
|
294
|
+
/**
 * True if ch falls in one of the three private-use ranges
 * (plane 0, plane 15, plane 16) given by the Uni_PUA_* constants.
 */
export function isPUA(ch) {
    const puaRanges = [
        [Uni_PUA_00_START, Uni_PUA_00_END],
        [Uni_PUA_15_START, Uni_PUA_15_END],
        [Uni_PUA_16_START, Uni_PUA_16_END],
    ];
    return puaRanges.some(([lo, hi]) => ch >= lo && ch <= hi);
}
|
|
299
|
+
/** Map from problem type to the set of code points exhibiting that problem. */
class BadStringMap extends Map {
    /**
     * Render the map for display.
     * @returns "{}" when empty; otherwise 'type: cp cp, type: cp …', with each
     *          code point rendered by describeCodepoint
     */
    toString() {
        if (!this.size) {
            return "{}";
        }
        const parts = [];
        for (const [type, codepoints] of this.entries()) {
            const described = Array.from(codepoints.values()).map(describeCodepoint).join(' ');
            parts.push(`${type}: ${described}`);
        }
        return parts.join(', ');
    }
}
|
|
307
|
+
/**
 * abstract class for analyzing and categorizing strings.
 * Subclasses supply analyzeCodePoint(c, ch), returning a problem type or a falsy value.
 */
export class StringAnalyzer {
    /** internal map: problem type → Set of code points */
    m = new BadStringMap();
    /**
     * add a string for analysis
     * @param s string whose code points are each passed to analyzeCodePoint
     */
    add(s) {
        for (const c of s) {
            const ch = c.codePointAt(0);
            const problem = this.analyzeCodePoint(c, ch);
            if (!problem) {
                continue;
            }
            this.addProblem(ch, problem);
        }
    }
    /**
     * internal interface for the result of an analysis
     * @param ch code point that has the problem
     * @param type problem type under which to file it
     */
    addProblem(ch, type) {
        // create the bucket on first use of this type
        const isNewType = !this.m.has(type);
        if (isNewType) {
            this.m.set(type, new Set());
        }
        this.m.get(type).add(ch);
    }
    /**
     * get the results of the analysis
     * @returns the internal map, or null if no problems were recorded
     */
    analyze() {
        return (this.m.size === 0) ? null : this.m;
    }
}
|
|
338
|
+
/** analyze a string looking for bad unicode */
export class BadStringAnalyzer extends StringAnalyzer {
    /**
     * analyze one codepoint
     * @param c the character (unused here)
     * @param ch its code point
     * @returns the result of getProblem(ch)
     */
    analyzeCodePoint(c, ch) {
        return BadStringAnalyzer.getProblem(ch);
    }
    /**
     * export analyzer function
     * @param ch code point to classify
     * @returns BadStringType.illegal if isValidUnicode rejects ch,
     *          BadStringType.pua if isPUA accepts it, otherwise null
     */
    static getProblem(ch) {
        if (!isValidUnicode(ch)) {
            return BadStringType.illegal;
        }
        if (isPUA(ch)) {
            return BadStringType.pua;
        }
        // TODO-LDML: unassigned
        return null;
    }
}
|
|
357
|
+
/** Analyzer that checks if something isn't NFD */
export class NFDAnalyzer extends StringAnalyzer {
    /**
     * analyze one codepoint
     * @param c the character, as a string
     * @param ch its code point (unused here)
     * @returns BadStringType.denormalized if c differs from its NFD form, else null
     */
    analyzeCodePoint(c, ch) {
        const isAlreadyNfd = (c === c.normalize("NFD"));
        return isAlreadyNfd ? null : BadStringType.denormalized;
    }
}
|
|
369
369
|
//# sourceMappingURL=util.js.map
|
|
370
|
-
//# debugId=
|
|
370
|
+
//# debugId=5541fdef-16ab-5797-ab70-735266c3839e
|