@keymanapp/common-types 18.0.41-alpha → 18.0.46-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/src/consts/virtual-key-constants.d.ts +191 -191
- package/build/src/consts/virtual-key-constants.js +221 -221
- package/build/src/consts/virtual-key-constants.js.map +1 -1
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-reader.d.ts +5 -5
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-reader.js +72 -72
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-reader.js.map +1 -1
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-writer.d.ts +28 -28
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-writer.js +120 -120
- package/build/src/keyman-touch-layout/keyman-touch-layout-file-writer.js.map +1 -1
- package/build/src/keyman-touch-layout/keyman-touch-layout-file.d.ts +95 -95
- package/build/src/keyman-touch-layout/keyman-touch-layout-file.js +52 -52
- package/build/src/kmx/element-string.d.ts +23 -23
- package/build/src/kmx/element-string.js +125 -125
- package/build/src/kmx/element-string.js.map +1 -1
- package/build/src/kmx/keyman-targets.d.ts +20 -20
- package/build/src/kmx/keyman-targets.js +79 -79
- package/build/src/kmx/keyman-targets.js.map +1 -1
- package/build/src/kmx/kmx-builder.d.ts +32 -32
- package/build/src/kmx/kmx-builder.js +189 -189
- package/build/src/kmx/kmx-builder.js.map +1 -1
- package/build/src/kmx/kmx-file-reader.d.ts +13 -13
- package/build/src/kmx/kmx-file-reader.js +134 -134
- package/build/src/kmx/kmx-file-reader.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-disp.d.ts +18 -18
- package/build/src/kmx/kmx-plus-builder/build-disp.js +27 -27
- package/build/src/kmx/kmx-plus-builder/build-disp.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-elem.d.ts +28 -28
- package/build/src/kmx/kmx-plus-builder/build-elem.js +89 -89
- package/build/src/kmx/kmx-plus-builder/build-elem.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-keys.d.ts +56 -56
- package/build/src/kmx/kmx-plus-builder/build-keys.js +107 -107
- package/build/src/kmx/kmx-plus-builder/build-keys.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-layr.d.ts +53 -53
- package/build/src/kmx/kmx-plus-builder/build-layr.js +101 -101
- package/build/src/kmx/kmx-plus-builder/build-layr.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-list.d.ts +35 -35
- package/build/src/kmx/kmx-plus-builder/build-list.js +66 -66
- package/build/src/kmx/kmx-plus-builder/build-list.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-loca.d.ts +11 -11
- package/build/src/kmx/kmx-plus-builder/build-loca.js +21 -21
- package/build/src/kmx/kmx-plus-builder/build-loca.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-meta.d.ts +16 -16
- package/build/src/kmx/kmx-plus-builder/build-meta.js +20 -20
- package/build/src/kmx/kmx-plus-builder/build-sect.d.ts +15 -15
- package/build/src/kmx/kmx-plus-builder/build-sect.js +15 -15
- package/build/src/kmx/kmx-plus-builder/build-sect.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-strs.d.ts +22 -22
- package/build/src/kmx/kmx-plus-builder/build-strs.js +46 -46
- package/build/src/kmx/kmx-plus-builder/build-strs.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-tran.d.ts +32 -32
- package/build/src/kmx/kmx-plus-builder/build-tran.js +69 -69
- package/build/src/kmx/kmx-plus-builder/build-tran.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-uset.d.ts +30 -30
- package/build/src/kmx/kmx-plus-builder/build-uset.js +60 -60
- package/build/src/kmx/kmx-plus-builder/build-uset.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/build-vars.d.ts +21 -21
- package/build/src/kmx/kmx-plus-builder/build-vars.js +48 -48
- package/build/src/kmx/kmx-plus-builder/build-vars.js.map +1 -1
- package/build/src/kmx/kmx-plus-builder/builder-section.d.ts +9 -9
- package/build/src/kmx/kmx-plus-builder/builder-section.js +2 -2
- package/build/src/kmx/kmx-plus-builder/kmx-plus-builder.d.ts +42 -42
- package/build/src/kmx/kmx-plus-builder/kmx-plus-builder.js +137 -137
- package/build/src/kmx/kmx-plus-builder/kmx-plus-builder.js.map +1 -1
- package/build/src/kmx/kmx-plus.d.ts +322 -322
- package/build/src/kmx/kmx-plus.js +830 -830
- package/build/src/kmx/kmx-plus.js.map +1 -1
- package/build/src/kmx/kmx.d.ts +233 -233
- package/build/src/kmx/kmx.js +302 -302
- package/build/src/kmx/kmx.js.map +1 -1
- package/build/src/kmx/string-list.d.ts +35 -35
- package/build/src/kmx/string-list.js +88 -88
- package/build/src/kmx/string-list.js.map +1 -1
- package/build/src/kpj/keyman-developer-project.d.ts +78 -78
- package/build/src/kpj/keyman-developer-project.js +195 -195
- package/build/src/kpj/keyman-developer-project.js.map +1 -1
- package/build/src/kpj/kpj-file-reader.d.ts +18 -18
- package/build/src/kpj/kpj-file-reader.js +119 -119
- package/build/src/kpj/kpj-file-reader.js.map +1 -1
- package/build/src/kpj/kpj-file.d.ts +39 -39
- package/build/src/kpj/kpj-file.js +10 -10
- package/build/src/kvk/kvk-file-reader.d.ts +4 -4
- package/build/src/kvk/kvk-file-reader.js +37 -37
- package/build/src/kvk/kvk-file-reader.js.map +1 -1
- package/build/src/kvk/kvk-file-writer.d.ts +19 -19
- package/build/src/kvk/kvk-file-writer.js +77 -77
- package/build/src/kvk/kvk-file-writer.js.map +1 -1
- package/build/src/kvk/kvk-file.d.ts +66 -66
- package/build/src/kvk/kvk-file.d.ts.map +1 -1
- package/build/src/kvk/kvk-file.js +90 -90
- package/build/src/kvk/kvk-file.js.map +1 -1
- package/build/src/kvk/kvks-file-reader.d.ts +23 -23
- package/build/src/kvk/kvks-file-reader.js +175 -175
- package/build/src/kvk/kvks-file-reader.js.map +1 -1
- package/build/src/kvk/kvks-file-writer.d.ts +6 -6
- package/build/src/kvk/kvks-file-writer.js +106 -106
- package/build/src/kvk/kvks-file-writer.js.map +1 -1
- package/build/src/kvk/kvks-file.d.ts +43 -43
- package/build/src/kvk/kvks-file.js +9 -9
- package/build/src/kvk/visual-keyboard.d.ts +44 -44
- package/build/src/kvk/visual-keyboard.js +75 -75
- package/build/src/kvk/visual-keyboard.js.map +1 -1
- package/build/src/ldml-keyboard/ldml-keyboard-testdata-xml.d.ts +60 -60
- package/build/src/ldml-keyboard/ldml-keyboard-testdata-xml.js +19 -19
- package/build/src/ldml-keyboard/ldml-keyboard-xml-reader.d.ts +95 -95
- package/build/src/ldml-keyboard/ldml-keyboard-xml-reader.js +409 -409
- package/build/src/ldml-keyboard/ldml-keyboard-xml-reader.js.map +1 -1
- package/build/src/ldml-keyboard/ldml-keyboard-xml.d.ts +170 -170
- package/build/src/ldml-keyboard/ldml-keyboard-xml.js +55 -55
- package/build/src/ldml-keyboard/pattern-parser.d.ts +201 -201
- package/build/src/ldml-keyboard/pattern-parser.js +490 -490
- package/build/src/ldml-keyboard/pattern-parser.js.map +1 -1
- package/build/src/ldml-keyboard/unicodeset-parser-api.d.ts +37 -37
- package/build/src/ldml-keyboard/unicodeset-parser-api.js +27 -27
- package/build/src/main.d.ts +33 -33
- package/build/src/main.js +32 -32
- package/build/src/package/kmp-json-file.d.ts +115 -115
- package/build/src/package/kmp-json-file.js +1 -1
- package/build/src/package/kps-file.d.ts +175 -175
- package/build/src/package/kps-file.js +15 -15
- package/build/src/schema-validators.d.ts +13 -13
- package/build/src/schema-validators.js +25 -25
- package/build/src/schemas/displaymap.schema.d.ts +57 -57
- package/build/src/schemas/displaymap.schema.js +35 -35
- package/build/src/schemas/displaymap.schema.validator.d.mts +2 -2
- package/build/src/schemas/displaymap.schema.validator.mjs +266 -266
- package/build/src/schemas/displaymap.schema.validator.mjs.map +1 -1
- package/build/src/schemas/keyboard_info.schema.d.ts +209 -209
- package/build/src/schemas/keyboard_info.schema.js +98 -98
- package/build/src/schemas/keyboard_info.schema.validator.d.mts +2 -2
- package/build/src/schemas/keyboard_info.schema.validator.mjs +1160 -1160
- package/build/src/schemas/keyboard_info.schema.validator.mjs.map +1 -1
- package/build/src/schemas/keyman-touch-layout.clean.spec.d.ts +227 -227
- package/build/src/schemas/keyman-touch-layout.clean.spec.js +151 -151
- package/build/src/schemas/keyman-touch-layout.clean.spec.validator.d.mts +2 -2
- package/build/src/schemas/keyman-touch-layout.clean.spec.validator.mjs +1094 -1094
- package/build/src/schemas/keyman-touch-layout.clean.spec.validator.mjs.map +1 -1
- package/build/src/schemas/keyman-touch-layout.spec.d.ts +229 -229
- package/build/src/schemas/keyman-touch-layout.spec.js +140 -140
- package/build/src/schemas/keyman-touch-layout.spec.validator.d.mts +2 -2
- package/build/src/schemas/keyman-touch-layout.spec.validator.mjs +1436 -1436
- package/build/src/schemas/keyman-touch-layout.spec.validator.mjs.map +1 -1
- package/build/src/schemas/kmp.schema.d.ts +324 -324
- package/build/src/schemas/kmp.schema.js +353 -353
- package/build/src/schemas/kmp.schema.validator.d.mts +2 -2
- package/build/src/schemas/kmp.schema.validator.mjs +1627 -1627
- package/build/src/schemas/kmp.schema.validator.mjs.map +1 -1
- package/build/src/schemas/kpj-9.0.schema.d.ts +152 -152
- package/build/src/schemas/kpj-9.0.schema.js +131 -131
- package/build/src/schemas/kpj-9.0.schema.validator.d.mts +2 -2
- package/build/src/schemas/kpj-9.0.schema.validator.mjs +655 -655
- package/build/src/schemas/kpj-9.0.schema.validator.mjs.map +1 -1
- package/build/src/schemas/kpj.schema.d.ts +122 -122
- package/build/src/schemas/kpj.schema.js +129 -129
- package/build/src/schemas/kpj.schema.validator.d.mts +2 -2
- package/build/src/schemas/kpj.schema.validator.mjs +515 -515
- package/build/src/schemas/kpj.schema.validator.mjs.map +1 -1
- package/build/src/schemas/kvks.schema.d.ts +160 -160
- package/build/src/schemas/kvks.schema.js +182 -182
- package/build/src/schemas/kvks.schema.validator.d.mts +2 -2
- package/build/src/schemas/kvks.schema.validator.mjs +625 -625
- package/build/src/schemas/kvks.schema.validator.mjs.map +1 -1
- package/build/src/schemas/ldml-keyboard3.schema.d.ts +635 -635
- package/build/src/schemas/ldml-keyboard3.schema.js +696 -696
- package/build/src/schemas/ldml-keyboard3.schema.validator.d.mts +2 -2
- package/build/src/schemas/ldml-keyboard3.schema.validator.mjs +3117 -3117
- package/build/src/schemas/ldml-keyboard3.schema.validator.mjs.map +1 -1
- package/build/src/schemas/ldml-keyboardtest3.schema.d.ts +192 -192
- package/build/src/schemas/ldml-keyboardtest3.schema.js +225 -225
- package/build/src/schemas/ldml-keyboardtest3.schema.validator.d.mts +2 -2
- package/build/src/schemas/ldml-keyboardtest3.schema.validator.mjs +744 -744
- package/build/src/schemas/ldml-keyboardtest3.schema.validator.mjs.map +1 -1
- package/build/src/schemas.d.ts +2300 -2300
- package/build/src/schemas.js +23 -23
- package/build/src/util/common-events.d.ts +44 -44
- package/build/src/util/common-events.js +26 -26
- package/build/src/util/compiler-interfaces.d.ts +371 -371
- package/build/src/util/compiler-interfaces.d.ts.map +1 -1
- package/build/src/util/compiler-interfaces.js +380 -380
- package/build/src/util/compiler-interfaces.js.map +1 -1
- package/build/src/util/errors.d.ts +5 -5
- package/build/src/util/errors.js +5 -5
- package/build/src/util/file-types.d.ts +93 -93
- package/build/src/util/file-types.d.ts.map +1 -1
- package/build/src/util/file-types.js +142 -142
- package/build/src/util/file-types.js.map +1 -1
- package/build/src/util/util.d.ts +98 -98
- package/build/src/util/util.js +368 -368
- package/build/src/util/util.js.map +1 -1
- package/package.json +3 -3
|
@@ -1,492 +1,492 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Utilities for transform and marker processing
|
|
3
|
-
*/
|
|
1
|
+
/**
|
|
2
|
+
* Utilities for transform and marker processing
|
|
3
|
+
*/
|
|
4
4
|
|
|
5
|
-
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="
|
|
6
|
-
import { constants } from "@keymanapp/ldml-keyboard-constants";
|
|
7
|
-
import { MATCH_QUAD_ESCAPE, isOneChar, unescapeOneQuadString, unescapeString, hexQuad } from "../util/util.js";
|
|
8
|
-
/**
 * Collect the first capture group of every match of a pattern.
 * @param str input string; a falsy value is treated as ''
 * @param match RegExp with the global flag (String.prototype.matchAll requires it)
 * @returns array holding capture group 1 of each match, in match order
 */
function matchArray(str, match) {
    const haystack = str || '';
    return Array.from(haystack.matchAll(match), (hit) => hit[1]);
}
|
|
18
|
-
/**
|
|
19
|
-
* Common regex for an ID
|
|
20
|
-
*/
|
|
21
|
-
// Anchored at both ends: the whole string must be 1 to 32 characters drawn from ASCII letters, digits and underscore.
const COMMON_ID = /^[0-9A-Za-z_]{1,32}$/;
|
|
22
|
-
/**
 * Static helpers for LDML markers: finding `\m{id}` references, converting
 * them to sentinel sequences (plain or regex-escaped text), and normalizing
 * strings to NFD while keeping each marker attached to the character it precedes.
 */
export class MarkerParser {
    /**
     * Pattern a marker id must satisfy (shared with other ids through COMMON_ID).
     * TODO-LDML: Needs to be reflected in the spec
     */
    static ID = COMMON_ID;
    /** Special marker reference referring to any marker */
    static ANY_MARKER = '\\m{.}';
    /** id of the 'any' marker */
    static ANY_MARKER_ID = '.';
    /** Marker sentinel as a string - U+FFFF */
    static SENTINEL = String.fromCodePoint(constants.uc_sentinel);
    /** Marker sentinel as regex-escape text */
    static SENTINEL_MATCH = '\\u' + hexQuad(constants.uc_sentinel);
    /** Marker code as a string - U+0008 */
    static MARKER_CODE = String.fromCodePoint(constants.marker_code);
    /** Marker code as regex-escape text */
    static MARKER_CODE_MATCH = '\\u' + hexQuad(constants.marker_code);
    /** Minimum ID (trailing code unit) */
    static MIN_MARKER_INDEX = constants.marker_min_index;
    /** Index meaning 'any marker' == `\m{.}` */
    static ANY_MARKER_INDEX = constants.marker_any_index;
    /** Maximum usable marker index */
    static MAX_MARKER_INDEX = constants.marker_max_index;
    /** Max count of markers */
    static MAX_MARKER_COUNT = constants.marker_max_count;
    /**
     * Build the regex text matching any marker: sentinel, marker code, then a
     * character class spanning MIN_MARKER_INDEX..MAX_MARKER_INDEX.
     * @returns regex source text
     */
    static anyMarkerMatch() {
        const start = hexQuad(MarkerParser.MIN_MARKER_INDEX);
        const end = hexQuad(MarkerParser.MAX_MARKER_INDEX);
        return `${MarkerParser.SENTINEL_MATCH}${MarkerParser.MARKER_CODE_MATCH}[\\u${start}-\\u${end}]`; // TODO-LDML: #9121 wrong escape format
    }
    /** Expression that matches any marker (must be initialized after the *_MATCH fields above) */
    static ANY_MARKER_MATCH = MarkerParser.anyMarkerMatch();
    /**
     * Pattern for matching a marker reference, OR the special marker \m{.}
     */
    static REFERENCE = /\\m{([0-9A-Za-z_]{1,32}|\.)}/g;
    /**
     * Parse a string into marker references.
     * @param str input string such as "\m{a} … \m{.}"
     * @returns `[]` or an array of all marker ids referenced
     */
    static allReferences(str) {
        if (!str) {
            return [];
        }
        return matchArray(str, MarkerParser.REFERENCE);
    }
    /**
     * Render a marker index either as a single UTF-16 code unit or as `\uXXXX` text.
     * @param n marker index
     * @param forMatch true for regex-escape text, falsy for the raw character
     * @returns the rendered index
     */
    static markerCodeToString(n, forMatch) {
        if (forMatch) {
            return `\\u${hexQuad(n)}`; // TODO-LDML: #9121 wrong escape format
        }
        return String.fromCharCode(n);
    }
    /**
     * @param n marker index; must lie in MIN_MARKER_INDEX..ANY_MARKER_INDEX
     * @param forMatch true for regex-escape text, falsy for raw characters
     * @returns string for marker #n
     * @throws RangeError when n is outside the accepted range
     */
    static markerOutput(n, forMatch) {
        if (n < MarkerParser.MIN_MARKER_INDEX || n > MarkerParser.ANY_MARKER_INDEX) {
            throw new RangeError(`Internal Error: marker index out of range ${n}`);
        }
        const index = MarkerParser.markerCodeToString(n, forMatch);
        if (forMatch) {
            return MarkerParser.SENTINEL_MATCH + MarkerParser.MARKER_CODE_MATCH + index;
        }
        return MarkerParser.SENTINEL + MarkerParser.MARKER_CODE + index;
    }
    /**
     * Replace every `\m{…}` reference in a string with its sentinel sequence.
     * @param s input string; falsy values are returned unchanged
     * @param markers object whose getItemOrder(id) gives the position of a
     *                marker id, or -1 when absent; the emitted index is that position plus one
     * @param forMatch true to emit regex-escape text
     * @returns the string with all marker references replaced
     * @throws RangeError for an unknown marker, a missing marker list, or an index beyond MAX_MARKER_INDEX
     */
    static toSentinelString(s, markers, forMatch) {
        if (!s) {
            return s;
        }
        return s.replaceAll(MarkerParser.REFERENCE, (sub, arg) => {
            if (arg === MarkerParser.ANY_MARKER_ID) {
                if (forMatch) {
                    return MarkerParser.ANY_MARKER_MATCH;
                }
                return MarkerParser.markerOutput(MarkerParser.ANY_MARKER_INDEX);
            }
            if (!markers) {
                throw new RangeError(`Internal Error: Could not find marker \\m{${arg}} (no markers defined)`);
            }
            const order = markers.getItemOrder(arg);
            if (order === -1) {
                throw new RangeError(`Internal Error: Could not find marker \\m{${arg}}`);
            }
            // The emitted index is order + 1, so that is the value that has to
            // fit under MAX_MARKER_INDEX. Comparing `order` alone let one index
            // past the usable range through, which remove_markers() rejects
            // (unless it happens to equal the 'any' index).
            if (order + 1 > MarkerParser.MAX_MARKER_INDEX) {
                throw new RangeError(`Internal Error: marker \\m{${arg}} has out of range index ${order}`);
            }
            return MarkerParser.markerOutput(order + 1, forMatch);
        });
    }
    /**
     * NFD a string, respecting markers.
     * @param s input string
     * @param forMatch true if regex, false if individual
     * @returns the normalized string
     */
    static nfd_markers(s, forMatch) {
        // Referenced through the class name (like every other method here) so
        // the call also works when this function is passed around unbound.
        return MarkerParser.nfd_markers_segment(s, [], forMatch);
    }
    /**
     * NFD a safe subset of a string, respecting markers
     * @param s input string
     * @param map output array of marker entries; filled by remove_markers()
     * @param forMatch true if used for regexes
     * @returns the updated string
     */
    static nfd_markers_segment(s, map, forMatch) {
        // remove (and parse) the markers first
        const str_unmarked = MarkerParser.remove_markers(s, map, forMatch);
        // then, NFD the unmarked string
        const str_unmarked_nfd = str_unmarked.normalize("NFD");
        if (map.length == 0) {
            // no markers, so we can safely return the normalized unmarked string
            return str_unmarked_nfd;
        }
        if (str_unmarked_nfd === str_unmarked) {
            // normalization didn't shuffle anything, so it's entirely a no-op.
            return s;
        }
        // we had markers AND the normalization made a difference:
        // add the markers back per the map, and return
        return MarkerParser.add_back_markers(str_unmarked_nfd, map, forMatch);
    }
    /**
     * Return the string s but with a marker sequence before it.
     * @param s string to prefix
     * @param marker marker index
     * @param forMatch true to emit regex-escape text
     */
    static prepend_marker(s, marker, forMatch) {
        if (forMatch && marker === constants.marker_any_index) {
            // in regex form the 'any' marker is a character-class range, not one index
            return MarkerParser.ANY_MARKER_MATCH + s;
        }
        return MarkerParser.markerOutput(marker, forMatch) + s;
    }
    /**
     * Add back all markers in the map to the string
     * @param s input string (markers already removed)
     * @param map marker map as filled by remove_markers(); entries carry `ch`
     *            plus either `marker` or `end: true`. The map and its entries are not modified.
     * @param forMatch if true, use regex format
     * @returns s with every marker re-inserted before its glue character
     * @throws Error when not every marker in the map could be placed
     */
    static add_back_markers(s, map, forMatch) {
        // quick check: if no string, or no map: nothing to do
        if (!s || !map?.length) {
            return s;
        }
        /** output string */
        let out = '';
        /** for checking: the total number of markers expected, skipping end markers */
        const max_markers = map.filter(({ end }) => !end).length;
        /** for checking: the number of markers we've written */
        let written_markers = 0;
        // Entries get a `processed` flag below, so copy each entry and not just
        // the array: a shallow array copy would leak the flag into the caller's
        // map and make a second call with the same map fail.
        const map2 = map.map((entry) => ({ ...entry }));
        // First, add back all 'MARKER_BEFORE_EOT' markers
        while (map2.length && map2[map2.length - 1].ch === MARKER_BEFORE_EOT) {
            const { marker, end } = map2.pop();
            if (!end) {
                out = MarkerParser.prepend_marker(out, marker, forMatch);
                written_markers++;
            }
        }
        // Then, take each codepoint (from back to front)
        for (const p of [...s].reverse()) {
            out = p + out;
            for (let i = map2.length - 1; i >= 0; i--) {
                const { ch, marker, processed, end } = map2[i];
                if (ch === p && !processed) {
                    map2[i].processed = true; // mark as processed
                    if (end) {
                        break; // reached the end entry for this character
                    }
                    out = MarkerParser.prepend_marker(out, marker, forMatch);
                    written_markers++;
                }
                else if (map2[map2.length - 1]?.processed) {
                    // keep the list as short as possible
                    map2.pop();
                }
            }
        }
        // validate that we consumed all markers
        if (written_markers !== max_markers) {
            throw Error(`Internal Error: should have written ${max_markers} markers but only wrote ${written_markers}`);
        }
        return out;
    }
    /**
     * Remove (and parse) markers from a string
     * @param s input string
     * @param map output map containing marker locations; entries are appended.
     *            When s contains no markers, the entries appended by this call are removed again.
     * @param forMatch true if regex
     * @returns the original string, without any markers
     * @throws RangeError when a parsed marker index is outside the accepted range
     */
    static remove_markers(s, map, forMatch) {
        /** accumulated output */
        let out = '';
        /** array of marker ids in order waiting to be added */
        let last_markers = [];
        /** input string, split into codepoints */
        let a = [...s];
        /** were any markers found? */
        let had_markers = false;
        /** map size on entry, so this call's additions can be rolled back */
        const initial_map_length = map.length;
        /**
         * subfunc: add all markers in the pending (last_markers) queue
         * @param l string the marker is 'glued' to, or '' for end
         */
        function add_pending_markers(l) {
            // first char of the NFD form, or, marker-before-eot
            const glueChars = (l === '') ? [MARKER_BEFORE_EOT] : [...(l.normalize("NFD"))];
            const glue = glueChars[0];
            // push the 'end' value
            map.push({ ch: glue, end: true });
            for (const marker of last_markers) {
                map.push({ ch: glue, marker });
            }
            last_markers = [];
            // now, push the rest of the glue chars as an NFD sequence.
            // For example, `\m{m}\u0344` will create the following stream:
            //   { ch: 0308, end: true}
            //   { ch: 0308, marker: 1}
            //   { ch: 0301, end: true} // added because of decomp
            for (const ch of glueChars.slice(1)) {
                map.push({ ch, end: true });
            }
        }
        // iterate until the codepoint list is empty
        while (a.length > 0) {
            // does 'a' begin with a marker?
            const p = MarkerParser.parse_next_marker(a.join(''), forMatch);
            if (!p?.match) {
                // no match
                add_pending_markers(a[0]); // add any pending markers
                out = out + a[0]; // add the non-marker text to the buffer
                a = a.slice(1); // move forward 1 codepoint
                continue;
            }
            // found a marker
            had_markers = true;
            const { marker, match } = p;
            const in_range = marker >= constants.marker_min_index && marker <= constants.marker_max_index;
            if (marker !== constants.marker_any_index && !in_range) {
                throw new RangeError(`String contained out-of-range marker ${marker}: '${s}'`);
            }
            last_markers.push(marker);
            a = a.slice([...match].length); // move forward over matched marker
        }
        // add any remaining markers at the end of the string
        add_pending_markers('');
        if (!had_markers) {
            // No markers were found: drop what this call appended. Reassigning
            // the `map` parameter would leave the caller's array untouched.
            map.length = initial_map_length;
        }
        return out;
    }
    /**
     * analyze the string to see if it begins with a marker
     * @param s input string
     * @param forMatch true if regex
     * @returns `{ match, marker }` with the matched text and the marker index, or null when s does not start with a marker
     */
    static parse_next_marker(s, forMatch) {
        if (!forMatch) {
            // plain: sentinel, marker code, then the index as one character
            const m = s.match(PARSE_SENTINEL_MARKER);
            if (m) {
                const match = m[0];
                // the index is the third character of the matched sequence
                return { match, marker: match.codePointAt(2) };
            }
            return null;
        }
        // regex-escape text
        const m = s.match(PARSE_REGEX_MARKER);
        if (!m) {
            return null;
        }
        const match = m[0];
        // `\uXXXX` text when a single marker was matched
        const single = m[1];
        if (single) {
            // Skip exactly the two characters `\u`; skipping three dropped the
            // leading hex digit and corrupted every index >= 0x1000.
            return { match, marker: Number.parseInt(single.substring(2), 16) };
        }
        // it's a range, so it's an any match
        return { match, marker: constants.marker_any_index };
    }
}
|
|
337
|
-
;
|
|
338
|
-
/** special noncharacter value denoting end of string */
|
|
339
|
-
// U+FFFE. MarkerParser.remove_markers() records it as the `ch` of markers that have no character after them, and add_back_markers() handles those entries first.
export const MARKER_BEFORE_EOT = '\ufffe';
|
|
340
|
-
/** matcher for a sentinel */
|
|
341
|
-
// Built from MarkerParser.ANY_MARKER_MATCH and anchored with ^, so it only matches a marker sequence at the very start of a string; used by parse_next_marker() when forMatch is false.
const PARSE_SENTINEL_MARKER = new RegExp(`^${MarkerParser.ANY_MARKER_MATCH}`);
|
|
342
|
-
/** matcher for a regex marker, either single or any */
|
|
343
|
-
// Matches the literal escape text `\uffff\u0008` at the start of a string, followed by either one `\uXXXX` escape (capture group 1) or a bracketed `[\uXXXX-\uXXXX]` range (capture group 2); used by parse_next_marker() when forMatch is true.
const PARSE_REGEX_MARKER = /^\\uffff\\u0008(?:(\\u[0-9a-fA-F]{4})|(\[\\u[0-9a-fA-F]{4}-\\u[0-9a-fA-F]{4}\]))/;
|
|
344
|
-
;
|
|
345
|
-
;
|
|
346
|
-
/**
 * Static helpers for locating variable references inside strings
 */
export class VariableParser {
    /**
     * Pattern a variable id must satisfy (the shared COMMON_ID pattern).
     * TODO-LDML: Needs to be reflected in the spec
     */
    static ID = COMMON_ID;
    /**
     * Pattern for matching a string reference `${str}`; group 1 is the id
     */
    static STRING_REFERENCE = /\${([0-9A-Za-z_]{1,32})}/g;
    /**
     * Pattern for matching a set reference `$[set]`; group 1 is the id
     */
    static SET_REFERENCE = /\$\[([0-9A-Za-z_]{1,32})\]/g;
    /**
     * Pattern for matching a capture set reference `($[set])`; group 1 is the id
     */
    static CAPTURE_SET_REFERENCE = /\(\$\[([0-9A-Za-z_]{1,32})\]\)/;
    /**
     * `$[1:variable]`
     * This regex is anchored at both ends, so it must match the whole string.
     */
    static MAPPED_SET_REFERENCE = /^\$\[1:([0-9A-Za-z_]{1,32})\]$/;
    /**
     * List the ids of all `${…}` string references in a string
     * @param str input string
     * @returns `[]` or an array of all string variable ids referenced
     */
    static allStringReferences(str) {
        const pattern = VariableParser.STRING_REFERENCE;
        return matchArray(str, pattern);
    }
    /**
     * List the ids of all `$[…]` set references in a string
     * @param str input string
     * @returns `[]` or an array of all set variable ids referenced
     */
    static allSetReferences(str) {
        const pattern = VariableParser.SET_REFERENCE;
        return matchArray(str, pattern);
    }
    /**
     * Split a whitespace-separated string into its items
     * @param str input string
     * @returns array of items; `[]` when str is nullish, empty or only whitespace
     */
    static setSplitter(str) {
        const trimmed = str?.trim();
        return trimmed ? trimmed.split(/\s+/) : [];
    }
}
|
|
399
|
-
/**
 * Segment kinds reported by ElementParser.segment().
 * Each value is a one-character tag for the kind.
 */
export var ElementType;
(function (kinds) {
    kinds.codepoint = ".";
    kinds.escaped = "\\";
    kinds.uset = "[";
    kinds.string = "*";
})(ElementType || (ElementType = {}));
|
|
407
|
-
;
|
|
408
|
-
/** A single piece of a segmented element string, tagged with its kind */
export class ElementSegment {
    segment;
    type;
    /**
     * @param segment the text of this segment
     * @param type kind of segment. When omitted (or falsy) it is derived from
     *             the text: a leading `[` means uset, a leading `\u` means
     *             escaped, anything else is a codepoint.
     */
    constructor(segment, type) {
        this.segment = segment;
        this.type = type || (ElementParser.MATCH_USET.test(segment)
            ? ElementType.uset
            : ElementParser.MATCH_ESCAPED.test(segment)
                ? ElementType.escaped
                : ElementType.codepoint);
    }
    /** The segment text with its escape resolved; non-escaped segments are returned as-is */
    get unescaped() {
        const text = this.segment;
        if (this.type !== ElementType.escaped) {
            return text;
        }
        // quad escapes get their own decoder; every other escape goes through unescapeString
        return MATCH_QUAD_ESCAPE.test(text)
            ? unescapeOneQuadString(text)
            : unescapeString(text);
    }
}
|
|
446
|
-
;
|
|
447
|
-
/** Class for helping with Element strings (i.e. reorder) */
export class ElementParser {
    /**
     * Matches any complex UnicodeSet that would otherwise be misinterpreted
     * by `MATCH_ELEMENT_SEGMENTS` due to nested `[]`'s.
     * For example, `[[a-z]-[aeiou]]` could be
     * mis-segmented into `[[a-z]`, `-`, `[aeiou]`, `]` */
    static MATCH_NESTED_SQUARE_BRACKETS = /\[[^\]]*\[/;
    /** Match (segment) UnicodeSets OR hex escapes OR single Unicode codepoints */
    static MATCH_ELEMENT_SEGMENTS = /(?:\[[^\]]*\]|\\u[0-9a-fA-F]{4}|\\u\{[0-9a-fA-F]{1,6}\}|\\u\{(?:[0-9a-fA-F]{1,6})(?: [0-9a-fA-F]{1,6}){1,}\}|.)/gu;
    /** Does it start with a UnicodeSet? Used to test the segments. */
    static MATCH_USET = /^\[/;
    /** Does it start with an escaped char? Used to test the segments. */
    static MATCH_ESCAPED = /^\\u/;
    /**
     * Split a string into ElementSegments.
     * @param str element string
     * @returns array of segments; `[]` when str is nullish, empty, or contains nothing the segment pattern matches
     * @throws Error when str contains nested square brackets
     */
    static segment(str) {
        if (ElementParser.MATCH_NESTED_SQUARE_BRACKETS.test(str)) {
            throw Error(`Unsupported: nested square brackets in element segment: ${str}`);
        }
        // String.prototype.match() with a global regex returns null, not [],
        // when nothing matches; iterating null would throw a TypeError.
        const pieces = str?.match(ElementParser.MATCH_ELEMENT_SEGMENTS) ?? [];
        const list = [];
        for (const piece of pieces) {
            const e = new ElementSegment(piece);
            if (e.type !== ElementType.escaped) {
                // usets and plain codepoints pass through unchanged
                list.push(e);
                continue;
            }
            const { unescaped } = e;
            if (isOneChar(unescaped)) {
                list.push(e);
                continue;
            }
            // need to split the escaped segment, \u{41 42} -> \u{41}, \u{42}
            for (const s of unescaped) {
                list.push(new ElementSegment(`\\u{${s.codePointAt(0).toString(16)}}`));
            }
        }
        return list;
    }
}
|
|
490
|
-
;
|
|
5
|
+
// Injected debug-id snippet: picks window, global or self (falling back to a fresh object) and, when `(new Error).stack` is available, stores the id string in `_sentryDebugIds` keyed by that stack text. Any exception is caught and ignored.
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="f6308a5e-9fe4-5298-8b51-76d5a693f598")}catch(e){}}();
|
|
6
|
+
import { constants } from "@keymanapp/ldml-keyboard-constants";
|
|
7
|
+
import { MATCH_QUAD_ESCAPE, isOneChar, unescapeOneQuadString, unescapeString, hexQuad } from "../util/util.js";
|
|
8
|
+
/**
 * Collect the first capture group of every match of a global regex.
 * @param str input string; a falsy value is treated as the empty string
 * @param match global RegExp with at least one capture group
 * @returns array holding capture group 1 of each match, in match order
 */
function matchArray(str, match) {
  const input = str || '';
  return Array.from(input.matchAll(match), (hit) => hit[1]);
}
|
|
18
|
+
/**
|
|
19
|
+
* Common regex for an ID: 1 to 32 ASCII letters, digits, or underscores
|
|
20
|
+
*/
|
|
21
|
+
const COMMON_ID = /^[0-9A-Za-z_]{1,32}$/;
|
|
22
|
+
/**
 * Class for helping with markers: finding `\m{id}` references, encoding them
 * as sentinel sequences (plain text or regex-escaped text), and NFD-normalizing
 * strings while keeping each marker attached to the character it precedes.
 */
export class MarkerParser {
  /**
   * A marker id has the same constraint as a key id. TODO-LDML: Needs to be reflected in the spec
   */
  static ID = COMMON_ID;
  /**
   * Special marker reference referring to any marker
   */
  static ANY_MARKER = '\\m{.}';
  /**
   * id of the 'any' marker
   */
  static ANY_MARKER_ID = '.';
  /**
   * Marker sentinel as a string - U+FFFF
   */
  static SENTINEL = String.fromCodePoint(constants.uc_sentinel);
  /** Marker sentinel as a regex match */
  static SENTINEL_MATCH = '\\u' + hexQuad(constants.uc_sentinel);
  /**
   * Marker code as a string - U+0008
   */
  static MARKER_CODE = String.fromCodePoint(constants.marker_code);
  /** Marker code as a regex match */
  static MARKER_CODE_MATCH = '\\u' + hexQuad(constants.marker_code);
  /** Minimum ID (trailing code unit) */
  static MIN_MARKER_INDEX = constants.marker_min_index;
  /** Index meaning 'any marker' == `\m{.}` */
  static ANY_MARKER_INDEX = constants.marker_any_index;
  /** Maximum usable marker index */
  static MAX_MARKER_INDEX = constants.marker_max_index;
  /** Max count of markers */
  static MAX_MARKER_COUNT = constants.marker_max_count;

  /**
   * Build the regex-source text that matches any marker: sentinel, marker
   * code, then a character class spanning the min..max marker indexes.
   * Must be declared after the static fields it reads.
   * @returns regex source string
   */
  static anyMarkerMatch() {
    const start = hexQuad(MarkerParser.MIN_MARKER_INDEX);
    const end = hexQuad(MarkerParser.MAX_MARKER_INDEX);
    return `${MarkerParser.SENTINEL_MATCH}${MarkerParser.MARKER_CODE_MATCH}[\\u${start}-\\u${end}]`; // TODO-LDML: #9121 wrong escape format
  }

  /** Expression that matches any marker */
  static ANY_MARKER_MATCH = MarkerParser.anyMarkerMatch();
  /**
   * Pattern for matching a marker reference, OR the special marker \m{.}
   */
  static REFERENCE = /\\m{([0-9A-Za-z_]{1,32}|\.)}/g;

  /**
   * parse a string into marker references
   * @param str input string such as "\m{a} … \m{.}"
   * @returns `[]` or an array of all markers referenced
   */
  static allReferences(str) {
    if (!str) {
      return [];
    }
    return matchArray(str, MarkerParser.REFERENCE);
  }

  /**
   * Encode a single marker index.
   * @param n marker index
   * @param forMatch if truthy, return the `\uXXXX` escape text for use in a
   *                 regex; otherwise return the actual character
   * @returns the encoded index
   */
  static markerCodeToString(n, forMatch) {
    if (forMatch) {
      return `\\u${hexQuad(n)}`; // TODO-LDML: #9121 wrong escape format
    }
    return String.fromCharCode(n);
  }

  /**
   * @param n marker index; ANY_MARKER_INDEX is accepted as the upper bound
   * @param forMatch if truthy, produce regex-escaped text
   * @returns string for marker #n
   * @throws RangeError if n is below MIN_MARKER_INDEX or above ANY_MARKER_INDEX
   */
  static markerOutput(n, forMatch) {
    if (n < MarkerParser.MIN_MARKER_INDEX || n > MarkerParser.ANY_MARKER_INDEX) {
      throw RangeError(`Internal Error: marker index out of range ${n}`);
    }
    const code = MarkerParser.markerCodeToString(n, forMatch);
    if (forMatch) {
      return MarkerParser.SENTINEL_MATCH + MarkerParser.MARKER_CODE_MATCH + code;
    }
    return MarkerParser.SENTINEL + MarkerParser.MARKER_CODE + code;
  }

  /**
   * Replace every `\m{id}` reference in a string with its sentinel encoding.
   * @param s input string; a falsy value is returned unchanged
   * @param markers lookup object providing `getItemOrder(id)`, which is
   *                expected to return the 0-based order of the id, or -1
   * @param forMatch if truthy, produce regex-escaped text
   * @returns all marker strings as sentinel values
   * @throws RangeError if a named marker is unknown or its index is out of range
   */
  static toSentinelString(s, markers, forMatch) {
    if (!s) {
      return s;
    }
    return s.replaceAll(MarkerParser.REFERENCE, (sub, arg) => {
      if (arg === MarkerParser.ANY_MARKER_ID) {
        if (forMatch) {
          return MarkerParser.ANY_MARKER_MATCH;
        }
        return MarkerParser.markerOutput(MarkerParser.ANY_MARKER_INDEX);
      }
      if (!markers) {
        throw RangeError(`Internal Error: Could not find marker \\m{${arg}} (no markers defined)`);
      }
      const order = markers.getItemOrder(arg);
      if (order === -1) {
        throw RangeError(`Internal Error: Could not find marker \\m{${arg}}`);
      }
      // The emitted index is order + 1, so that is the value which must not
      // exceed the maximum usable marker index.
      if (order + 1 > MarkerParser.MAX_MARKER_INDEX) {
        throw RangeError(`Internal Error: marker \\m{${arg}} has out of range index ${order}`);
      }
      return MarkerParser.markerOutput(order + 1, forMatch);
    });
  }

  /**
   * NFD a string, respecting markers.
   * @param s input string
   * @param forMatch true if regex, false if individual
   * @returns the normalized string
   */
  static nfd_markers(s, forMatch) {
    const m = [];
    return MarkerParser.nfd_markers_segment(s, m, forMatch);
  }

  /**
   * NFD a safe subset of a string, respecting markers
   * @param s input string
   * @param map output array of marker chars
   * @param forMatch true if used for regexes
   * @returns the updated string
   */
  static nfd_markers_segment(s, map, forMatch) {
    // remove (and parse) the markers first
    const str_unmarked = MarkerParser.remove_markers(s, map, forMatch);
    // then, NFD the normalized string
    const str_unmarked_nfd = str_unmarked.normalize("NFD");
    if (map.length === 0) {
      // no markers, so we can safely return the normalized unmarked string
      return str_unmarked_nfd;
    }
    if (str_unmarked_nfd === str_unmarked) {
      // normalization didn't shuffle anything, so it's entirely a no-op.
      return s;
    }
    // we had markers AND the normalization made a difference.
    // add the markers back per the map, and return
    return MarkerParser.add_back_markers(str_unmarked_nfd, map, forMatch);
  }

  /** return the string s but with a marker sequence before it */
  static prepend_marker(s, marker, forMatch) {
    if (forMatch && marker === constants.marker_any_index) {
      return MarkerParser.ANY_MARKER_MATCH + s;
    }
    return MarkerParser.markerOutput(marker, forMatch) + s;
  }

  /**
   * Add back all markers in the map to the string
   * @param s input string
   * @param map the marker map, as filled in by remove_markers; not modified
   * @param forMatch if true, use regex format
   * @returns the string with markers re-inserted
   * @throws Error if the number of markers written differs from the number
   *         of non-end entries in the map
   */
  static add_back_markers(s, map, forMatch) {
    // quick check: if no string, or no map: nothing to do
    if (!s || !map?.length) {
      return s;
    }
    /** output string */
    let out = '';
    /** for checking: the total number of markers expected, skipping end markers */
    const max_markers = map.filter(({ end }) => !end).length;
    /** for checking: the number of markers we've written */
    let written_markers = 0;
    /**
     * Working copy. Entries are cloned (not just the array) because the
     * `processed` flag is set on them below and must not leak to the caller.
     */
    const map2 = map.map((entry) => ({ ...entry, processed: false }));
    // First, add back all 'MARKER_BEFORE_EOT' markers
    while (map2.length && map2[map2.length - 1].ch === MARKER_BEFORE_EOT) {
      // remove from list
      const { marker, end } = map2.pop();
      if (!end) {
        out = MarkerParser.prepend_marker(out, marker, forMatch);
        written_markers++;
      }
    }
    // Then, take each codepoint (from back to front)
    for (const p of [...s].reverse()) {
      // reverse order code units, prepend to out
      out = p + out;
      for (let i = map2.length - 1; i >= 0; i--) {
        const { ch, marker, processed, end } = map2[i];
        if (ch === p && !processed) {
          map2[i].processed = true; // mark as processed
          if (end) {
            break; // exit loop
          }
          out = MarkerParser.prepend_marker(out, marker, forMatch);
          written_markers++;
        } else if (map2[map2.length - 1]?.processed) {
          // keep the list as short as possible
          map2.pop();
        }
      }
    }
    // validate that we consumed all markers
    if (written_markers !== max_markers) {
      throw Error(`Internal Error: should have written ${max_markers} markers but only wrote ${written_markers}`);
    }
    return out;
  }

  /**
   * Remove (and parse) markers from a string
   * @param s input string
   * @param map output map containing marker locations; entries are appended
   * @param forMatch true if regex
   * @returns the original string, without any markers
   * @throws RangeError if a parsed marker index is neither the 'any' index
   *         nor within the min..max marker index range
   */
  static remove_markers(s, map, forMatch) {
    /** accumulated output */
    let out = '';
    /** array of marker ids in order waiting to be added */
    let last_markers = [];
    /** input string, split into codepoint runs */
    let a = [...s];
    /**
     * subfunc: add all markers in the pending (last_markers) queue
     * @param l string the marker is 'glued' to, or '' for end
     */
    function add_pending_markers(l) {
      // first char, or, marker-before-eot
      const glueChars = (l === '') ? [MARKER_BEFORE_EOT] : [...(l.normalize("NFD"))];
      const glue = glueChars[0];
      // push the 'end' value
      map.push({ ch: glue, end: true });
      // flush the queue front-to-back, all glued to the same character
      for (const marker of last_markers) {
        map.push({ ch: glue, marker });
      }
      last_markers = [];
      // now, push the rest of the glue chars as an NFD sequence.
      // For example, `\m{m}\u0344` will create the following stream:
      //   { ch: 0308, end: true}
      //   { ch: 0308, marker: 1}
      //   { ch: 0301, end: true} // added because of decomp
      for (const ch of glueChars.slice(1)) {
        map.push({ ch, end: true });
      }
    }
    // iterate until the codepoint list is empty
    while (a.length > 0) {
      // does 'a' begin with a marker?
      const p = MarkerParser.parse_next_marker(a.join(''), forMatch);
      if (!p?.match) {
        // no match
        add_pending_markers(a[0]); // add any pending markers
        out = out + a[0]; // add the non-marker text to the buffer
        a = a.slice(1); // move forward 1 codepoint
        continue;
      }
      // found a marker
      const { marker, match } = p;
      const in_range = (marker === constants.marker_any_index) ||
        (marker >= constants.marker_min_index && marker <= constants.marker_max_index);
      if (!in_range) {
        throw RangeError(`String contained out-of-range marker ${marker}: '${s}'`);
      }
      last_markers.push(marker);
      a = a.slice([...match].length); // move forward over matched marker
    }
    // add any remaining markers at the end of the string
    add_pending_markers('');
    // NOTE(review): for marker-free input the map still receives 'end'
    // entries. A previous `map = []` here only rebound the local parameter
    // and never cleared the caller's array, so it has been dropped; confirm
    // whether callers actually want the map emptied in that case.
    return out;
  }

  /**
   * analyze the string to see if it begins with a marker
   * @param s input string
   * @param forMatch true if regex
   * @returns parsed marker details `{ match, marker }`, or null if the string
   *          does not begin with a marker
   */
  static parse_next_marker(s, forMatch) {
    if (!forMatch) {
      // plain
      const m = s.match(PARSE_SENTINEL_MARKER);
      if (m) {
        // full string matched
        const match = m[0];
        // extract the marker number: third code unit, after sentinel + marker code
        const marker = match.codePointAt(2);
        return ({ match, marker });
      }
      return null;
    }
    // regex
    const m = s.match(PARSE_REGEX_MARKER);
    if (!m) {
      return null;
    }
    // full string
    const match = m[0];
    // `\uXXXX` text (if a single marker rather than a range)
    const single = m[1];
    if (single) {
      // skip only the two-character `\u` prefix so all four hex digits are parsed
      return ({ match, marker: Number.parseInt(single.substring(2), 16) });
    }
    // it's a range, so it's an any match
    return ({ match, marker: constants.marker_any_index });
  }
}
|
|
337
|
+
;
|
|
338
|
+
/** special noncharacter value denoting end of string */
|
|
339
|
+
export const MARKER_BEFORE_EOT = '\ufffe';
|
|
340
|
+
/** matcher for a sentinel-encoded marker at the start of a plain (non-regex) string */
|
|
341
|
+
const PARSE_SENTINEL_MARKER = new RegExp(`^${MarkerParser.ANY_MARKER_MATCH}`);
|
|
342
|
+
/** matcher for a regex marker, either single or any */
|
|
343
|
+
const PARSE_REGEX_MARKER = /^\\uffff\\u0008(?:(\\u[0-9a-fA-F]{4})|(\[\\u[0-9a-fA-F]{4}-\\u[0-9a-fA-F]{4}\]))/;
|
|
344
|
+
;
|
|
345
|
+
;
|
|
346
|
+
/**
 * Class for helping with variables: patterns for string and set variable
 * references, plus small parsing helpers.
 */
export class VariableParser {
  /**
   * A variable id uses the same common ID pattern as marker ids.
   */
  static ID = COMMON_ID;

  /**
   * Pattern for matching a string reference `${str}`; global, captures the id.
   */
  static STRING_REFERENCE = /\${([0-9A-Za-z_]{1,32})}/g;

  /**
   * Pattern for matching a set reference `$[set]`; global, captures the id.
   */
  static SET_REFERENCE = /\$\[([0-9A-Za-z_]{1,32})\]/g;

  /**
   * Pattern for matching a capture set reference `($[set])`; captures the id.
   */
  static CAPTURE_SET_REFERENCE = /\(\$\[([0-9A-Za-z_]{1,32})\]\)/;

  /**
   * Pattern for a mapped set reference `$[1:variable]`.
   * Anchored at both ends, so it only matches when the whole string is the reference.
   */
  static MAPPED_SET_REFERENCE = /^\$\[1:([0-9A-Za-z_]{1,32})\]$/;

  /**
   * List the ids of every string reference in the input.
   * @param str input string
   * @returns `[]` or an array of all string variable ids referenced
   */
  static allStringReferences(str) {
    const pattern = VariableParser.STRING_REFERENCE;
    return matchArray(str, pattern);
  }

  /**
   * List the ids of every set reference in the input.
   * @param str input string
   * @returns `[]` or an array of all set variable ids referenced
   */
  static allSetReferences(str) {
    const pattern = VariableParser.SET_REFERENCE;
    return matchArray(str, pattern);
  }

  /**
   * Split a whitespace-separated string into its items.
   * @param str input string; may be null or undefined
   * @returns the items, or `[]` when the input is missing, empty, or only whitespace
   */
  static setSplitter(str) {
    const trimmed = str?.trim();
    return trimmed ? trimmed.split(/\s+/) : [];
  }
}
|
|
399
|
+
/**
 * Segment kinds produced by ElementParser.segment().
 * Each value is a one-character string tag.
 */
export var ElementType;
ElementType ||= {};
Object.assign(ElementType, {
  codepoint: ".",
  escaped: "\\",
  uset: "[",
  string: "*",
});
|
|
407
|
+
;
|
|
408
|
+
/**
 * One piece of a segmented element string: the raw text plus its ElementType.
 */
export class ElementSegment {
  segment;
  type;

  /**
   * @param segment the string in the segment
   * @param type type of segment. When omitted (falsy), it is derived from the
   *             text: a leading `[` is a uset, a leading `\u` is an escape,
   *             anything else is a codepoint.
   */
  constructor(segment, type) {
    this.segment = segment;
    this.type = type || (
      ElementParser.MATCH_USET.test(segment)
        ? ElementType.uset
        : ElementParser.MATCH_ESCAPED.test(segment)
          ? ElementType.escaped
          : ElementType.codepoint
    );
  }

  /**
   * The segment text with escapes resolved. Segments that are not of the
   * escaped type are returned untouched.
   */
  get unescaped() {
    const raw = this.segment;
    if (this.type !== ElementType.escaped) {
      return raw;
    }
    // a quad escape goes through the dedicated helper; all other escapes through the general one
    return MATCH_QUAD_ESCAPE.test(raw)
      ? unescapeOneQuadString(raw)
      : unescapeString(raw);
  }
}
|
|
446
|
+
;
|
|
447
|
+
/** Class for helping with Element strings (i.e. reorder) */
export class ElementParser {
  /**
   * Matches any complex UnicodeSet that would otherwise be misinterpreted
   * by `MATCH_ELEMENT_SEGMENTS` due to nested `[]`'s.
   * For example, `[[a-z]-[aeiou]]` could be
   * mis-segmented into `[[a-z]`, `-`, `[aeiou]`, `]` */
  static MATCH_NESTED_SQUARE_BRACKETS = /\[[^\]]*\[/;
  /** Match (segment) UnicodeSets OR hex escapes OR single Unicode codepoints */
  static MATCH_ELEMENT_SEGMENTS = /(?:\[[^\]]*\]|\\u[0-9a-fA-F]{4}|\\u\{[0-9a-fA-F]{1,6}\}|\\u\{(?:[0-9a-fA-F]{1,6})(?: [0-9a-fA-F]{1,6}){1,}\}|.)/gu;
  /** Does it start with a UnicodeSet? Used to test the segments. */
  static MATCH_USET = /^\[/;
  /** Does it start with an escaped char? Used to test the segments. */
  static MATCH_ESCAPED = /^\\u/;

  /**
   * Split a string into ElementSegments
   * @param str element string to segment
   * @returns array of ElementSegment; empty when the string has nothing to
   *          segment (for example the empty string)
   * @throws Error if the string contains nested square brackets
   */
  static segment(str) {
    if (ElementParser.MATCH_NESTED_SQUARE_BRACKETS.test(str)) {
      throw new Error(`Unsupported: nested square brackets in element segment: ${str}`);
    }
    const list = [];
    // A global match() returns null (not []) when nothing matches, e.g. for an
    // empty string; treat that as "no segments" instead of throwing.
    const matches = str.match(ElementParser.MATCH_ELEMENT_SEGMENTS) ?? [];
    for (const m of matches) {
      const e = new ElementSegment(m);
      if (e.type !== ElementType.escaped) {
        // usets and plain codepoints are kept as-is
        list.push(e);
        continue;
      }
      const { unescaped } = e;
      if (isOneChar(unescaped)) {
        list.push(e);
      } else {
        // need to split the escaped segment, \u{41 42} -> \u{41}, \u{42}
        for (const s of unescaped) {
          list.push(new ElementSegment(`\\u{${s.codePointAt(0).toString(16)}}`));
        }
      }
    }
    return list;
  }
}
|
|
490
|
+
;
|
|
491
491
|
//# sourceMappingURL=pattern-parser.js.map
|
|
492
|
-
//# debugId=
|
|
492
|
+
//# debugId=f6308a5e-9fe4-5298-8b51-76d5a693f598
|