@unified-latex/unified-latex-util-catcode 1.8.2 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -1,295 +1,307 @@
1
1
  import { match } from "@unified-latex/unified-latex-util-match";
2
- import { visit, EXIT, SKIP } from "@unified-latex/unified-latex-util-visit";
2
+ import { EXIT, SKIP, visit } from "@unified-latex/unified-latex-util-visit";
3
+ //#region libs/find-region.ts
4
+ /**
5
+ * Find all contiguous segments in the array that are between start and end blocks.
6
+ * The `start` and `end` are functions that determine when a region starts and ends.
7
+ */
3
8
  function findRegionInArray(tree, start, end) {
4
- const ret = [];
5
- let currRegion = { start: void 0, end: tree.length };
6
- for (let i = 0; i < tree.length; i++) {
7
- const node = tree[i];
8
- if (start(node)) {
9
- currRegion.start = i;
10
- }
11
- if (end(node)) {
12
- currRegion.end = i + 1;
13
- ret.push(currRegion);
14
- currRegion = { start: void 0, end: tree.length };
15
- }
16
- }
17
- if (currRegion.start != null) {
18
- ret.push(currRegion);
19
- }
20
- return ret;
9
+ const ret = [];
10
+ let currRegion = {
11
+ start: void 0,
12
+ end: tree.length
13
+ };
14
+ for (let i = 0; i < tree.length; i++) {
15
+ const node = tree[i];
16
+ if (start(node)) currRegion.start = i;
17
+ if (end(node)) {
18
+ currRegion.end = i + 1;
19
+ ret.push(currRegion);
20
+ currRegion = {
21
+ start: void 0,
22
+ end: tree.length
23
+ };
24
+ }
25
+ }
26
+ if (currRegion.start != null) ret.push(currRegion);
27
+ return ret;
21
28
  }
29
+ //#endregion
30
+ //#region libs/regions.ts
31
+ /**
32
+ * Given `regions`, a list of `Region`s (not necessarily ordered, possibly overlapping), return a list of in-order,
33
+ * non-overlapping regions and a corresponding list containing a set of the original `Region`s that the new region
34
+ * is a subset of.
35
+ */
22
36
  function refineRegions(regions) {
23
- const _regions = [...regions];
24
- _regions.sort((a, b) => a.start - b.start);
25
- const cutPointsSet = new Set(_regions.flatMap((r) => [r.start, r.end]));
26
- const cutPoints = Array.from(cutPointsSet);
27
- cutPoints.sort((a, b) => a - b);
28
- const retRegions = [];
29
- const retRegionsContainedIn = [];
30
- let seekIndex = 0;
31
- for (let i = 0; i < cutPoints.length - 1; i++) {
32
- const start = cutPoints[i];
33
- const end = cutPoints[i + 1];
34
- const region = { start, end };
35
- const regionContainedIn = /* @__PURE__ */ new Set();
36
- let encounteredEndPastStart = false;
37
- for (let j = seekIndex; j < _regions.length; j++) {
38
- const superRegion = _regions[j];
39
- if (superRegion.end >= region.start) {
40
- encounteredEndPastStart = true;
41
- }
42
- if (!encounteredEndPastStart && superRegion.end < region.start) {
43
- seekIndex = j + 1;
44
- continue;
45
- }
46
- if (superRegion.start > end) {
47
- break;
48
- }
49
- if (superRegion.start <= region.start && superRegion.end >= region.end) {
50
- encounteredEndPastStart = true;
51
- regionContainedIn.add(superRegion);
52
- }
53
- }
54
- if (regionContainedIn.size > 0) {
55
- retRegions.push(region);
56
- retRegionsContainedIn.push(regionContainedIn);
57
- }
58
- }
59
- return { regions: retRegions, regionsContainedIn: retRegionsContainedIn };
37
+ const _regions = [...regions];
38
+ _regions.sort((a, b) => a.start - b.start);
39
+ const cutPointsSet = new Set(_regions.flatMap((r) => [r.start, r.end]));
40
+ const cutPoints = Array.from(cutPointsSet);
41
+ cutPoints.sort((a, b) => a - b);
42
+ const retRegions = [];
43
+ const retRegionsContainedIn = [];
44
+ let seekIndex = 0;
45
+ for (let i = 0; i < cutPoints.length - 1; i++) {
46
+ const start = cutPoints[i];
47
+ const end = cutPoints[i + 1];
48
+ const region = {
49
+ start,
50
+ end
51
+ };
52
+ const regionContainedIn = /* @__PURE__ */ new Set();
53
+ let encounteredEndPastStart = false;
54
+ for (let j = seekIndex; j < _regions.length; j++) {
55
+ const superRegion = _regions[j];
56
+ if (superRegion.end >= region.start) encounteredEndPastStart = true;
57
+ if (!encounteredEndPastStart && superRegion.end < region.start) {
58
+ seekIndex = j + 1;
59
+ continue;
60
+ }
61
+ if (superRegion.start > end) break;
62
+ if (superRegion.start <= region.start && superRegion.end >= region.end) {
63
+ encounteredEndPastStart = true;
64
+ regionContainedIn.add(superRegion);
65
+ }
66
+ }
67
+ if (regionContainedIn.size > 0) {
68
+ retRegions.push(region);
69
+ retRegionsContainedIn.push(regionContainedIn);
70
+ }
71
+ }
72
+ return {
73
+ regions: retRegions,
74
+ regionsContainedIn: retRegionsContainedIn
75
+ };
60
76
  }
77
+ /**
78
+ * Split an array up into the disjoint regions specified by `regionsRecord`.
79
+ * Returned is a list of tuples, the first item being the key of `regionsRecord` if there
80
+ * was a corresponding region, or `null` if there was no corresponding region; the second item is the matching slice of `array`.
81
+ *
82
+ * This function assumes that the regions in `regionsRecord` are disjoint and fully contained
83
+ * within the bounds of `array`.
84
+ */
61
85
  function splitByRegions(array, regionsRecord) {
62
- const ret = [];
63
- const indices = [0, array.length];
64
- const reverseMap = {};
65
- for (const [key, records] of Object.entries(regionsRecord)) {
66
- indices.push(
67
- ...records.flatMap((r) => {
68
- reverseMap["" + [r.start, r.end]] = key;
69
- return [r.start, r.end];
70
- })
71
- );
72
- }
73
- indices.sort((a, b) => a - b);
74
- for (let i = 0; i < indices.length - 1; i++) {
75
- const start = indices[i];
76
- const end = indices[i + 1];
77
- if (start === end) {
78
- continue;
79
- }
80
- const regionKey = reverseMap["" + [start, end]];
81
- ret.push([regionKey || null, array.slice(start, end)]);
82
- }
83
- return ret;
86
+ const ret = [];
87
+ const indices = [0, array.length];
88
+ const reverseMap = {};
89
+ for (const [key, records] of Object.entries(regionsRecord)) indices.push(...records.flatMap((r) => {
90
+ reverseMap["" + [r.start, r.end]] = key;
91
+ return [r.start, r.end];
92
+ }));
93
+ indices.sort((a, b) => a - b);
94
+ for (let i = 0; i < indices.length - 1; i++) {
95
+ const start = indices[i];
96
+ const end = indices[i + 1];
97
+ if (start === end) continue;
98
+ const regionKey = reverseMap["" + [start, end]];
99
+ ret.push([regionKey || null, array.slice(start, end)]);
100
+ }
101
+ return ret;
84
102
  }
103
+ //#endregion
104
+ //#region libs/reparse-macro-names.ts
105
+ /**
106
+ * Escape a string so that it can be used to build a regular expression.
107
+ *
108
+ * From: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
109
+ */
85
110
  function escapeRegExp(str) {
86
- return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
111
+ return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
87
112
  }
113
+ /**
114
+ * Build a regular expression that matches everything up to the first non-allowed symbol.
115
+ */
88
116
  function buildWordRegex(allowedSet) {
89
- const regexpStr = `^(${["\\p{L}"].concat(Array.from(allowedSet).map(escapeRegExp)).join("|")})*`;
90
- return new RegExp(regexpStr, "u");
117
+ const regexpStr = `^(${["\\p{L}"].concat(Array.from(allowedSet).map(escapeRegExp)).join("|")})*`;
118
+ return new RegExp(regexpStr, "u");
91
119
  }
120
+ /**
121
+ * Checks whether the array has a macro that could be reparsed given the `allowedTokens` but
122
+ * does not do any reparsing. This function can be used in auto-detection schemes to determine if
123
+ * macro names should actually be reparsed.
124
+ */
92
125
  function hasReparsableMacroNamesInArray(tree, allowedTokens) {
93
- for (let i = 0; i < tree.length; i++) {
94
- const macro = tree[i];
95
- const string = tree[i + 1];
96
- if (match.anyMacro(macro) && match.anyString(string)) {
97
- if (allowedTokens.has(
98
- macro.content.charAt(macro.content.length - 1)
99
- ) || allowedTokens.has(string.content.charAt(0))) {
100
- return true;
101
- }
102
- }
103
- }
104
- return false;
126
+ for (let i = 0; i < tree.length; i++) {
127
+ const macro = tree[i];
128
+ const string = tree[i + 1];
129
+ if (match.anyMacro(macro) && match.anyString(string)) {
130
+ if (allowedTokens.has(macro.content.charAt(macro.content.length - 1)) || allowedTokens.has(string.content.charAt(0))) return true;
131
+ }
132
+ }
133
+ return false;
105
134
  }
135
+ /**
136
+ * Checks whether `tree` has a macro that could be reparsed given the `allowedTokens` but
137
+ * does not do any reparsing. This function can be used in auto-detection schemes to determine if
138
+ * macro names should actually be reparsed.
139
+ */
106
140
  function hasReparsableMacroNames(tree, allowedTokens) {
107
- if (typeof allowedTokens === "string") {
108
- allowedTokens = new Set(allowedTokens.split(""));
109
- }
110
- const _allowedTokens = allowedTokens;
111
- for (const v of _allowedTokens) {
112
- if (v.length > 1) {
113
- throw new Error(
114
- `Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`
115
- );
116
- }
117
- }
118
- let ret = false;
119
- visit(
120
- tree,
121
- (nodes) => {
122
- if (hasReparsableMacroNamesInArray(nodes, _allowedTokens)) {
123
- ret = true;
124
- return EXIT;
125
- }
126
- },
127
- { includeArrays: true, test: Array.isArray }
128
- );
129
- return ret;
141
+ if (typeof allowedTokens === "string") allowedTokens = new Set(allowedTokens.split(""));
142
+ const _allowedTokens = allowedTokens;
143
+ for (const v of _allowedTokens) if (v.length > 1) throw new Error(`Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`);
144
+ let ret = false;
145
+ visit(tree, (nodes) => {
146
+ if (hasReparsableMacroNamesInArray(nodes, _allowedTokens)) {
147
+ ret = true;
148
+ return EXIT;
149
+ }
150
+ }, {
151
+ includeArrays: true,
152
+ test: Array.isArray
153
+ });
154
+ return ret;
130
155
  }
156
+ /**
157
+ * Reparses all macro names in the array so that they may optionally include characters listed in `allowedTokens`.
158
+ * This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though
159
+ * `_` normally stops the parsing of a macro name).
160
+ */
131
161
  function reparseMacroNamesInArray(tree, allowedTokens) {
132
- var _a, _b, _c;
133
- const regex = buildWordRegex(allowedTokens);
134
- let i = 0;
135
- while (i < tree.length) {
136
- const macro = tree[i];
137
- const string = tree[i + 1];
138
- if (match.anyMacro(macro) && // The _^ macros in math mode should not be extended no-matter what;
139
- // So we check to make sure that the macro we're dealing with has the default escape token.
140
- (macro.escapeToken == null || macro.escapeToken === "\\") && match.anyString(string) && // There are two options. Either the macro ends with the special character,
141
- // e.g. `\@foo` or the special character starts the next string, e.g. `\foo@`.
142
- (allowedTokens.has(
143
- macro.content.charAt(macro.content.length - 1)
144
- ) || allowedTokens.has(string.content.charAt(0)))) {
145
- const match2 = string.content.match(regex);
146
- const takeable = match2 ? match2[0] : "";
147
- if (takeable.length > 0) {
148
- if (takeable.length === string.content.length) {
149
- macro.content += string.content;
150
- tree.splice(i + 1, 1);
151
- if (macro.position && ((_a = string.position) == null ? void 0 : _a.end)) {
152
- macro.position.end = string.position.end;
153
- }
154
- } else {
155
- macro.content += takeable;
156
- string.content = string.content.slice(takeable.length);
157
- if ((_b = macro.position) == null ? void 0 : _b.end) {
158
- macro.position.end.offset += takeable.length;
159
- macro.position.end.column += takeable.length;
160
- }
161
- if ((_c = string.position) == null ? void 0 : _c.start) {
162
- string.position.start.offset += takeable.length;
163
- string.position.start.column += takeable.length;
164
- }
165
- }
166
- } else {
167
- i++;
168
- }
169
- } else {
170
- ++i;
171
- }
172
- }
162
+ const regex = buildWordRegex(allowedTokens);
163
+ let i = 0;
164
+ while (i < tree.length) {
165
+ const macro = tree[i];
166
+ const string = tree[i + 1];
167
+ if (match.anyMacro(macro) && (macro.escapeToken == null || macro.escapeToken === "\\") && match.anyString(string) && (allowedTokens.has(macro.content.charAt(macro.content.length - 1)) || allowedTokens.has(string.content.charAt(0)))) {
168
+ const match = string.content.match(regex);
169
+ const takeable = match ? match[0] : "";
170
+ if (takeable.length > 0) if (takeable.length === string.content.length) {
171
+ macro.content += string.content;
172
+ tree.splice(i + 1, 1);
173
+ if (macro.position && string.position?.end) macro.position.end = string.position.end;
174
+ } else {
175
+ macro.content += takeable;
176
+ string.content = string.content.slice(takeable.length);
177
+ if (macro.position?.end) {
178
+ macro.position.end.offset += takeable.length;
179
+ macro.position.end.column += takeable.length;
180
+ }
181
+ if (string.position?.start) {
182
+ string.position.start.offset += takeable.length;
183
+ string.position.start.column += takeable.length;
184
+ }
185
+ }
186
+ else i++;
187
+ } else ++i;
188
+ }
173
189
  }
190
+ /**
191
+ * Reparses all macro names so that they may optionally include characters listed in `allowedTokens`.
192
+ * This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though
193
+ * `_` normally stops the parsing of a macro name). Thus, a macro `\foo_bar:Nn` would be parsed as having
194
+ * the name `foo_bar:Nn` rather than as `foo` followed by the strings `_`, `bar`, `:`, `Nn`.
195
+ */
174
196
  function reparseMacroNames(tree, allowedTokens) {
175
- if (typeof allowedTokens === "string") {
176
- allowedTokens = new Set(allowedTokens.split(""));
177
- }
178
- const _allowedTokens = allowedTokens;
179
- for (const v of _allowedTokens) {
180
- if (v.length > 1) {
181
- throw new Error(
182
- `Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`
183
- );
184
- }
185
- }
186
- visit(
187
- tree,
188
- (nodes) => {
189
- reparseMacroNamesInArray(nodes, _allowedTokens);
190
- },
191
- { includeArrays: true, test: Array.isArray }
192
- );
197
+ if (typeof allowedTokens === "string") allowedTokens = new Set(allowedTokens.split(""));
198
+ const _allowedTokens = allowedTokens;
199
+ for (const v of _allowedTokens) if (v.length > 1) throw new Error(`Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`);
200
+ visit(tree, (nodes) => {
201
+ reparseMacroNamesInArray(nodes, _allowedTokens);
202
+ }, {
203
+ includeArrays: true,
204
+ test: Array.isArray
205
+ });
193
206
  }
194
- const expl3Find = {
195
- start: match.createMacroMatcher(["ExplSyntaxOn"]),
196
- end: match.createMacroMatcher(["ExplSyntaxOff"])
207
+ //#endregion
208
+ //#region libs/special-regions.ts
209
+ var expl3Find = {
210
+ start: match.createMacroMatcher(["ExplSyntaxOn"]),
211
+ end: match.createMacroMatcher(["ExplSyntaxOff"])
197
212
  };
198
- const atLetterFind = {
199
- start: match.createMacroMatcher(["makeatletter"]),
200
- end: match.createMacroMatcher(["makeatother"])
213
+ var atLetterFind = {
214
+ start: match.createMacroMatcher(["makeatletter"]),
215
+ end: match.createMacroMatcher(["makeatother"])
201
216
  };
217
+ /**
218
+ * Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`.
219
+ * Returns an object containing regions where one or both syntaxes apply.
220
+ */
202
221
  function findExpl3AndAtLetterRegionsInArray(tree) {
203
- const expl3 = findRegionInArray(tree, expl3Find.start, expl3Find.end);
204
- const atLetter = findRegionInArray(
205
- tree,
206
- atLetterFind.start,
207
- atLetterFind.end
208
- );
209
- const regionMap = new Map([
210
- ...expl3.map((x) => [x, "expl"]),
211
- ...atLetter.map((x) => [x, "atLetter"])
212
- ]);
213
- const all = refineRegions([...expl3, ...atLetter]);
214
- const ret = {
215
- explOnly: [],
216
- atLetterOnly: [],
217
- both: []
218
- };
219
- for (let i = 0; i < all.regions.length; i++) {
220
- const region = all.regions[i];
221
- const containedIn = all.regionsContainedIn[i];
222
- if (containedIn.size === 2) {
223
- ret.both.push(region);
224
- continue;
225
- }
226
- for (const v of containedIn.values()) {
227
- if (regionMap.get(v) === "expl") {
228
- ret.explOnly.push(region);
229
- }
230
- if (regionMap.get(v) === "atLetter") {
231
- ret.atLetterOnly.push(region);
232
- }
233
- }
234
- }
235
- ret.explOnly = ret.explOnly.filter((r) => r.end - r.start > 1);
236
- ret.atLetterOnly = ret.atLetterOnly.filter((r) => r.end - r.start > 1);
237
- ret.both = ret.both.filter((r) => r.end - r.start > 1);
238
- return ret;
222
+ const expl3 = findRegionInArray(tree, expl3Find.start, expl3Find.end);
223
+ const atLetter = findRegionInArray(tree, atLetterFind.start, atLetterFind.end);
224
+ const regionMap = new Map([...expl3.map((x) => [x, "expl"]), ...atLetter.map((x) => [x, "atLetter"])]);
225
+ const all = refineRegions([...expl3, ...atLetter]);
226
+ const ret = {
227
+ explOnly: [],
228
+ atLetterOnly: [],
229
+ both: []
230
+ };
231
+ for (let i = 0; i < all.regions.length; i++) {
232
+ const region = all.regions[i];
233
+ const containedIn = all.regionsContainedIn[i];
234
+ if (containedIn.size === 2) {
235
+ ret.both.push(region);
236
+ continue;
237
+ }
238
+ for (const v of containedIn.values()) {
239
+ if (regionMap.get(v) === "expl") ret.explOnly.push(region);
240
+ if (regionMap.get(v) === "atLetter") ret.atLetterOnly.push(region);
241
+ }
242
+ }
243
+ ret.explOnly = ret.explOnly.filter((r) => r.end - r.start > 1);
244
+ ret.atLetterOnly = ret.atLetterOnly.filter((r) => r.end - r.start > 1);
245
+ ret.both = ret.both.filter((r) => r.end - r.start > 1);
246
+ return ret;
239
247
  }
240
- const atLetterSet = /* @__PURE__ */ new Set(["@"]);
241
- const explSet = /* @__PURE__ */ new Set(["_", ":"]);
242
- const bothSet = /* @__PURE__ */ new Set(["_", ":", "@"]);
248
+ var atLetterSet = new Set(["@"]);
249
+ var explSet = new Set(["_", ":"]);
250
+ var bothSet = new Set([
251
+ "_",
252
+ ":",
253
+ "@"
254
+ ]);
255
+ /**
256
+ * Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`
257
+ * and reparse their contents so that the relevant characters (e.g., `@`, `_`, and `:`) become
258
+ * part of the macro names.
259
+ */
243
260
  function reparseExpl3AndAtLetterRegions(tree) {
244
- visit(
245
- tree,
246
- {
247
- leave: (nodes) => {
248
- const regions = findExpl3AndAtLetterRegionsInArray(nodes);
249
- const totalNumRegions = regions.both.length + regions.atLetterOnly.length + regions.explOnly.length;
250
- if (totalNumRegions === 0) {
251
- return;
252
- }
253
- const splits = splitByRegions(nodes, regions);
254
- const processed = [];
255
- for (const [key, slice] of splits) {
256
- switch (key) {
257
- case null:
258
- processed.push(...slice);
259
- continue;
260
- case "atLetterOnly":
261
- reparseMacroNames(slice, atLetterSet);
262
- processed.push(...slice);
263
- continue;
264
- case "explOnly":
265
- reparseMacroNames(slice, explSet);
266
- processed.push(...slice);
267
- continue;
268
- case "both":
269
- reparseMacroNames(slice, bothSet);
270
- processed.push(...slice);
271
- continue;
272
- default:
273
- throw new Error(
274
- `Unexpected case when splitting ${key}`
275
- );
276
- }
277
- }
278
- nodes.length = 0;
279
- nodes.push(...processed);
280
- return SKIP;
281
- }
282
- },
283
- { includeArrays: true, test: Array.isArray }
284
- );
261
+ visit(tree, { leave: (nodes) => {
262
+ const regions = findExpl3AndAtLetterRegionsInArray(nodes);
263
+ if (regions.both.length + regions.atLetterOnly.length + regions.explOnly.length === 0) return;
264
+ const splits = splitByRegions(nodes, regions);
265
+ const processed = [];
266
+ for (const [key, slice] of splits) switch (key) {
267
+ case null:
268
+ processed.push(...slice);
269
+ continue;
270
+ case "atLetterOnly":
271
+ reparseMacroNames(slice, atLetterSet);
272
+ processed.push(...slice);
273
+ continue;
274
+ case "explOnly":
275
+ reparseMacroNames(slice, explSet);
276
+ processed.push(...slice);
277
+ continue;
278
+ case "both":
279
+ reparseMacroNames(slice, bothSet);
280
+ processed.push(...slice);
281
+ continue;
282
+ default: throw new Error(`Unexpected case when splitting ${key}`);
283
+ }
284
+ nodes.length = 0;
285
+ nodes.push(...processed);
286
+ return SKIP;
287
+ } }, {
288
+ includeArrays: true,
289
+ test: Array.isArray
290
+ });
285
291
  }
286
- export {
287
- findExpl3AndAtLetterRegionsInArray,
288
- findRegionInArray,
289
- hasReparsableMacroNames,
290
- hasReparsableMacroNamesInArray,
291
- reparseExpl3AndAtLetterRegions,
292
- reparseMacroNames,
293
- reparseMacroNamesInArray
294
- };
295
- //# sourceMappingURL=index.js.map
292
+ //#endregion
293
+ //#region index.ts
294
+ /**
295
+ * ## What is this?
296
+ *
297
+ * Functions to identify regions of a `unified-latex` Abstract Syntax Tree (AST) that need to be reparsed because of different
298
+ * category codes. For example, regions between `\makeatletter` and `\makeatother`.
299
+ *
300
+ * ## When should I use this?
301
+ *
302
+ * If you need to identify regions of the AST that need to be reparsed.
303
+ */
304
+ //#endregion
305
+ export { findExpl3AndAtLetterRegionsInArray, findRegionInArray, hasReparsableMacroNames, hasReparsableMacroNamesInArray, reparseExpl3AndAtLetterRegions, reparseMacroNames, reparseMacroNamesInArray };
306
+
307
+ //# sourceMappingURL=index.js.map