@unified-latex/unified-latex-util-catcode 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +162 -0
- package/index.cjs +327 -0
- package/index.cjs.map +7 -0
- package/index.d.ts +14 -0
- package/index.d.ts.map +1 -0
- package/index.js +304 -0
- package/index.js.map +7 -0
- package/libs/find-region.d.ts +8 -0
- package/libs/find-region.d.ts.map +1 -0
- package/libs/regions.d.ts +23 -0
- package/libs/regions.d.ts.map +1 -0
- package/libs/reparse-macro-names.d.ts +27 -0
- package/libs/reparse-macro-names.d.ts.map +1 -0
- package/libs/special-regions.d.ts +18 -0
- package/libs/special-regions.d.ts.map +1 -0
- package/package.json +52 -0
package/README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
<!-- DO NOT MODIFY -->
|
|
2
|
+
<!-- This file was autogenerated by build-docs.ts -->
|
|
3
|
+
<!-- Edit the docstring in index.ts and regenerate -->
|
|
4
|
+
<!-- rather than editing this file directly. -->
|
|
5
|
+
# unified-latex-util-catcode
|
|
6
|
+
|
|
7
|
+
## What is this?
|
|
8
|
+
|
|
9
|
+
Functions to identify regions of a `unified-latex` Abstract Syntax Tree (AST) that need to be reparsed because of different
|
|
10
|
+
category codes. For example, regions between `\makeatletter` and `\makeatother`.
|
|
11
|
+
|
|
12
|
+
## When should I use this?
|
|
13
|
+
|
|
14
|
+
If you need to identify regions of the AST that need to be reparsed.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install @unified-latex/unified-latex-util-catcode
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
This package contains both esm and commonjs exports. To explicitly access the esm export,
|
|
23
|
+
import the `.js` file. To explicitly access the commonjs export, import the `.cjs` file.
|
|
24
|
+
|
|
25
|
+
# Functions
|
|
26
|
+
|
|
27
|
+
## `findExpl3AndAtLetterRegionsInArray(tree)`
|
|
28
|
+
|
|
29
|
+
Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`.
|
|
30
|
+
Returns an object containing regions where one or both syntaxes apply.
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
function findExpl3AndAtLetterRegionsInArray(tree: Ast.Node[]): {
|
|
34
|
+
explOnly: Region[];
|
|
35
|
+
atLetterOnly: Region[];
|
|
36
|
+
both: Region[];
|
|
37
|
+
};
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
**Parameters**
|
|
41
|
+
|
|
42
|
+
| Param | Type |
|
|
43
|
+
| :---- | :----------- |
|
|
44
|
+
| tree | `Ast.Node[]` |
|
|
45
|
+
|
|
46
|
+
## `findRegionInArray(tree, start, end)`
|
|
47
|
+
|
|
48
|
+
Find all contiguous segments in the array that are between start and end blocks.
|
|
49
|
+
The `start` and `end` are functions that determine when a region starts and ends.
|
|
50
|
+
|
|
51
|
+
```typescript
|
|
52
|
+
function findRegionInArray(
|
|
53
|
+
tree: Ast.Node[],
|
|
54
|
+
start: (node: Ast.Node) => boolean,
|
|
55
|
+
end: (node: Ast.Node) => boolean
|
|
56
|
+
): Region[];
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
**Parameters**
|
|
60
|
+
|
|
61
|
+
| Param | Type |
|
|
62
|
+
| :---- | :---------------------------- |
|
|
63
|
+
| tree | `Ast.Node[]` |
|
|
64
|
+
| start | `(node: Ast.Node) => boolean` |
|
|
65
|
+
| end | `(node: Ast.Node) => boolean` |
|
|
66
|
+
|
|
67
|
+
## `hasReparsableMacroNames(tree, allowedTokens)`
|
|
68
|
+
|
|
69
|
+
Checks whether `tree` has a macro that could be reparsed given the `allowedTokens` but
|
|
70
|
+
does not do any reparsing. This function can be used in auto-detection schemes to determine if
|
|
71
|
+
macro names should actually be reparsed.
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
function hasReparsableMacroNames(
|
|
75
|
+
tree: Ast.Ast,
|
|
76
|
+
allowedTokens: string | Set<string>
|
|
77
|
+
): boolean;
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Parameters**
|
|
81
|
+
|
|
82
|
+
| Param | Type |
|
|
83
|
+
| :------------ | :---------------------- |
|
|
84
|
+
| tree | `Ast.Ast` |
|
|
85
|
+
| allowedTokens | `string \| Set<string>` |
|
|
86
|
+
|
|
87
|
+
## `hasReparsableMacroNamesInArray(tree, allowedTokens)`
|
|
88
|
+
|
|
89
|
+
Checks whether the array has a macro that could be reparsed given the `allowedTokens` but
|
|
90
|
+
does not do any reparsing. This function can be used in auto-detection schemes to determine if
|
|
91
|
+
macro names should actually be reparsed.
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
function hasReparsableMacroNamesInArray(
|
|
95
|
+
tree: Ast.Node[],
|
|
96
|
+
allowedTokens: Set<string>
|
|
97
|
+
): boolean;
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Parameters**
|
|
101
|
+
|
|
102
|
+
| Param | Type |
|
|
103
|
+
| :------------ | :------------ |
|
|
104
|
+
| tree | `Ast.Node[]` |
|
|
105
|
+
| allowedTokens | `Set<string>` |
|
|
106
|
+
|
|
107
|
+
## `reparseExpl3AndAtLetterRegions(tree)`
|
|
108
|
+
|
|
109
|
+
Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`
|
|
110
|
+
and reparse their contents so that the relevant characters (e.g., `@`, `_`, and `:`) become
|
|
111
|
+
part of the macro names.
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
function reparseExpl3AndAtLetterRegions(tree: Ast.Ast): void;
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
**Parameters**
|
|
118
|
+
|
|
119
|
+
| Param | Type |
|
|
120
|
+
| :---- | :-------- |
|
|
121
|
+
| tree | `Ast.Ast` |
|
|
122
|
+
|
|
123
|
+
## `reparseMacroNames(tree, allowedTokens)`
|
|
124
|
+
|
|
125
|
+
Reparses all macro names so that they may optionally include characters listed in `allowedTokens`.
|
|
126
|
+
This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though
|
|
127
|
+
`_` normally stops the parsing of a macro name). Thus, a macro `\foo_bar:Nn` would be parsed as having
|
|
128
|
+
the name `foo_bar:Nn` rather than as `foo` followed by the strings `_`, `bar`, `:`, `Nn`.
|
|
129
|
+
|
|
130
|
+
```typescript
|
|
131
|
+
function reparseMacroNames(
|
|
132
|
+
tree: Ast.Ast,
|
|
133
|
+
allowedTokens: string | Set<string>
|
|
134
|
+
): void;
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Parameters**
|
|
138
|
+
|
|
139
|
+
| Param | Type |
|
|
140
|
+
| :------------ | :---------------------- |
|
|
141
|
+
| tree | `Ast.Ast` |
|
|
142
|
+
| allowedTokens | `string \| Set<string>` |
|
|
143
|
+
|
|
144
|
+
## `reparseMacroNamesInArray(tree, allowedTokens)`
|
|
145
|
+
|
|
146
|
+
Reparses all macro names in the array so that they may optionally include characters listed in `allowedTokens`.
|
|
147
|
+
This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though
|
|
148
|
+
`_` normally stops the parsing of a macro name).
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
function reparseMacroNamesInArray(
|
|
152
|
+
tree: Ast.Node[],
|
|
153
|
+
allowedTokens: Set<string>
|
|
154
|
+
): void;
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Parameters**
|
|
158
|
+
|
|
159
|
+
| Param | Type |
|
|
160
|
+
| :------------ | :------------ |
|
|
161
|
+
| tree | `Ast.Node[]` |
|
|
162
|
+
| allowedTokens | `Set<string>` |
|
package/index.cjs
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// index.ts
|
|
21
|
+
var unified_latex_util_catcode_exports = {};
|
|
22
|
+
__export(unified_latex_util_catcode_exports, {
|
|
23
|
+
findExpl3AndAtLetterRegionsInArray: () => findExpl3AndAtLetterRegionsInArray,
|
|
24
|
+
findRegionInArray: () => findRegionInArray,
|
|
25
|
+
hasReparsableMacroNames: () => hasReparsableMacroNames,
|
|
26
|
+
hasReparsableMacroNamesInArray: () => hasReparsableMacroNamesInArray,
|
|
27
|
+
reparseExpl3AndAtLetterRegions: () => reparseExpl3AndAtLetterRegions,
|
|
28
|
+
reparseMacroNames: () => reparseMacroNames,
|
|
29
|
+
reparseMacroNamesInArray: () => reparseMacroNamesInArray
|
|
30
|
+
});
|
|
31
|
+
module.exports = __toCommonJS(unified_latex_util_catcode_exports);
|
|
32
|
+
|
|
33
|
+
// libs/find-region.ts
|
|
34
|
+
function findRegionInArray(tree, start, end) {
|
|
35
|
+
const ret = [];
|
|
36
|
+
let currRegion = { start: void 0, end: tree.length };
|
|
37
|
+
for (let i = 0; i < tree.length; i++) {
|
|
38
|
+
const node = tree[i];
|
|
39
|
+
if (start(node)) {
|
|
40
|
+
currRegion.start = i;
|
|
41
|
+
}
|
|
42
|
+
if (end(node)) {
|
|
43
|
+
currRegion.end = i + 1;
|
|
44
|
+
ret.push(currRegion);
|
|
45
|
+
currRegion = { start: void 0, end: tree.length };
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
if (currRegion.start != null) {
|
|
49
|
+
ret.push(currRegion);
|
|
50
|
+
}
|
|
51
|
+
return ret;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// libs/special-regions.ts
|
|
55
|
+
var import_unified_latex_util_match2 = require("@unified-latex/unified-latex-util-match");
|
|
56
|
+
|
|
57
|
+
// libs/regions.ts
|
|
58
|
+
function refineRegions(regions) {
|
|
59
|
+
const _regions = [...regions];
|
|
60
|
+
_regions.sort((a, b) => a.start - b.start);
|
|
61
|
+
const cutPointsSet = new Set(_regions.flatMap((r) => [r.start, r.end]));
|
|
62
|
+
const cutPoints = Array.from(cutPointsSet);
|
|
63
|
+
cutPoints.sort((a, b) => a - b);
|
|
64
|
+
const retRegions = [];
|
|
65
|
+
const retRegionsContainedIn = [];
|
|
66
|
+
let seekIndex = 0;
|
|
67
|
+
for (let i = 0; i < cutPoints.length - 1; i++) {
|
|
68
|
+
const start = cutPoints[i];
|
|
69
|
+
const end = cutPoints[i + 1];
|
|
70
|
+
const region = { start, end };
|
|
71
|
+
const regionContainedIn = /* @__PURE__ */ new Set();
|
|
72
|
+
let encounteredEndPastStart = false;
|
|
73
|
+
for (let j = seekIndex; j < _regions.length; j++) {
|
|
74
|
+
const superRegion = _regions[j];
|
|
75
|
+
if (superRegion.end >= region.start) {
|
|
76
|
+
encounteredEndPastStart = true;
|
|
77
|
+
}
|
|
78
|
+
if (!encounteredEndPastStart && superRegion.end < region.start) {
|
|
79
|
+
seekIndex = j + 1;
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
if (superRegion.start > end) {
|
|
83
|
+
break;
|
|
84
|
+
}
|
|
85
|
+
if (superRegion.start <= region.start && superRegion.end >= region.end) {
|
|
86
|
+
encounteredEndPastStart = true;
|
|
87
|
+
regionContainedIn.add(superRegion);
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
if (regionContainedIn.size > 0) {
|
|
91
|
+
retRegions.push(region);
|
|
92
|
+
retRegionsContainedIn.push(regionContainedIn);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return { regions: retRegions, regionsContainedIn: retRegionsContainedIn };
|
|
96
|
+
}
|
|
97
|
+
/**
 * Split `array` into consecutive slices along the boundaries of the given regions.
 *
 * @param array         The array to split; it is only read (via `slice`).
 * @param regionsRecord Object mapping a key to a list of `{ start, end }` regions.
 * @returns A list of `[key, slice]` tuples covering `array` from front to back. `key` is
 *   the record key whose region has exactly the slice's start and end, or `null` when no
 *   region matches. Zero-length slices are skipped.
 */
function splitByRegions(array, regionsRecord) {
  // Boundaries always include both ends of the array.
  const boundaries = [0, array.length];
  // Maps "start,end" to the record key that declared that span.
  const keyBySpan = {};
  for (const [name, spans] of Object.entries(regionsRecord)) {
    for (const span of spans) {
      keyBySpan[[span.start, span.end].join(",")] = name;
      boundaries.push(span.start, span.end);
    }
  }
  boundaries.sort((x, y) => x - y);

  const pieces = [];
  let from = boundaries[0];
  for (const to of boundaries.slice(1)) {
    if (from !== to) {
      const owner = keyBySpan[[from, to].join(",")];
      pieces.push([owner || null, array.slice(from, to)]);
    }
    from = to;
  }
  return pieces;
}
|
|
121
|
+
|
|
122
|
+
// libs/special-regions.ts
|
|
123
|
+
var import_unified_latex_util_visit2 = require("@unified-latex/unified-latex-util-visit");
|
|
124
|
+
|
|
125
|
+
// libs/reparse-macro-names.ts
|
|
126
|
+
var import_unified_latex_util_match = require("@unified-latex/unified-latex-util-match");
|
|
127
|
+
var import_unified_latex_util_visit = require("@unified-latex/unified-latex-util-visit");
|
|
128
|
+
/**
 * Escape a string so it can be embedded literally in a regular expression source.
 * Each of the characters . * + ? ^ $ { } ( ) | [ ] \ is prefixed with a backslash.
 */
function escapeRegExp(str) {
  const specialChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(specialChars, (ch) => "\\" + ch);
}
|
|
131
|
+
function buildWordRegex(allowedSet) {
|
|
132
|
+
const regexpStr = `^(${["\\p{L}"].concat(Array.from(allowedSet).map(escapeRegExp)).join("|")})*`;
|
|
133
|
+
return new RegExp(regexpStr, "u");
|
|
134
|
+
}
|
|
135
|
+
function hasReparsableMacroNamesInArray(tree, allowedTokens) {
|
|
136
|
+
for (let i = 0; i < tree.length; i++) {
|
|
137
|
+
const macro = tree[i];
|
|
138
|
+
const string = tree[i + 1];
|
|
139
|
+
if (import_unified_latex_util_match.match.anyMacro(macro) && import_unified_latex_util_match.match.anyString(string)) {
|
|
140
|
+
if (allowedTokens.has(
|
|
141
|
+
macro.content.charAt(macro.content.length - 1)
|
|
142
|
+
) || allowedTokens.has(string.content.charAt(0))) {
|
|
143
|
+
return true;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
return false;
|
|
148
|
+
}
|
|
149
|
+
function hasReparsableMacroNames(tree, allowedTokens) {
|
|
150
|
+
if (typeof allowedTokens === "string") {
|
|
151
|
+
allowedTokens = new Set(allowedTokens.split(""));
|
|
152
|
+
}
|
|
153
|
+
const _allowedTokens = allowedTokens;
|
|
154
|
+
for (const v of _allowedTokens) {
|
|
155
|
+
if (v.length > 1) {
|
|
156
|
+
throw new Error(
|
|
157
|
+
`Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`
|
|
158
|
+
);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
let ret = false;
|
|
162
|
+
(0, import_unified_latex_util_visit.visit)(
|
|
163
|
+
tree,
|
|
164
|
+
(nodes) => {
|
|
165
|
+
if (hasReparsableMacroNamesInArray(nodes, _allowedTokens)) {
|
|
166
|
+
ret = true;
|
|
167
|
+
return import_unified_latex_util_visit.EXIT;
|
|
168
|
+
}
|
|
169
|
+
},
|
|
170
|
+
{ includeArrays: true, test: Array.isArray }
|
|
171
|
+
);
|
|
172
|
+
return ret;
|
|
173
|
+
}
|
|
174
|
+
function reparseMacroNamesInArray(tree, allowedTokens) {
|
|
175
|
+
var _a, _b, _c;
|
|
176
|
+
const regex = buildWordRegex(allowedTokens);
|
|
177
|
+
let i = 0;
|
|
178
|
+
while (i < tree.length) {
|
|
179
|
+
const macro = tree[i];
|
|
180
|
+
const string = tree[i + 1];
|
|
181
|
+
if (import_unified_latex_util_match.match.anyMacro(macro) && (macro.escapeToken == null || macro.escapeToken === "\\") && import_unified_latex_util_match.match.anyString(string) && (allowedTokens.has(
|
|
182
|
+
macro.content.charAt(macro.content.length - 1)
|
|
183
|
+
) || allowedTokens.has(string.content.charAt(0)))) {
|
|
184
|
+
const match3 = string.content.match(regex);
|
|
185
|
+
const takeable = match3 ? match3[0] : "";
|
|
186
|
+
if (takeable.length > 0) {
|
|
187
|
+
if (takeable.length === string.content.length) {
|
|
188
|
+
macro.content += string.content;
|
|
189
|
+
tree.splice(i + 1, 1);
|
|
190
|
+
if (macro.position && ((_a = string.position) == null ? void 0 : _a.end)) {
|
|
191
|
+
macro.position.end = string.position.end;
|
|
192
|
+
}
|
|
193
|
+
} else {
|
|
194
|
+
macro.content += takeable;
|
|
195
|
+
string.content = string.content.slice(takeable.length);
|
|
196
|
+
if ((_b = macro.position) == null ? void 0 : _b.end) {
|
|
197
|
+
macro.position.end.offset += takeable.length;
|
|
198
|
+
macro.position.end.column += takeable.length;
|
|
199
|
+
}
|
|
200
|
+
if ((_c = string.position) == null ? void 0 : _c.start) {
|
|
201
|
+
string.position.start.offset += takeable.length;
|
|
202
|
+
string.position.start.column += takeable.length;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
} else {
|
|
206
|
+
i++;
|
|
207
|
+
}
|
|
208
|
+
} else {
|
|
209
|
+
++i;
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
function reparseMacroNames(tree, allowedTokens) {
|
|
214
|
+
if (typeof allowedTokens === "string") {
|
|
215
|
+
allowedTokens = new Set(allowedTokens.split(""));
|
|
216
|
+
}
|
|
217
|
+
const _allowedTokens = allowedTokens;
|
|
218
|
+
for (const v of _allowedTokens) {
|
|
219
|
+
if (v.length > 1) {
|
|
220
|
+
throw new Error(
|
|
221
|
+
`Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`
|
|
222
|
+
);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
(0, import_unified_latex_util_visit.visit)(
|
|
226
|
+
tree,
|
|
227
|
+
(nodes) => {
|
|
228
|
+
reparseMacroNamesInArray(nodes, _allowedTokens);
|
|
229
|
+
},
|
|
230
|
+
{ includeArrays: true, test: Array.isArray }
|
|
231
|
+
);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// libs/special-regions.ts
|
|
235
|
+
// Predicates for the macros that open and close an expl3 syntax region.
var expl3Find = {
  start: import_unified_latex_util_match2.match.createMacroMatcher(["ExplSyntaxOn"]),
  end: import_unified_latex_util_match2.match.createMacroMatcher(["ExplSyntaxOff"])
};
// Predicates for the macros that open and close an `@`-as-letter region.
var atLetterFind = {
  start: import_unified_latex_util_match2.match.createMacroMatcher(["makeatletter"]),
  end: import_unified_latex_util_match2.match.createMacroMatcher(["makeatother"])
};
/**
 * Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`.
 *
 * @param tree Array of AST nodes to scan.
 * @returns `{ explOnly, atLetterOnly, both }`, each a list of `{ start, end }` regions of
 *   `tree` where only expl3 syntax, only `@`-as-letter, or both apply. Regions spanning a
 *   single node are dropped, since they hold nothing but the delimiting macro.
 */
function findExpl3AndAtLetterRegionsInArray(tree) {
  const explRegions = findRegionInArray(tree, expl3Find.start, expl3Find.end);
  const atLetterRegions = findRegionInArray(tree, atLetterFind.start, atLetterFind.end);

  // Remember which kind each original region object belongs to.
  const kindOf = /* @__PURE__ */ new Map();
  for (const r of explRegions) {
    kindOf.set(r, "expl");
  }
  for (const r of atLetterRegions) {
    kindOf.set(r, "atLetter");
  }

  const refined = refineRegions(explRegions.concat(atLetterRegions));
  const buckets = { explOnly: [], atLetterOnly: [], both: [] };
  refined.regions.forEach((piece, k) => {
    const owners = refined.regionsContainedIn[k];
    if (owners.size === 2) {
      // Contained in two original regions, so it is counted under `both`.
      buckets.both.push(piece);
      return;
    }
    for (const owner of owners) {
      const kind = kindOf.get(owner);
      if (kind === "expl") {
        buckets.explOnly.push(piece);
      } else if (kind === "atLetter") {
        buckets.atLetterOnly.push(piece);
      }
    }
  });

  // Regions of size 1 only contain the starting/stopping macro, so they are discarded.
  const spansMoreThanOneNode = (r) => r.end - r.start > 1;
  return {
    explOnly: buckets.explOnly.filter(spansMoreThanOneNode),
    atLetterOnly: buckets.atLetterOnly.filter(spansMoreThanOneNode),
    both: buckets.both.filter(spansMoreThanOneNode)
  };
}
|
|
281
|
+
// Extra characters allowed in macro names inside each kind of region.
var atLetterSet = /* @__PURE__ */ new Set(["@"]);
var explSet = /* @__PURE__ */ new Set(["_", ":"]);
var bothSet = /* @__PURE__ */ new Set(["_", ":", "@"]);
/**
 * Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`
 * and reparse their contents so that the relevant characters (`@`, `_`, and `:`) become
 * part of the macro names.
 *
 * Each array in `tree` is handled when the traversal leaves it. Arrays without any such
 * region are left untouched; otherwise the array's contents are replaced in place with the
 * reparsed nodes.
 *
 * @param tree The AST to process; modified in place.
 * @throws Error if `splitByRegions` yields a key other than `null`, `"atLetterOnly"`,
 *   `"explOnly"` or `"both"`.
 */
function reparseExpl3AndAtLetterRegions(tree) {
  const visit = import_unified_latex_util_visit2.visit;
  const leave = (nodes) => {
    const regions = findExpl3AndAtLetterRegionsInArray(nodes);
    // In all likelihood there is nothing to reparse, so bail out early.
    const hasRegions =
      regions.both.length > 0 ||
      regions.atLetterOnly.length > 0 ||
      regions.explOnly.length > 0;
    if (!hasRegions) {
      return void 0;
    }

    const rebuilt = [];
    for (const [key, slice] of splitByRegions(nodes, regions)) {
      if (key !== null) {
        // Pick the token set that matches the kind of region this slice came from.
        let tokens;
        if (key === "atLetterOnly") {
          tokens = atLetterSet;
        } else if (key === "explOnly") {
          tokens = explSet;
        } else if (key === "both") {
          tokens = bothSet;
        } else {
          throw new Error(
            `Unexpected case when splitting ${key}`
          );
        }
        reparseMacroNames(slice, tokens);
      }
      rebuilt.push(...slice);
    }

    // Replace the array's contents in place so existing references to it stay valid.
    nodes.length = 0;
    nodes.push(...rebuilt);
    return import_unified_latex_util_visit2.SKIP;
  };
  visit(tree, { leave }, { includeArrays: true, test: Array.isArray });
}
|
|
327
|
+
//# sourceMappingURL=index.cjs.map
|
package/index.cjs.map
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 3,
|
|
3
|
+
"sources": ["../index.ts", "../libs/find-region.ts", "../libs/special-regions.ts", "../libs/regions.ts", "../libs/reparse-macro-names.ts"],
|
|
4
|
+
"sourcesContent": ["export * from \"./libs/find-region\";\nexport * from \"./libs/special-regions\";\nexport * from \"./libs/reparse-macro-names\";\n\n// NOTE: The docstring comment must be the last item in the index.ts file!\n/**\n * ## What is this?\n *\n * Functions to identify regions of a `unified-latex` Abstract Syntax Tree (AST) that need to be reparsed because of different\n * category codes. For example, regions between `\\makeatletter` and `\\makeatother`.\n *\n * ## When should I use this?\n *\n * If you need to identify regions of the AST that need to be reparsed.\n */\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\nimport { Region } from \"./regions\";\n\n/**\n * Find all contiguous segments in the array that are between start and end blocks.\n * The `start` and `end` are functions that determine when a region starts and ends.\n */\nexport function findRegionInArray(\n tree: Ast.Node[],\n start: (node: Ast.Node) => boolean,\n end: (node: Ast.Node) => boolean\n): Region[] {\n const ret: Region[] = [];\n let currRegion: Region = { start: undefined as any, end: tree.length };\n for (let i = 0; i < tree.length; i++) {\n const node = tree[i];\n if (start(node)) {\n currRegion.start = i;\n }\n if (end(node)) {\n currRegion.end = i + 1;\n ret.push(currRegion);\n currRegion = { start: undefined as any, end: tree.length };\n }\n }\n\n if (currRegion.start != null) {\n // Regions don't necessarily have to encounter an `end` to end.\n ret.push(currRegion);\n }\n return ret;\n}\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\nimport { match } from \"@unified-latex/unified-latex-util-match\";\nimport { findRegionInArray } from \"./find-region\";\nimport { refineRegions, Region, splitByRegions } from \"./regions\";\nimport { SKIP, visit } from \"@unified-latex/unified-latex-util-visit\";\nimport { reparseMacroNames } from \"./reparse-macro-names\";\n\nconst expl3Find = {\n start: match.createMacroMatcher([\"ExplSyntaxOn\"]),\n 
end: match.createMacroMatcher([\"ExplSyntaxOff\"]),\n};\nconst atLetterFind = {\n start: match.createMacroMatcher([\"makeatletter\"]),\n end: match.createMacroMatcher([\"makeatother\"]),\n};\n\n/**\n * Find regions between `\\ExplSyntaxOn...\\ExplSyntaxOff` and `\\makeatletter...\\makeatother`.\n * Returns an object containing regions where one or both syntax's apply.\n */\nexport function findExpl3AndAtLetterRegionsInArray(tree: Ast.Node[]): {\n explOnly: Region[];\n atLetterOnly: Region[];\n both: Region[];\n} {\n const expl3 = findRegionInArray(tree, expl3Find.start, expl3Find.end);\n const atLetter = findRegionInArray(\n tree,\n atLetterFind.start,\n atLetterFind.end\n );\n\n const regionMap = new Map([\n ...(expl3.map((x) => [x, \"expl\"]) as [Region, \"expl\"][]),\n ...(atLetter.map((x) => [x, \"atLetter\"]) as [Region, \"atLetter\"][]),\n ]);\n const all = refineRegions([...expl3, ...atLetter]);\n\n const ret = {\n explOnly: [] as Region[],\n atLetterOnly: [] as Region[],\n both: [] as Region[],\n };\n\n for (let i = 0; i < all.regions.length; i++) {\n const region = all.regions[i];\n const containedIn = all.regionsContainedIn[i];\n if (containedIn.size === 2) {\n ret.both.push(region);\n continue;\n }\n for (const v of containedIn.values()) {\n if (regionMap.get(v) === \"expl\") {\n ret.explOnly.push(region);\n }\n if (regionMap.get(v) === \"atLetter\") {\n ret.atLetterOnly.push(region);\n }\n }\n }\n\n // Regions of size 1 only contain the starting/stopping macro, so they should be discarded\n ret.explOnly = ret.explOnly.filter((r) => r.end - r.start > 1);\n ret.atLetterOnly = ret.atLetterOnly.filter((r) => r.end - r.start > 1);\n ret.both = ret.both.filter((r) => r.end - r.start > 1);\n\n return ret;\n}\n\nconst atLetterSet = new Set([\"@\"]);\nconst explSet = new Set([\"_\", \":\"]);\nconst bothSet = new Set([\"_\", \":\", \"@\"]);\n\n/**\n * Find regions between `\\ExplSyntaxOn...\\ExplSyntaxOff` and `\\makeatletter...\\makeatother`\n * and reparse 
their contents so that the relevant characters (e.g., `@`, `_`, and `:`) become\n * part of the macro names.\n */\nexport function reparseExpl3AndAtLetterRegions(tree: Ast.Ast) {\n visit(\n tree,\n {\n leave: (nodes) => {\n const regions = findExpl3AndAtLetterRegionsInArray(nodes);\n // In all likelihood, we don't need to do any reparsing, so bail early here\n const totalNumRegions =\n regions.both.length +\n regions.atLetterOnly.length +\n regions.explOnly.length;\n if (totalNumRegions === 0) {\n return;\n }\n\n const splits = splitByRegions(nodes, regions);\n const processed: typeof nodes = [];\n for (const [key, slice] of splits) {\n switch (key) {\n case null:\n processed.push(...slice);\n continue;\n case \"atLetterOnly\":\n reparseMacroNames(slice, atLetterSet);\n processed.push(...slice);\n continue;\n case \"explOnly\":\n reparseMacroNames(slice, explSet);\n processed.push(...slice);\n continue;\n case \"both\":\n reparseMacroNames(slice, bothSet);\n processed.push(...slice);\n continue;\n default:\n throw new Error(\n `Unexpected case when splitting ${key}`\n );\n }\n }\n\n nodes.length = 0;\n nodes.push(...processed);\n return SKIP;\n },\n },\n { includeArrays: true, test: Array.isArray }\n );\n}\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\n\nexport type Region = { start: number; end: number };\n\n/**\n * Given `regions`, a list of `Region`s (not necessarily ordered, possibly overlapping), return a list of in-order,\n * non-overlapping regions and a corresponding list containing a set of the original `Region`s that the new region\n * is a subset of.\n */\nexport function refineRegions(regions: Region[]): {\n regions: Region[];\n regionsContainedIn: Set<Region>[];\n} {\n const _regions = [...regions];\n _regions.sort((a, b) => a.start - b.start);\n const cutPointsSet = new Set(_regions.flatMap((r) => [r.start, r.end]));\n const cutPoints = Array.from(cutPointsSet);\n cutPoints.sort((a, b) => a - b);\n\n const retRegions: Region[] = 
[];\n const retRegionsContainedIn: Set<Region>[] = [];\n\n // We will be checking what regions we are completely contained in.\n // Because `_regions` is sorted by start, `seekIndex` will be incremented\n // by end, so that we don't do too much array testing.\n let seekIndex = 0;\n for (let i = 0; i < cutPoints.length - 1; i++) {\n const start = cutPoints[i];\n const end = cutPoints[i + 1];\n const region = { start, end };\n const regionContainedIn: Set<Region> = new Set();\n\n let encounteredEndPastStart = false;\n for (let j = seekIndex; j < _regions.length; j++) {\n const superRegion = _regions[j];\n if (superRegion.end >= region.start) {\n encounteredEndPastStart = true;\n }\n if (!encounteredEndPastStart && superRegion.end < region.start) {\n // In this case, the region (and all regions that came before)\n // end before the region we are testing, so we may safely skip past it\n // from here on out.\n seekIndex = j + 1;\n continue;\n }\n\n if (superRegion.start > end) {\n // Because `_regions` is sorted, we can stop here\n break;\n }\n if (\n superRegion.start <= region.start &&\n superRegion.end >= region.end\n ) {\n encounteredEndPastStart = true;\n regionContainedIn.add(superRegion);\n }\n }\n\n if (regionContainedIn.size > 0) {\n // We only count if we are contained in a subregion\n retRegions.push(region);\n retRegionsContainedIn.push(regionContainedIn);\n }\n }\n\n return { regions: retRegions, regionsContainedIn: retRegionsContainedIn };\n}\n\n/**\n * Split an array up into the disjoint regions specified by `regionRecord`.\n * Returned is a list of tuples, the first item being the key of `regionRecord` if there\n * was a corresponding region, or `null` if there was no corresponding region.\n *\n * This function assumes that the regions in `regionRecord` are disjoint and fully contained\n * within the bounds of `array`.\n */\nexport function splitByRegions<\n T extends unknown,\n RegionRecord extends Record<string, Region[]>\n>(array: T[], regionsRecord: 
RegionRecord) {\n const ret: [keyof RegionRecord | null, T[]][] = [];\n\n const indices = [0, array.length];\n const reverseMap: Record<string, keyof RegionRecord> = {};\n for (const [key, records] of Object.entries(regionsRecord)) {\n indices.push(\n ...records.flatMap((r) => {\n reverseMap[\"\" + [r.start, r.end]] = key;\n return [r.start, r.end];\n })\n );\n }\n indices.sort((a, b) => a - b);\n\n for (let i = 0; i < indices.length - 1; i++) {\n const start = indices[i];\n const end = indices[i + 1];\n if (start === end) {\n continue;\n }\n const regionKey = reverseMap[\"\" + [start, end]];\n\n ret.push([regionKey || null, array.slice(start, end)]);\n }\n\n return ret;\n}\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\nimport { match } from \"@unified-latex/unified-latex-util-match\";\nimport { EXIT, visit } from \"@unified-latex/unified-latex-util-visit\";\n\n/**\n * Escape a string so that it can be used to build a regular expression.\n *\n * From: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions\n */\nfunction escapeRegExp(str: string) {\n return str.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\"); // $& means the whole matched string\n}\n\n/**\n * Build a regular expression that matches everything up to the first non-allowed symbol.\n */\nfunction buildWordRegex(allowedSet: Set<string>): RegExp {\n // /\\p{L}/ matches all letters, including unicode letters. We join this with\n // everything allowed in our set to form a regexp like\n // /(\\p{L}|_|:)*/u\n // The `u` at the end allows unicode characters to be matched.\n const regexpStr = `^(${[\"\\\\p{L}\"]\n .concat(Array.from(allowedSet).map(escapeRegExp))\n .join(\"|\")})*`;\n return new RegExp(regexpStr, \"u\");\n}\n\n/**\n * Checks whether the array has a macro that could be reparsed given the `allowedTokens` but\n * do not do any reparsing. 
This function can be used in auto-detection schemes to determine if\n * macro names should actually be reparsed.\n */\nexport function hasReparsableMacroNamesInArray(\n tree: Ast.Node[],\n allowedTokens: Set<string>\n): boolean {\n for (let i = 0; i < tree.length; i++) {\n const macro = tree[i];\n const string = tree[i + 1];\n if (match.anyMacro(macro) && match.anyString(string)) {\n // There are two options. Either the macro ends with the special character,\n // e.g. `\\@foo` or the special character starts the next string, e.g. `\\foo@`.\n if (\n allowedTokens.has(\n macro.content.charAt(macro.content.length - 1)\n ) ||\n allowedTokens.has(string.content.charAt(0))\n ) {\n return true;\n }\n }\n }\n return false;\n}\n\n/**\n * Checks whether `tree` has a macro that could be reparsed given the `allowedTokens` but\n * do not do any reparsing. This function can be used in auto-detection schemes to determine if\n * macro names should actually be reparsed.\n */\nexport function hasReparsableMacroNames(\n tree: Ast.Ast,\n allowedTokens: string | Set<string>\n): boolean {\n if (typeof allowedTokens === \"string\") {\n allowedTokens = new Set(allowedTokens.split(\"\"));\n }\n // Recast so typescript doesn't complain\n const _allowedTokens = allowedTokens;\n for (const v of _allowedTokens) {\n if (v.length > 1) {\n throw new Error(\n `Only single characters are allowed as \\`allowedTokens\\` when reparsing macro names, not \\`${v}\\`.`\n );\n }\n }\n\n let ret = false;\n visit(\n tree,\n (nodes) => {\n if (hasReparsableMacroNamesInArray(nodes, _allowedTokens)) {\n ret = true;\n return EXIT;\n }\n },\n { includeArrays: true, test: Array.isArray }\n );\n return ret;\n}\n\n/**\n * Reparses all macro names in the array so that they may optionally include characters listed in `allowedTokens`.\n * This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though\n * `_` is normally stops the parsing for a macro name).\n */\nexport 
function reparseMacroNamesInArray(\n tree: Ast.Node[],\n allowedTokens: Set<string>\n) {\n const regex = buildWordRegex(allowedTokens);\n let i = 0;\n while (i < tree.length) {\n const macro = tree[i];\n const string = tree[i + 1];\n if (\n match.anyMacro(macro) &&\n // The _^ macros in math mode should not be extended no-matter what;\n // So we check to make sure that the macro we're dealing with has the default escape token.\n (macro.escapeToken == null || macro.escapeToken === \"\\\\\") &&\n match.anyString(string) &&\n // There are two options. Either the macro ends with the special character,\n // e.g. `\\@foo` or the special character starts the next string, e.g. `\\foo@`.\n (allowedTokens.has(\n macro.content.charAt(macro.content.length - 1)\n ) ||\n allowedTokens.has(string.content.charAt(0)))\n ) {\n // There might be a number somewhere in the string. If so, we should\n // break the string apart at that number\n const match = string.content.match(regex);\n const takeable = match ? 
match[0] : \"\";\n if (takeable.length > 0) {\n if (takeable.length === string.content.length) {\n // The whole string can be appended to the macro name\n macro.content += string.content;\n tree.splice(i + 1, 1);\n\n // Preserve the source location if available\n if (macro.position && string.position?.end) {\n macro.position.end = string.position.end;\n }\n } else {\n // Only part of the string can be appended to the macro name\n macro.content += takeable;\n string.content = string.content.slice(takeable.length);\n\n // Preserve the source location if available\n if (macro.position?.end) {\n macro.position.end.offset += takeable.length;\n macro.position.end.column += takeable.length;\n }\n if (string.position?.start) {\n string.position.start.offset += takeable.length;\n string.position.start.column += takeable.length;\n }\n }\n } else {\n i++;\n }\n } else {\n ++i;\n }\n }\n}\n\n/**\n * Reparses all macro names so that they may optionally include characters listed in `allowedTokens`.\n * This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though\n * `_` is normally stops the parsing for a macro name). Thus, a macro `\\foo_bar:Nn` would be parsed as having\n * the name `foo_bar:Nn` rather than as `foo` followed by the strings `_`, `bar`, `:`, `Nn`.\n */\nexport function reparseMacroNames(\n tree: Ast.Ast,\n allowedTokens: string | Set<string>\n) {\n if (typeof allowedTokens === \"string\") {\n allowedTokens = new Set(allowedTokens.split(\"\"));\n }\n // Recast so typescript doesn't complain\n const _allowedTokens = allowedTokens;\n for (const v of _allowedTokens) {\n if (v.length > 1) {\n throw new Error(\n `Only single characters are allowed as \\`allowedTokens\\` when reparsing macro names, not \\`${v}\\`.`\n );\n }\n }\n\n visit(\n tree,\n (nodes) => {\n reparseMacroNamesInArray(nodes, _allowedTokens);\n },\n { includeArrays: true, test: Array.isArray }\n );\n}\n"],
|
|
5
|
+
"mappings": ";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACOO,SAAS,kBACZ,MACA,OACA,KACQ;AACR,QAAM,MAAgB,CAAC;AACvB,MAAI,aAAqB,EAAE,OAAO,QAAkB,KAAK,KAAK,OAAO;AACrE,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,UAAM,OAAO,KAAK;AAClB,QAAI,MAAM,IAAI,GAAG;AACb,iBAAW,QAAQ;AAAA,IACvB;AACA,QAAI,IAAI,IAAI,GAAG;AACX,iBAAW,MAAM,IAAI;AACrB,UAAI,KAAK,UAAU;AACnB,mBAAa,EAAE,OAAO,QAAkB,KAAK,KAAK,OAAO;AAAA,IAC7D;AAAA,EACJ;AAEA,MAAI,WAAW,SAAS,MAAM;AAE1B,QAAI,KAAK,UAAU;AAAA,EACvB;AACA,SAAO;AACX;;;AC9BA,IAAAA,mCAAsB;;;ACQf,SAAS,cAAc,SAG5B;AACE,QAAM,WAAW,CAAC,GAAG,OAAO;AAC5B,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AACzC,QAAM,eAAe,IAAI,IAAI,SAAS,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;AACtE,QAAM,YAAY,MAAM,KAAK,YAAY;AACzC,YAAU,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAE9B,QAAM,aAAuB,CAAC;AAC9B,QAAM,wBAAuC,CAAC;AAK9C,MAAI,YAAY;AAChB,WAAS,IAAI,GAAG,IAAI,UAAU,SAAS,GAAG,KAAK;AAC3C,UAAM,QAAQ,UAAU;AACxB,UAAM,MAAM,UAAU,IAAI;AAC1B,UAAM,SAAS,EAAE,OAAO,IAAI;AAC5B,UAAM,oBAAiC,oBAAI,IAAI;AAE/C,QAAI,0BAA0B;AAC9B,aAAS,IAAI,WAAW,IAAI,SAAS,QAAQ,KAAK;AAC9C,YAAM,cAAc,SAAS;AAC7B,UAAI,YAAY,OAAO,OAAO,OAAO;AACjC,kCAA0B;AAAA,MAC9B;AACA,UAAI,CAAC,2BAA2B,YAAY,MAAM,OAAO,OAAO;AAI5D,oBAAY,IAAI;AAChB;AAAA,MACJ;AAEA,UAAI,YAAY,QAAQ,KAAK;AAEzB;AAAA,MACJ;AACA,UACI,YAAY,SAAS,OAAO,SAC5B,YAAY,OAAO,OAAO,KAC5B;AACE,kCAA0B;AAC1B,0BAAkB,IAAI,WAAW;AAAA,MACrC;AAAA,IACJ;AAEA,QAAI,kBAAkB,OAAO,GAAG;AAE5B,iBAAW,KAAK,MAAM;AACtB,4BAAsB,KAAK,iBAAiB;AAAA,IAChD;AAAA,EACJ;AAEA,SAAO,EAAE,SAAS,YAAY,oBAAoB,sBAAsB;AAC5E;AAUO,SAAS,eAGd,OAAY,eAA6B;AACvC,QAAM,MAA0C,CAAC;AAEjD,QAAM,UAAU,CAAC,GAAG,MAAM,MAAM;AAChC,QAAM,aAAiD,CAAC;AACxD,aAAW,CAAC,KAAK,OAAO,KAAK,OAAO,QAAQ,aAAa,GAAG;AACxD,YAAQ;AAAA,MACJ,GAAG,QAAQ,QAAQ,CAAC,MAAM;AACtB,mBAAW,KAAK,CAAC,EAAE,OAAO,EAAE,GAAG,KAAK;AACpC,eAAO,CAAC,EAAE,OAAO,EAAE,GAAG;AAAA,MAC1B,CAAC;AAAA,IACL;AAAA,EACJ;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAE5B,WAAS,IAAI,GAAG,IAAI,QAAQ,SAAS,GAAG,KAAK;AACzC,UAAM,QAAQ,QAAQ;AACtB,UAAM,MAAM,QAAQ,IAAI;AACxB,QAAI,UAAU,KAAK;AACf;AAAA,IACJ;AACA,UAAM,
YAAY,WAAW,KAAK,CAAC,OAAO,GAAG;AAE7C,QAAI,KAAK,CAAC,aAAa,MAAM,MAAM,MAAM,OAAO,GAAG,CAAC,CAAC;AAAA,EACzD;AAEA,SAAO;AACX;;;ADvGA,IAAAC,mCAA4B;;;AEH5B,sCAAsB;AACtB,sCAA4B;AAO5B,SAAS,aAAa,KAAa;AAC/B,SAAO,IAAI,QAAQ,uBAAuB,MAAM;AACpD;AAKA,SAAS,eAAe,YAAiC;AAKrD,QAAM,YAAY,KAAK,CAAC,QAAQ,EAC3B,OAAO,MAAM,KAAK,UAAU,EAAE,IAAI,YAAY,CAAC,EAC/C,KAAK,GAAG;AACb,SAAO,IAAI,OAAO,WAAW,GAAG;AACpC;AAOO,SAAS,+BACZ,MACA,eACO;AACP,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,UAAM,QAAQ,KAAK;AACnB,UAAM,SAAS,KAAK,IAAI;AACxB,QAAI,sCAAM,SAAS,KAAK,KAAK,sCAAM,UAAU,MAAM,GAAG;AAGlD,UACI,cAAc;AAAA,QACV,MAAM,QAAQ,OAAO,MAAM,QAAQ,SAAS,CAAC;AAAA,MACjD,KACA,cAAc,IAAI,OAAO,QAAQ,OAAO,CAAC,CAAC,GAC5C;AACE,eAAO;AAAA,MACX;AAAA,IACJ;AAAA,EACJ;AACA,SAAO;AACX;AAOO,SAAS,wBACZ,MACA,eACO;AACP,MAAI,OAAO,kBAAkB,UAAU;AACnC,oBAAgB,IAAI,IAAI,cAAc,MAAM,EAAE,CAAC;AAAA,EACnD;AAEA,QAAM,iBAAiB;AACvB,aAAW,KAAK,gBAAgB;AAC5B,QAAI,EAAE,SAAS,GAAG;AACd,YAAM,IAAI;AAAA,QACN,6FAA6F;AAAA,MACjG;AAAA,IACJ;AAAA,EACJ;AAEA,MAAI,MAAM;AACV;AAAA,IACI;AAAA,IACA,CAAC,UAAU;AACP,UAAI,+BAA+B,OAAO,cAAc,GAAG;AACvD,cAAM;AACN,eAAO;AAAA,MACX;AAAA,IACJ;AAAA,IACA,EAAE,eAAe,MAAM,MAAM,MAAM,QAAQ;AAAA,EAC/C;AACA,SAAO;AACX;AAOO,SAAS,yBACZ,MACA,eACF;AAnGF;AAoGI,QAAM,QAAQ,eAAe,aAAa;AAC1C,MAAI,IAAI;AACR,SAAO,IAAI,KAAK,QAAQ;AACpB,UAAM,QAAQ,KAAK;AACnB,UAAM,SAAS,KAAK,IAAI;AACxB,QACI,sCAAM,SAAS,KAAK,MAGnB,MAAM,eAAe,QAAQ,MAAM,gBAAgB,SACpD,sCAAM,UAAU,MAAM,MAGrB,cAAc;AAAA,MACX,MAAM,QAAQ,OAAO,MAAM,QAAQ,SAAS,CAAC;AAAA,IACjD,KACI,cAAc,IAAI,OAAO,QAAQ,OAAO,CAAC,CAAC,IAChD;AAGE,YAAMC,SAAQ,OAAO,QAAQ,MAAM,KAAK;AACxC,YAAM,WAAWA,SAAQA,OAAM,KAAK;AACpC,UAAI,SAAS,SAAS,GAAG;AACrB,YAAI,SAAS,WAAW,OAAO,QAAQ,QAAQ;AAE3C,gBAAM,WAAW,OAAO;AACxB,eAAK,OAAO,IAAI,GAAG,CAAC;AAGpB,cAAI,MAAM,cAAY,YAAO,aAAP,mBAAiB,MAAK;AACxC,kBAAM,SAAS,MAAM,OAAO,SAAS;AAAA,UACzC;AAAA,QACJ,OAAO;AAEH,gBAAM,WAAW;AACjB,iBAAO,UAAU,OAAO,QAAQ,MAAM,SAAS,MAAM;AAGrD,eAAI,WAAM,aAAN,mBAAgB,KAAK;AACrB,kBAAM,SAAS,IAAI,UAAU,SAAS;AACtC,kBAAM,SAAS,IAAI,UAAU,SAAS;AAAA,UAC1C;AACA,eAAI,YAAO,aAAP,mBAAiB,OAAO;AACxB,mBAAO,SAAS,MAAM,UAAU,SAAS;AACz
C,mBAAO,SAAS,MAAM,UAAU,SAAS;AAAA,UAC7C;AAAA,QACJ;AAAA,MACJ,OAAO;AACH;AAAA,MACJ;AAAA,IACJ,OAAO;AACH,QAAE;AAAA,IACN;AAAA,EACJ;AACJ;AAQO,SAAS,kBACZ,MACA,eACF;AACE,MAAI,OAAO,kBAAkB,UAAU;AACnC,oBAAgB,IAAI,IAAI,cAAc,MAAM,EAAE,CAAC;AAAA,EACnD;AAEA,QAAM,iBAAiB;AACvB,aAAW,KAAK,gBAAgB;AAC5B,QAAI,EAAE,SAAS,GAAG;AACd,YAAM,IAAI;AAAA,QACN,6FAA6F;AAAA,MACjG;AAAA,IACJ;AAAA,EACJ;AAEA;AAAA,IACI;AAAA,IACA,CAAC,UAAU;AACP,+BAAyB,OAAO,cAAc;AAAA,IAClD;AAAA,IACA,EAAE,eAAe,MAAM,MAAM,MAAM,QAAQ;AAAA,EAC/C;AACJ;;;AFnLA,IAAM,YAAY;AAAA,EACd,OAAO,uCAAM,mBAAmB,CAAC,cAAc,CAAC;AAAA,EAChD,KAAK,uCAAM,mBAAmB,CAAC,eAAe,CAAC;AACnD;AACA,IAAM,eAAe;AAAA,EACjB,OAAO,uCAAM,mBAAmB,CAAC,cAAc,CAAC;AAAA,EAChD,KAAK,uCAAM,mBAAmB,CAAC,aAAa,CAAC;AACjD;AAMO,SAAS,mCAAmC,MAIjD;AACE,QAAM,QAAQ,kBAAkB,MAAM,UAAU,OAAO,UAAU,GAAG;AACpE,QAAM,WAAW;AAAA,IACb;AAAA,IACA,aAAa;AAAA,IACb,aAAa;AAAA,EACjB;AAEA,QAAM,YAAY,IAAI,IAAI;AAAA,IACtB,GAAI,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;AAAA,IAChC,GAAI,SAAS,IAAI,CAAC,MAAM,CAAC,GAAG,UAAU,CAAC;AAAA,EAC3C,CAAC;AACD,QAAM,MAAM,cAAc,CAAC,GAAG,OAAO,GAAG,QAAQ,CAAC;AAEjD,QAAM,MAAM;AAAA,IACR,UAAU,CAAC;AAAA,IACX,cAAc,CAAC;AAAA,IACf,MAAM,CAAC;AAAA,EACX;AAEA,WAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,QAAQ,KAAK;AACzC,UAAM,SAAS,IAAI,QAAQ;AAC3B,UAAM,cAAc,IAAI,mBAAmB;AAC3C,QAAI,YAAY,SAAS,GAAG;AACxB,UAAI,KAAK,KAAK,MAAM;AACpB;AAAA,IACJ;AACA,eAAW,KAAK,YAAY,OAAO,GAAG;AAClC,UAAI,UAAU,IAAI,CAAC,MAAM,QAAQ;AAC7B,YAAI,SAAS,KAAK,MAAM;AAAA,MAC5B;AACA,UAAI,UAAU,IAAI,CAAC,MAAM,YAAY;AACjC,YAAI,aAAa,KAAK,MAAM;AAAA,MAChC;AAAA,IACJ;AAAA,EACJ;AAGA,MAAI,WAAW,IAAI,SAAS,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;AAC7D,MAAI,eAAe,IAAI,aAAa,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;AACrE,MAAI,OAAO,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;AAErD,SAAO;AACX;AAEA,IAAM,cAAc,oBAAI,IAAI,CAAC,GAAG,CAAC;AACjC,IAAM,UAAU,oBAAI,IAAI,CAAC,KAAK,GAAG,CAAC;AAClC,IAAM,UAAU,oBAAI,IAAI,CAAC,KAAK,KAAK,GAAG,CAAC;AAOhC,SAAS,+BAA+B,MAAe;AAC1D;AAAA,IACI;AAAA,IACA;AAAA,MACI,OAAO,CAAC,UAAU;AACd,cAAM,UAAU,mCAAmC,KAAK;AAExD,cAAM,kBACF,QAAQ,KAAK,SACb,QAAQ,aAAa,SACrB,Q
AAQ,SAAS;AACrB,YAAI,oBAAoB,GAAG;AACvB;AAAA,QACJ;AAEA,cAAM,SAAS,eAAe,OAAO,OAAO;AAC5C,cAAM,YAA0B,CAAC;AACjC,mBAAW,CAAC,KAAK,KAAK,KAAK,QAAQ;AAC/B,kBAAQ,KAAK;AAAA,YACT,KAAK;AACD,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ,KAAK;AACD,gCAAkB,OAAO,WAAW;AACpC,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ,KAAK;AACD,gCAAkB,OAAO,OAAO;AAChC,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ,KAAK;AACD,gCAAkB,OAAO,OAAO;AAChC,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ;AACI,oBAAM,IAAI;AAAA,gBACN,kCAAkC;AAAA,cACtC;AAAA,UACR;AAAA,QACJ;AAEA,cAAM,SAAS;AACf,cAAM,KAAK,GAAG,SAAS;AACvB,eAAO;AAAA,MACX;AAAA,IACJ;AAAA,IACA,EAAE,eAAe,MAAM,MAAM,MAAM,QAAQ;AAAA,EAC/C;AACJ;",
|
|
6
|
+
"names": ["import_unified_latex_util_match", "import_unified_latex_util_visit", "match"]
|
|
7
|
+
}
|
package/index.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export * from "./libs/find-region";
|
|
2
|
+
export * from "./libs/special-regions";
|
|
3
|
+
export * from "./libs/reparse-macro-names";
|
|
4
|
+
/**
|
|
5
|
+
* ## What is this?
|
|
6
|
+
*
|
|
7
|
+
* Functions to identify regions of a `unified-latex` Abstract Syntax Tree (AST) that need to be reparsed because of different
|
|
8
|
+
* category codes. For example, regions between `\makeatletter` and `\makeatother`.
|
|
9
|
+
*
|
|
10
|
+
* ## When should I use this?
|
|
11
|
+
*
|
|
12
|
+
* If you need to identify regions of the AST that need to be reparsed.
|
|
13
|
+
*/
|
|
14
|
+
//# sourceMappingURL=index.d.ts.map
|
package/index.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,cAAc,oBAAoB,CAAC;AACnC,cAAc,wBAAwB,CAAC;AACvC,cAAc,4BAA4B,CAAC;AAG3C;;;;;;;;;GASG"}
|
package/index.js
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
// libs/find-region.ts
|
|
2
|
+
function findRegionInArray(tree, start, end) {
|
|
3
|
+
const ret = [];
|
|
4
|
+
let currRegion = { start: void 0, end: tree.length };
|
|
5
|
+
for (let i = 0; i < tree.length; i++) {
|
|
6
|
+
const node = tree[i];
|
|
7
|
+
if (start(node)) {
|
|
8
|
+
currRegion.start = i;
|
|
9
|
+
}
|
|
10
|
+
if (end(node)) {
|
|
11
|
+
currRegion.end = i + 1;
|
|
12
|
+
ret.push(currRegion);
|
|
13
|
+
currRegion = { start: void 0, end: tree.length };
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
if (currRegion.start != null) {
|
|
17
|
+
ret.push(currRegion);
|
|
18
|
+
}
|
|
19
|
+
return ret;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// libs/special-regions.ts
|
|
23
|
+
import { match as match2 } from "@unified-latex/unified-latex-util-match";
|
|
24
|
+
|
|
25
|
+
// libs/regions.ts
|
|
26
|
+
function refineRegions(regions) {
|
|
27
|
+
const _regions = [...regions];
|
|
28
|
+
_regions.sort((a, b) => a.start - b.start);
|
|
29
|
+
const cutPointsSet = new Set(_regions.flatMap((r) => [r.start, r.end]));
|
|
30
|
+
const cutPoints = Array.from(cutPointsSet);
|
|
31
|
+
cutPoints.sort((a, b) => a - b);
|
|
32
|
+
const retRegions = [];
|
|
33
|
+
const retRegionsContainedIn = [];
|
|
34
|
+
let seekIndex = 0;
|
|
35
|
+
for (let i = 0; i < cutPoints.length - 1; i++) {
|
|
36
|
+
const start = cutPoints[i];
|
|
37
|
+
const end = cutPoints[i + 1];
|
|
38
|
+
const region = { start, end };
|
|
39
|
+
const regionContainedIn = /* @__PURE__ */ new Set();
|
|
40
|
+
let encounteredEndPastStart = false;
|
|
41
|
+
for (let j = seekIndex; j < _regions.length; j++) {
|
|
42
|
+
const superRegion = _regions[j];
|
|
43
|
+
if (superRegion.end >= region.start) {
|
|
44
|
+
encounteredEndPastStart = true;
|
|
45
|
+
}
|
|
46
|
+
if (!encounteredEndPastStart && superRegion.end < region.start) {
|
|
47
|
+
seekIndex = j + 1;
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
if (superRegion.start > end) {
|
|
51
|
+
break;
|
|
52
|
+
}
|
|
53
|
+
if (superRegion.start <= region.start && superRegion.end >= region.end) {
|
|
54
|
+
encounteredEndPastStart = true;
|
|
55
|
+
regionContainedIn.add(superRegion);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
if (regionContainedIn.size > 0) {
|
|
59
|
+
retRegions.push(region);
|
|
60
|
+
retRegionsContainedIn.push(regionContainedIn);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
return { regions: retRegions, regionsContainedIn: retRegionsContainedIn };
|
|
64
|
+
}
|
|
65
|
+
/**
 * Split `array` into consecutive slices at the boundaries of the regions in
 * `regionsRecord`. Each returned entry is a `[key, slice]` tuple, where `key`
 * is the key of `regionsRecord` whose region has exactly the slice's bounds,
 * or `null` when no region has those bounds. Empty slices are skipped.
 *
 * The regions are expected to be disjoint and to lie within `array`.
 *
 * @param array - the array to split
 * @param regionsRecord - object mapping a key to a list of `{ start, end }` regions
 * @returns list of `[key | null, slice]` tuples in array order
 */
function splitByRegions(array, regionsRecord) {
  // Maps "start,end" to the key of the region with exactly those bounds.
  const keyByBounds = /* @__PURE__ */ new Map();
  const boundaries = [0, array.length];
  for (const [key, records] of Object.entries(regionsRecord)) {
    for (const r of records) {
      keyByBounds.set([r.start, r.end].join(","), key);
      boundaries.push(r.start, r.end);
    }
  }
  boundaries.sort((a, b) => a - b);

  const ret = [];
  for (let i = 1; i < boundaries.length; i++) {
    const start = boundaries[i - 1];
    const end = boundaries[i];
    if (start !== end) {
      const regionKey = keyByBounds.get([start, end].join(","));
      ret.push([regionKey || null, array.slice(start, end)]);
    }
  }
  return ret;
}
|
|
89
|
+
|
|
90
|
+
// libs/special-regions.ts
|
|
91
|
+
import { SKIP, visit as visit2 } from "@unified-latex/unified-latex-util-visit";
|
|
92
|
+
|
|
93
|
+
// libs/reparse-macro-names.ts
|
|
94
|
+
import { match } from "@unified-latex/unified-latex-util-match";
|
|
95
|
+
import { EXIT, visit } from "@unified-latex/unified-latex-util-visit";
|
|
96
|
+
/**
 * Escape `str` so that it can be embedded literally in a regular expression.
 * Every regex syntax character is prefixed with a backslash.
 */
function escapeRegExp(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, (special) => "\\" + special);
}
/**
 * Build a regular expression that matches, from the start of a string, the
 * longest run consisting of unicode letters (`\p{L}`) and the entries of
 * `allowedSet`. The result looks like `/^(\p{L}|_|:)*​/u`.
 *
 * @param allowedSet - set of extra strings permitted in the run
 * @returns a `RegExp` with the `u` flag; it always matches (possibly the empty string)
 */
function buildWordRegex(allowedSet) {
  const alternatives = ["\\p{L}"];
  for (const token of allowedSet) {
    alternatives.push(escapeRegExp(token));
  }
  return new RegExp("^(" + alternatives.join("|") + ")*", "u");
}
|
|
103
|
+
/**
 * Check whether `tree` contains a macro immediately followed by a string such
 * that the pair is a candidate for reparsing given `allowedTokens`. Nothing is
 * modified.
 *
 * A pair is a candidate when the macro uses the default escape token and
 * either the macro name ends with an allowed token (e.g. `\@foo`) or the
 * following string starts with one (e.g. `\foo@`).
 *
 * @param tree - array of AST nodes to inspect
 * @param allowedTokens - set of single characters that may be part of a macro name
 * @returns true if at least one candidate pair exists
 */
function hasReparsableMacroNamesInArray(tree, allowedTokens) {
  for (let i = 0; i < tree.length; i++) {
    const macro = tree[i];
    const string = tree[i + 1];
    if (!match.anyMacro(macro) || !match.anyString(string)) {
      continue;
    }
    // Macros with a non-default escape token are never extended by
    // `reparseMacroNamesInArray`, so they must not be reported as reparsable.
    if (macro.escapeToken != null && macro.escapeToken !== "\\") {
      continue;
    }
    const macroEndsWithToken = allowedTokens.has(
      macro.content.charAt(macro.content.length - 1)
    );
    if (macroEndsWithToken || allowedTokens.has(string.content.charAt(0))) {
      return true;
    }
  }
  return false;
}
|
|
117
|
+
/**
 * Check whether any node array inside `tree` has a macro that could be
 * reparsed given `allowedTokens`. Nothing is modified; the traversal stops at
 * the first hit.
 *
 * @param tree - the AST (or array of nodes) to inspect
 * @param allowedTokens - a set of single characters, or a string whose
 *   characters are each treated as one allowed token
 * @returns true if a reparsable macro name was found
 * @throws Error if any allowed token is longer than one character
 */
function hasReparsableMacroNames(tree, allowedTokens) {
  const tokenSet =
    typeof allowedTokens === "string"
      ? new Set(allowedTokens.split(""))
      : allowedTokens;
  for (const v of tokenSet) {
    if (v.length > 1) {
      throw new Error(
        `Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`
      );
    }
  }

  let found = false;
  const checkArray = (nodes) => {
    if (!hasReparsableMacroNamesInArray(nodes, tokenSet)) {
      return;
    }
    found = true;
    return EXIT;
  };
  visit(tree, checkArray, { includeArrays: true, test: Array.isArray });
  return found;
}
|
|
142
|
+
/**
 * Reparse macro names in `tree` (in place) so that they may include the
 * characters listed in `allowedTokens`. When a macro with the default escape
 * token is directly followed by a string, the leading run of letters and
 * allowed tokens in that string is moved onto the macro name. A string that
 * is consumed entirely is removed from `tree`.
 *
 * @param tree - array of AST nodes; mutated in place
 * @param allowedTokens - set of single characters that may be part of a macro name
 */
function reparseMacroNamesInArray(tree, allowedTokens) {
  const wordRegex = buildWordRegex(allowedTokens);
  let idx = 0;
  while (idx < tree.length) {
    const macro = tree[idx];
    const next = tree[idx + 1];
    // Macros with a non-default escape token are never extended. Otherwise
    // either the macro ends with a special character (e.g. `\@foo`) or the
    // special character starts the next string (e.g. `\foo@`).
    const isCandidate =
      match.anyMacro(macro) &&
      (macro.escapeToken == null || macro.escapeToken === "\\") &&
      match.anyString(next) &&
      (allowedTokens.has(macro.content.charAt(macro.content.length - 1)) ||
        allowedTokens.has(next.content.charAt(0)));
    if (!isCandidate) {
      ++idx;
      continue;
    }

    // Only the leading run of letters/allowed tokens may be taken; anything
    // after it stays in the string.
    const found = next.content.match(wordRegex);
    const takeable = found ? found[0] : "";
    if (takeable.length === 0) {
      idx++;
      continue;
    }

    // `idx` is intentionally not advanced below: the same macro is checked
    // again against whatever now follows it.
    if (takeable.length === next.content.length) {
      // The whole string becomes part of the macro name.
      macro.content += next.content;
      tree.splice(idx + 1, 1);
      // Preserve the source location if available.
      if (macro.position && next.position != null && next.position.end) {
        macro.position.end = next.position.end;
      }
      continue;
    }

    // Only a prefix of the string becomes part of the macro name.
    macro.content += takeable;
    next.content = next.content.slice(takeable.length);
    // Shift the shared boundary between the two nodes, if positions exist.
    if (macro.position != null && macro.position.end) {
      macro.position.end.offset += takeable.length;
      macro.position.end.column += takeable.length;
    }
    if (next.position != null && next.position.start) {
      next.position.start.offset += takeable.length;
      next.position.start.column += takeable.length;
    }
  }
}
|
|
181
|
+
/**
 * Reparse all macro names in `tree` (in place) so that they may include the
 * characters listed in `allowedTokens`. Every node array found in `tree` is
 * passed to `reparseMacroNamesInArray`.
 *
 * @param tree - the AST (or array of nodes) to process; mutated in place
 * @param allowedTokens - a set of single characters, or a string whose
 *   characters are each treated as one allowed token
 * @throws Error if any allowed token is longer than one character
 */
function reparseMacroNames(tree, allowedTokens) {
  const tokenSet =
    typeof allowedTokens === "string"
      ? new Set(allowedTokens.split(""))
      : allowedTokens;
  for (const v of tokenSet) {
    if (v.length > 1) {
      throw new Error(
        `Only single characters are allowed as \`allowedTokens\` when reparsing macro names, not \`${v}\`.`
      );
    }
  }

  const reparseArray = (nodes) => {
    reparseMacroNamesInArray(nodes, tokenSet);
  };
  visit(tree, reparseArray, { includeArrays: true, test: Array.isArray });
}
|
|
201
|
+
|
|
202
|
+
// libs/special-regions.ts
|
|
203
|
+
// Matchers for the macros that open and close an expl3 syntax region.
var expl3Find = {
  start: match2.createMacroMatcher(["ExplSyntaxOn"]),
  end: match2.createMacroMatcher(["ExplSyntaxOff"])
};
// Matchers for the macros that open and close an `@`-as-letter region.
var atLetterFind = {
  start: match2.createMacroMatcher(["makeatletter"]),
  end: match2.createMacroMatcher(["makeatother"])
};
/**
 * Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and
 * `\makeatletter...\makeatother` in `tree`.
 *
 * @param tree - array of AST nodes to scan
 * @returns `{ explOnly, atLetterOnly, both }`, each a list of `{ start, end }`
 *   regions where only expl3 syntax, only `@`-letter syntax, or both apply.
 *   Regions spanning a single node are dropped.
 */
function findExpl3AndAtLetterRegionsInArray(tree) {
  const explRegions = findRegionInArray(tree, expl3Find.start, expl3Find.end);
  const atLetterRegions = findRegionInArray(
    tree,
    atLetterFind.start,
    atLetterFind.end
  );

  // Remember which kind each original region is.
  const kindOf = /* @__PURE__ */ new Map();
  for (const r of explRegions) {
    kindOf.set(r, "expl");
  }
  for (const r of atLetterRegions) {
    kindOf.set(r, "atLetter");
  }

  const refined = refineRegions([...explRegions, ...atLetterRegions]);
  const explOnly = [];
  const atLetterOnly = [];
  const both = [];
  refined.regions.forEach((region, i) => {
    const parents = refined.regionsContainedIn[i];
    if (parents.size === 2) {
      both.push(region);
      return;
    }
    for (const parent of parents.values()) {
      const kind = kindOf.get(parent);
      if (kind === "expl") {
        explOnly.push(region);
      }
      if (kind === "atLetter") {
        atLetterOnly.push(region);
      }
    }
  });

  // Regions of size 1 only contain the starting/stopping macro, so they are discarded.
  const spansMoreThanOneNode = (r) => r.end - r.start > 1;
  return {
    explOnly: explOnly.filter(spansMoreThanOneNode),
    atLetterOnly: atLetterOnly.filter(spansMoreThanOneNode),
    both: both.filter(spansMoreThanOneNode)
  };
}
|
|
249
|
+
// Extra characters allowed in macro names for each kind of region.
var atLetterSet = /* @__PURE__ */ new Set(["@"]);
var explSet = /* @__PURE__ */ new Set(["_", ":"]);
var bothSet = /* @__PURE__ */ new Set(["_", ":", "@"]);
/**
 * Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and
 * `\makeatletter...\makeatother` in every node array of `tree` and reparse
 * their contents (in place) so that the relevant characters (`@`, `_`, `:`)
 * become part of the macro names.
 *
 * @param tree - the AST to process; mutated in place
 */
function reparseExpl3AndAtLetterRegions(tree) {
  // Which token set to reparse with, per region key from `findExpl3AndAtLetterRegionsInArray`.
  const tokensForRegion = /* @__PURE__ */ new Map([
    ["atLetterOnly", atLetterSet],
    ["explOnly", explSet],
    ["both", bothSet]
  ]);

  const reparseArray = (nodes) => {
    const regions = findExpl3AndAtLetterRegionsInArray(nodes);
    const totalNumRegions = regions.both.length + regions.atLetterOnly.length + regions.explOnly.length;
    if (totalNumRegions === 0) {
      // Nothing to reparse in this array.
      return;
    }

    const processed = [];
    for (const [key, slice] of splitByRegions(nodes, regions)) {
      if (key !== null) {
        const tokens = tokensForRegion.get(key);
        if (tokens === void 0) {
          throw new Error(
            `Unexpected case when splitting ${key}`
          );
        }
        reparseMacroNames(slice, tokens);
      }
      processed.push(...slice);
    }

    // Replace the contents of `nodes` in place with the reparsed slices.
    nodes.length = 0;
    nodes.push(...processed);
    return SKIP;
  };

  visit2(
    tree,
    { leave: reparseArray },
    { includeArrays: true, test: Array.isArray }
  );
}
|
|
295
|
+
export {
|
|
296
|
+
findExpl3AndAtLetterRegionsInArray,
|
|
297
|
+
findRegionInArray,
|
|
298
|
+
hasReparsableMacroNames,
|
|
299
|
+
hasReparsableMacroNamesInArray,
|
|
300
|
+
reparseExpl3AndAtLetterRegions,
|
|
301
|
+
reparseMacroNames,
|
|
302
|
+
reparseMacroNamesInArray
|
|
303
|
+
};
|
|
304
|
+
//# sourceMappingURL=index.js.map
|
package/index.js.map
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 3,
|
|
3
|
+
"sources": ["../libs/find-region.ts", "../libs/special-regions.ts", "../libs/regions.ts", "../libs/reparse-macro-names.ts"],
|
|
4
|
+
"sourcesContent": ["import * as Ast from \"@unified-latex/unified-latex-types\";\nimport { Region } from \"./regions\";\n\n/**\n * Find all contiguous segments in the array that are between start and end blocks.\n * The `start` and `end` are functions that determine when a region starts and ends.\n */\nexport function findRegionInArray(\n tree: Ast.Node[],\n start: (node: Ast.Node) => boolean,\n end: (node: Ast.Node) => boolean\n): Region[] {\n const ret: Region[] = [];\n let currRegion: Region = { start: undefined as any, end: tree.length };\n for (let i = 0; i < tree.length; i++) {\n const node = tree[i];\n if (start(node)) {\n currRegion.start = i;\n }\n if (end(node)) {\n currRegion.end = i + 1;\n ret.push(currRegion);\n currRegion = { start: undefined as any, end: tree.length };\n }\n }\n\n if (currRegion.start != null) {\n // Regions don't necessarily have to encounter an `end` to end.\n ret.push(currRegion);\n }\n return ret;\n}\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\nimport { match } from \"@unified-latex/unified-latex-util-match\";\nimport { findRegionInArray } from \"./find-region\";\nimport { refineRegions, Region, splitByRegions } from \"./regions\";\nimport { SKIP, visit } from \"@unified-latex/unified-latex-util-visit\";\nimport { reparseMacroNames } from \"./reparse-macro-names\";\n\nconst expl3Find = {\n start: match.createMacroMatcher([\"ExplSyntaxOn\"]),\n end: match.createMacroMatcher([\"ExplSyntaxOff\"]),\n};\nconst atLetterFind = {\n start: match.createMacroMatcher([\"makeatletter\"]),\n end: match.createMacroMatcher([\"makeatother\"]),\n};\n\n/**\n * Find regions between `\\ExplSyntaxOn...\\ExplSyntaxOff` and `\\makeatletter...\\makeatother`.\n * Returns an object containing regions where one or both syntax's apply.\n */\nexport function findExpl3AndAtLetterRegionsInArray(tree: Ast.Node[]): {\n explOnly: Region[];\n atLetterOnly: Region[];\n both: Region[];\n} {\n const expl3 = findRegionInArray(tree, 
expl3Find.start, expl3Find.end);\n const atLetter = findRegionInArray(\n tree,\n atLetterFind.start,\n atLetterFind.end\n );\n\n const regionMap = new Map([\n ...(expl3.map((x) => [x, \"expl\"]) as [Region, \"expl\"][]),\n ...(atLetter.map((x) => [x, \"atLetter\"]) as [Region, \"atLetter\"][]),\n ]);\n const all = refineRegions([...expl3, ...atLetter]);\n\n const ret = {\n explOnly: [] as Region[],\n atLetterOnly: [] as Region[],\n both: [] as Region[],\n };\n\n for (let i = 0; i < all.regions.length; i++) {\n const region = all.regions[i];\n const containedIn = all.regionsContainedIn[i];\n if (containedIn.size === 2) {\n ret.both.push(region);\n continue;\n }\n for (const v of containedIn.values()) {\n if (regionMap.get(v) === \"expl\") {\n ret.explOnly.push(region);\n }\n if (regionMap.get(v) === \"atLetter\") {\n ret.atLetterOnly.push(region);\n }\n }\n }\n\n // Regions of size 1 only contain the starting/stopping macro, so they should be discarded\n ret.explOnly = ret.explOnly.filter((r) => r.end - r.start > 1);\n ret.atLetterOnly = ret.atLetterOnly.filter((r) => r.end - r.start > 1);\n ret.both = ret.both.filter((r) => r.end - r.start > 1);\n\n return ret;\n}\n\nconst atLetterSet = new Set([\"@\"]);\nconst explSet = new Set([\"_\", \":\"]);\nconst bothSet = new Set([\"_\", \":\", \"@\"]);\n\n/**\n * Find regions between `\\ExplSyntaxOn...\\ExplSyntaxOff` and `\\makeatletter...\\makeatother`\n * and reparse their contents so that the relevant characters (e.g., `@`, `_`, and `:`) become\n * part of the macro names.\n */\nexport function reparseExpl3AndAtLetterRegions(tree: Ast.Ast) {\n visit(\n tree,\n {\n leave: (nodes) => {\n const regions = findExpl3AndAtLetterRegionsInArray(nodes);\n // In all likelihood, we don't need to do any reparsing, so bail early here\n const totalNumRegions =\n regions.both.length +\n regions.atLetterOnly.length +\n regions.explOnly.length;\n if (totalNumRegions === 0) {\n return;\n }\n\n const splits = splitByRegions(nodes, 
regions);\n const processed: typeof nodes = [];\n for (const [key, slice] of splits) {\n switch (key) {\n case null:\n processed.push(...slice);\n continue;\n case \"atLetterOnly\":\n reparseMacroNames(slice, atLetterSet);\n processed.push(...slice);\n continue;\n case \"explOnly\":\n reparseMacroNames(slice, explSet);\n processed.push(...slice);\n continue;\n case \"both\":\n reparseMacroNames(slice, bothSet);\n processed.push(...slice);\n continue;\n default:\n throw new Error(\n `Unexpected case when splitting ${key}`\n );\n }\n }\n\n nodes.length = 0;\n nodes.push(...processed);\n return SKIP;\n },\n },\n { includeArrays: true, test: Array.isArray }\n );\n}\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\n\nexport type Region = { start: number; end: number };\n\n/**\n * Given `regions`, a list of `Region`s (not necessarily ordered, possibly overlapping), return a list of in-order,\n * non-overlapping regions and a corresponding list containing a set of the original `Region`s that the new region\n * is a subset of.\n */\nexport function refineRegions(regions: Region[]): {\n regions: Region[];\n regionsContainedIn: Set<Region>[];\n} {\n const _regions = [...regions];\n _regions.sort((a, b) => a.start - b.start);\n const cutPointsSet = new Set(_regions.flatMap((r) => [r.start, r.end]));\n const cutPoints = Array.from(cutPointsSet);\n cutPoints.sort((a, b) => a - b);\n\n const retRegions: Region[] = [];\n const retRegionsContainedIn: Set<Region>[] = [];\n\n // We will be checking what regions we are completely contained in.\n // Because `_regions` is sorted by start, `seekIndex` will be incremented\n // by end, so that we don't do too much array testing.\n let seekIndex = 0;\n for (let i = 0; i < cutPoints.length - 1; i++) {\n const start = cutPoints[i];\n const end = cutPoints[i + 1];\n const region = { start, end };\n const regionContainedIn: Set<Region> = new Set();\n\n let encounteredEndPastStart = false;\n for (let j = seekIndex; j < 
_regions.length; j++) {\n const superRegion = _regions[j];\n if (superRegion.end >= region.start) {\n encounteredEndPastStart = true;\n }\n if (!encounteredEndPastStart && superRegion.end < region.start) {\n // In this case, the region (and all regions that came before)\n // end before the region we are testing, so we may safely skip past it\n // from here on out.\n seekIndex = j + 1;\n continue;\n }\n\n if (superRegion.start > end) {\n // Because `_regions` is sorted, we can stop here\n break;\n }\n if (\n superRegion.start <= region.start &&\n superRegion.end >= region.end\n ) {\n encounteredEndPastStart = true;\n regionContainedIn.add(superRegion);\n }\n }\n\n if (regionContainedIn.size > 0) {\n // We only count if we are contained in a subregion\n retRegions.push(region);\n retRegionsContainedIn.push(regionContainedIn);\n }\n }\n\n return { regions: retRegions, regionsContainedIn: retRegionsContainedIn };\n}\n\n/**\n * Split an array up into the disjoint regions specified by `regionRecord`.\n * Returned is a list of tuples, the first item being the key of `regionRecord` if there\n * was a corresponding region, or `null` if there was no corresponding region.\n *\n * This function assumes that the regions in `regionRecord` are disjoint and fully contained\n * within the bounds of `array`.\n */\nexport function splitByRegions<\n T extends unknown,\n RegionRecord extends Record<string, Region[]>\n>(array: T[], regionsRecord: RegionRecord) {\n const ret: [keyof RegionRecord | null, T[]][] = [];\n\n const indices = [0, array.length];\n const reverseMap: Record<string, keyof RegionRecord> = {};\n for (const [key, records] of Object.entries(regionsRecord)) {\n indices.push(\n ...records.flatMap((r) => {\n reverseMap[\"\" + [r.start, r.end]] = key;\n return [r.start, r.end];\n })\n );\n }\n indices.sort((a, b) => a - b);\n\n for (let i = 0; i < indices.length - 1; i++) {\n const start = indices[i];\n const end = indices[i + 1];\n if (start === end) {\n continue;\n }\n 
const regionKey = reverseMap[\"\" + [start, end]];\n\n ret.push([regionKey || null, array.slice(start, end)]);\n }\n\n return ret;\n}\n", "import * as Ast from \"@unified-latex/unified-latex-types\";\nimport { match } from \"@unified-latex/unified-latex-util-match\";\nimport { EXIT, visit } from \"@unified-latex/unified-latex-util-visit\";\n\n/**\n * Escape a string so that it can be used to build a regular expression.\n *\n * From: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions\n */\nfunction escapeRegExp(str: string) {\n return str.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\"); // $& means the whole matched string\n}\n\n/**\n * Build a regular expression that matches everything up to the first non-allowed symbol.\n */\nfunction buildWordRegex(allowedSet: Set<string>): RegExp {\n // /\\p{L}/ matches all letters, including unicode letters. We join this with\n // everything allowed in our set to form a regexp like\n // /(\\p{L}|_|:)*/u\n // The `u` at the end allows unicode characters to be matched.\n const regexpStr = `^(${[\"\\\\p{L}\"]\n .concat(Array.from(allowedSet).map(escapeRegExp))\n .join(\"|\")})*`;\n return new RegExp(regexpStr, \"u\");\n}\n\n/**\n * Checks whether the array has a macro that could be reparsed given the `allowedTokens` but\n * do not do any reparsing. This function can be used in auto-detection schemes to determine if\n * macro names should actually be reparsed.\n */\nexport function hasReparsableMacroNamesInArray(\n tree: Ast.Node[],\n allowedTokens: Set<string>\n): boolean {\n for (let i = 0; i < tree.length; i++) {\n const macro = tree[i];\n const string = tree[i + 1];\n if (match.anyMacro(macro) && match.anyString(string)) {\n // There are two options. Either the macro ends with the special character,\n // e.g. `\\@foo` or the special character starts the next string, e.g. 
`\\foo@`.\n if (\n allowedTokens.has(\n macro.content.charAt(macro.content.length - 1)\n ) ||\n allowedTokens.has(string.content.charAt(0))\n ) {\n return true;\n }\n }\n }\n return false;\n}\n\n/**\n * Checks whether `tree` has a macro that could be reparsed given the `allowedTokens` but\n * do not do any reparsing. This function can be used in auto-detection schemes to determine if\n * macro names should actually be reparsed.\n */\nexport function hasReparsableMacroNames(\n tree: Ast.Ast,\n allowedTokens: string | Set<string>\n): boolean {\n if (typeof allowedTokens === \"string\") {\n allowedTokens = new Set(allowedTokens.split(\"\"));\n }\n // Recast so typescript doesn't complain\n const _allowedTokens = allowedTokens;\n for (const v of _allowedTokens) {\n if (v.length > 1) {\n throw new Error(\n `Only single characters are allowed as \\`allowedTokens\\` when reparsing macro names, not \\`${v}\\`.`\n );\n }\n }\n\n let ret = false;\n visit(\n tree,\n (nodes) => {\n if (hasReparsableMacroNamesInArray(nodes, _allowedTokens)) {\n ret = true;\n return EXIT;\n }\n },\n { includeArrays: true, test: Array.isArray }\n );\n return ret;\n}\n\n/**\n * Reparses all macro names in the array so that they may optionally include characters listed in `allowedTokens`.\n * This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though\n * `_` is normally stops the parsing for a macro name).\n */\nexport function reparseMacroNamesInArray(\n tree: Ast.Node[],\n allowedTokens: Set<string>\n) {\n const regex = buildWordRegex(allowedTokens);\n let i = 0;\n while (i < tree.length) {\n const macro = tree[i];\n const string = tree[i + 1];\n if (\n match.anyMacro(macro) &&\n // The _^ macros in math mode should not be extended no-matter what;\n // So we check to make sure that the macro we're dealing with has the default escape token.\n (macro.escapeToken == null || macro.escapeToken === \"\\\\\") &&\n match.anyString(string) &&\n // There 
are two options. Either the macro ends with the special character,\n // e.g. `\\@foo` or the special character starts the next string, e.g. `\\foo@`.\n (allowedTokens.has(\n macro.content.charAt(macro.content.length - 1)\n ) ||\n allowedTokens.has(string.content.charAt(0)))\n ) {\n // There might be a number somewhere in the string. If so, we should\n // break the string apart at that number\n const match = string.content.match(regex);\n const takeable = match ? match[0] : \"\";\n if (takeable.length > 0) {\n if (takeable.length === string.content.length) {\n // The whole string can be appended to the macro name\n macro.content += string.content;\n tree.splice(i + 1, 1);\n\n // Preserve the source location if available\n if (macro.position && string.position?.end) {\n macro.position.end = string.position.end;\n }\n } else {\n // Only part of the string can be appended to the macro name\n macro.content += takeable;\n string.content = string.content.slice(takeable.length);\n\n // Preserve the source location if available\n if (macro.position?.end) {\n macro.position.end.offset += takeable.length;\n macro.position.end.column += takeable.length;\n }\n if (string.position?.start) {\n string.position.start.offset += takeable.length;\n string.position.start.column += takeable.length;\n }\n }\n } else {\n i++;\n }\n } else {\n ++i;\n }\n }\n}\n\n/**\n * Reparses all macro names so that they may optionally include characters listed in `allowedTokens`.\n * This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though\n * `_` is normally stops the parsing for a macro name). 
Thus, a macro `\\foo_bar:Nn` would be parsed as having\n * the name `foo_bar:Nn` rather than as `foo` followed by the strings `_`, `bar`, `:`, `Nn`.\n */\nexport function reparseMacroNames(\n tree: Ast.Ast,\n allowedTokens: string | Set<string>\n) {\n if (typeof allowedTokens === \"string\") {\n allowedTokens = new Set(allowedTokens.split(\"\"));\n }\n // Recast so typescript doesn't complain\n const _allowedTokens = allowedTokens;\n for (const v of _allowedTokens) {\n if (v.length > 1) {\n throw new Error(\n `Only single characters are allowed as \\`allowedTokens\\` when reparsing macro names, not \\`${v}\\`.`\n );\n }\n }\n\n visit(\n tree,\n (nodes) => {\n reparseMacroNamesInArray(nodes, _allowedTokens);\n },\n { includeArrays: true, test: Array.isArray }\n );\n}\n"],
|
|
5
|
+
"mappings": ";AAOO,SAAS,kBACZ,MACA,OACA,KACQ;AACR,QAAM,MAAgB,CAAC;AACvB,MAAI,aAAqB,EAAE,OAAO,QAAkB,KAAK,KAAK,OAAO;AACrE,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,UAAM,OAAO,KAAK;AAClB,QAAI,MAAM,IAAI,GAAG;AACb,iBAAW,QAAQ;AAAA,IACvB;AACA,QAAI,IAAI,IAAI,GAAG;AACX,iBAAW,MAAM,IAAI;AACrB,UAAI,KAAK,UAAU;AACnB,mBAAa,EAAE,OAAO,QAAkB,KAAK,KAAK,OAAO;AAAA,IAC7D;AAAA,EACJ;AAEA,MAAI,WAAW,SAAS,MAAM;AAE1B,QAAI,KAAK,UAAU;AAAA,EACvB;AACA,SAAO;AACX;;;AC9BA,SAAS,SAAAA,cAAa;;;ACQf,SAAS,cAAc,SAG5B;AACE,QAAM,WAAW,CAAC,GAAG,OAAO;AAC5B,WAAS,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AACzC,QAAM,eAAe,IAAI,IAAI,SAAS,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;AACtE,QAAM,YAAY,MAAM,KAAK,YAAY;AACzC,YAAU,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAE9B,QAAM,aAAuB,CAAC;AAC9B,QAAM,wBAAuC,CAAC;AAK9C,MAAI,YAAY;AAChB,WAAS,IAAI,GAAG,IAAI,UAAU,SAAS,GAAG,KAAK;AAC3C,UAAM,QAAQ,UAAU;AACxB,UAAM,MAAM,UAAU,IAAI;AAC1B,UAAM,SAAS,EAAE,OAAO,IAAI;AAC5B,UAAM,oBAAiC,oBAAI,IAAI;AAE/C,QAAI,0BAA0B;AAC9B,aAAS,IAAI,WAAW,IAAI,SAAS,QAAQ,KAAK;AAC9C,YAAM,cAAc,SAAS;AAC7B,UAAI,YAAY,OAAO,OAAO,OAAO;AACjC,kCAA0B;AAAA,MAC9B;AACA,UAAI,CAAC,2BAA2B,YAAY,MAAM,OAAO,OAAO;AAI5D,oBAAY,IAAI;AAChB;AAAA,MACJ;AAEA,UAAI,YAAY,QAAQ,KAAK;AAEzB;AAAA,MACJ;AACA,UACI,YAAY,SAAS,OAAO,SAC5B,YAAY,OAAO,OAAO,KAC5B;AACE,kCAA0B;AAC1B,0BAAkB,IAAI,WAAW;AAAA,MACrC;AAAA,IACJ;AAEA,QAAI,kBAAkB,OAAO,GAAG;AAE5B,iBAAW,KAAK,MAAM;AACtB,4BAAsB,KAAK,iBAAiB;AAAA,IAChD;AAAA,EACJ;AAEA,SAAO,EAAE,SAAS,YAAY,oBAAoB,sBAAsB;AAC5E;AAUO,SAAS,eAGd,OAAY,eAA6B;AACvC,QAAM,MAA0C,CAAC;AAEjD,QAAM,UAAU,CAAC,GAAG,MAAM,MAAM;AAChC,QAAM,aAAiD,CAAC;AACxD,aAAW,CAAC,KAAK,OAAO,KAAK,OAAO,QAAQ,aAAa,GAAG;AACxD,YAAQ;AAAA,MACJ,GAAG,QAAQ,QAAQ,CAAC,MAAM;AACtB,mBAAW,KAAK,CAAC,EAAE,OAAO,EAAE,GAAG,KAAK;AACpC,eAAO,CAAC,EAAE,OAAO,EAAE,GAAG;AAAA,MAC1B,CAAC;AAAA,IACL;AAAA,EACJ;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAE5B,WAAS,IAAI,GAAG,IAAI,QAAQ,SAAS,GAAG,KAAK;AACzC,UAAM,QAAQ,QAAQ;AACtB,UAAM,MAAM,QAAQ,IAAI;AACxB,QAAI,UAAU,KAAK;AACf;AAAA,IACJ;AACA,UAAM,YAAY,WAAW,KAAK,CAAC,OAAO,GAAG;AAE7C,QAAI,KAAK,CAAC,aAAa,MAAM,MAAM,MAAM,OA
AO,GAAG,CAAC,CAAC;AAAA,EACzD;AAEA,SAAO;AACX;;;ADvGA,SAAS,MAAM,SAAAC,cAAa;;;AEH5B,SAAS,aAAa;AACtB,SAAS,MAAM,aAAa;AAO5B,SAAS,aAAa,KAAa;AAC/B,SAAO,IAAI,QAAQ,uBAAuB,MAAM;AACpD;AAKA,SAAS,eAAe,YAAiC;AAKrD,QAAM,YAAY,KAAK,CAAC,QAAQ,EAC3B,OAAO,MAAM,KAAK,UAAU,EAAE,IAAI,YAAY,CAAC,EAC/C,KAAK,GAAG;AACb,SAAO,IAAI,OAAO,WAAW,GAAG;AACpC;AAOO,SAAS,+BACZ,MACA,eACO;AACP,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,UAAM,QAAQ,KAAK;AACnB,UAAM,SAAS,KAAK,IAAI;AACxB,QAAI,MAAM,SAAS,KAAK,KAAK,MAAM,UAAU,MAAM,GAAG;AAGlD,UACI,cAAc;AAAA,QACV,MAAM,QAAQ,OAAO,MAAM,QAAQ,SAAS,CAAC;AAAA,MACjD,KACA,cAAc,IAAI,OAAO,QAAQ,OAAO,CAAC,CAAC,GAC5C;AACE,eAAO;AAAA,MACX;AAAA,IACJ;AAAA,EACJ;AACA,SAAO;AACX;AAOO,SAAS,wBACZ,MACA,eACO;AACP,MAAI,OAAO,kBAAkB,UAAU;AACnC,oBAAgB,IAAI,IAAI,cAAc,MAAM,EAAE,CAAC;AAAA,EACnD;AAEA,QAAM,iBAAiB;AACvB,aAAW,KAAK,gBAAgB;AAC5B,QAAI,EAAE,SAAS,GAAG;AACd,YAAM,IAAI;AAAA,QACN,6FAA6F;AAAA,MACjG;AAAA,IACJ;AAAA,EACJ;AAEA,MAAI,MAAM;AACV;AAAA,IACI;AAAA,IACA,CAAC,UAAU;AACP,UAAI,+BAA+B,OAAO,cAAc,GAAG;AACvD,cAAM;AACN,eAAO;AAAA,MACX;AAAA,IACJ;AAAA,IACA,EAAE,eAAe,MAAM,MAAM,MAAM,QAAQ;AAAA,EAC/C;AACA,SAAO;AACX;AAOO,SAAS,yBACZ,MACA,eACF;AAnGF;AAoGI,QAAM,QAAQ,eAAe,aAAa;AAC1C,MAAI,IAAI;AACR,SAAO,IAAI,KAAK,QAAQ;AACpB,UAAM,QAAQ,KAAK;AACnB,UAAM,SAAS,KAAK,IAAI;AACxB,QACI,MAAM,SAAS,KAAK,MAGnB,MAAM,eAAe,QAAQ,MAAM,gBAAgB,SACpD,MAAM,UAAU,MAAM,MAGrB,cAAc;AAAA,MACX,MAAM,QAAQ,OAAO,MAAM,QAAQ,SAAS,CAAC;AAAA,IACjD,KACI,cAAc,IAAI,OAAO,QAAQ,OAAO,CAAC,CAAC,IAChD;AAGE,YAAMC,SAAQ,OAAO,QAAQ,MAAM,KAAK;AACxC,YAAM,WAAWA,SAAQA,OAAM,KAAK;AACpC,UAAI,SAAS,SAAS,GAAG;AACrB,YAAI,SAAS,WAAW,OAAO,QAAQ,QAAQ;AAE3C,gBAAM,WAAW,OAAO;AACxB,eAAK,OAAO,IAAI,GAAG,CAAC;AAGpB,cAAI,MAAM,cAAY,YAAO,aAAP,mBAAiB,MAAK;AACxC,kBAAM,SAAS,MAAM,OAAO,SAAS;AAAA,UACzC;AAAA,QACJ,OAAO;AAEH,gBAAM,WAAW;AACjB,iBAAO,UAAU,OAAO,QAAQ,MAAM,SAAS,MAAM;AAGrD,eAAI,WAAM,aAAN,mBAAgB,KAAK;AACrB,kBAAM,SAAS,IAAI,UAAU,SAAS;AACtC,kBAAM,SAAS,IAAI,UAAU,SAAS;AAAA,UAC1C;AACA,eAAI,YAAO,aAAP,mBAAiB,OAAO;AACxB,mBAAO,SAAS,MAAM,UAAU,SAAS;AACzC,mBAAO,SAAS,MAAM,UAAU,SAAS;AAAA,UAC7C;AAAA,QACJ;AAAA,MACJ
,OAAO;AACH;AAAA,MACJ;AAAA,IACJ,OAAO;AACH,QAAE;AAAA,IACN;AAAA,EACJ;AACJ;AAQO,SAAS,kBACZ,MACA,eACF;AACE,MAAI,OAAO,kBAAkB,UAAU;AACnC,oBAAgB,IAAI,IAAI,cAAc,MAAM,EAAE,CAAC;AAAA,EACnD;AAEA,QAAM,iBAAiB;AACvB,aAAW,KAAK,gBAAgB;AAC5B,QAAI,EAAE,SAAS,GAAG;AACd,YAAM,IAAI;AAAA,QACN,6FAA6F;AAAA,MACjG;AAAA,IACJ;AAAA,EACJ;AAEA;AAAA,IACI;AAAA,IACA,CAAC,UAAU;AACP,+BAAyB,OAAO,cAAc;AAAA,IAClD;AAAA,IACA,EAAE,eAAe,MAAM,MAAM,MAAM,QAAQ;AAAA,EAC/C;AACJ;;;AFnLA,IAAM,YAAY;AAAA,EACd,OAAOC,OAAM,mBAAmB,CAAC,cAAc,CAAC;AAAA,EAChD,KAAKA,OAAM,mBAAmB,CAAC,eAAe,CAAC;AACnD;AACA,IAAM,eAAe;AAAA,EACjB,OAAOA,OAAM,mBAAmB,CAAC,cAAc,CAAC;AAAA,EAChD,KAAKA,OAAM,mBAAmB,CAAC,aAAa,CAAC;AACjD;AAMO,SAAS,mCAAmC,MAIjD;AACE,QAAM,QAAQ,kBAAkB,MAAM,UAAU,OAAO,UAAU,GAAG;AACpE,QAAM,WAAW;AAAA,IACb;AAAA,IACA,aAAa;AAAA,IACb,aAAa;AAAA,EACjB;AAEA,QAAM,YAAY,IAAI,IAAI;AAAA,IACtB,GAAI,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;AAAA,IAChC,GAAI,SAAS,IAAI,CAAC,MAAM,CAAC,GAAG,UAAU,CAAC;AAAA,EAC3C,CAAC;AACD,QAAM,MAAM,cAAc,CAAC,GAAG,OAAO,GAAG,QAAQ,CAAC;AAEjD,QAAM,MAAM;AAAA,IACR,UAAU,CAAC;AAAA,IACX,cAAc,CAAC;AAAA,IACf,MAAM,CAAC;AAAA,EACX;AAEA,WAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,QAAQ,KAAK;AACzC,UAAM,SAAS,IAAI,QAAQ;AAC3B,UAAM,cAAc,IAAI,mBAAmB;AAC3C,QAAI,YAAY,SAAS,GAAG;AACxB,UAAI,KAAK,KAAK,MAAM;AACpB;AAAA,IACJ;AACA,eAAW,KAAK,YAAY,OAAO,GAAG;AAClC,UAAI,UAAU,IAAI,CAAC,MAAM,QAAQ;AAC7B,YAAI,SAAS,KAAK,MAAM;AAAA,MAC5B;AACA,UAAI,UAAU,IAAI,CAAC,MAAM,YAAY;AACjC,YAAI,aAAa,KAAK,MAAM;AAAA,MAChC;AAAA,IACJ;AAAA,EACJ;AAGA,MAAI,WAAW,IAAI,SAAS,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;AAC7D,MAAI,eAAe,IAAI,aAAa,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;AACrE,MAAI,OAAO,IAAI,KAAK,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;AAErD,SAAO;AACX;AAEA,IAAM,cAAc,oBAAI,IAAI,CAAC,GAAG,CAAC;AACjC,IAAM,UAAU,oBAAI,IAAI,CAAC,KAAK,GAAG,CAAC;AAClC,IAAM,UAAU,oBAAI,IAAI,CAAC,KAAK,KAAK,GAAG,CAAC;AAOhC,SAAS,+BAA+B,MAAe;AAC1D,EAAAC;AAAA,IACI;AAAA,IACA;AAAA,MACI,OAAO,CAAC,UAAU;AACd,cAAM,UAAU,mCAAmC,KAAK;AAExD,cAAM,kBACF,QAAQ,KAAK,SACb,QAAQ,aAAa,SACrB,QAAQ,SAAS;AACrB,YAAI,oBAAoB,GAAG;AACvB;AAAA,QACJ;AAEA
,cAAM,SAAS,eAAe,OAAO,OAAO;AAC5C,cAAM,YAA0B,CAAC;AACjC,mBAAW,CAAC,KAAK,KAAK,KAAK,QAAQ;AAC/B,kBAAQ,KAAK;AAAA,YACT,KAAK;AACD,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ,KAAK;AACD,gCAAkB,OAAO,WAAW;AACpC,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ,KAAK;AACD,gCAAkB,OAAO,OAAO;AAChC,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ,KAAK;AACD,gCAAkB,OAAO,OAAO;AAChC,wBAAU,KAAK,GAAG,KAAK;AACvB;AAAA,YACJ;AACI,oBAAM,IAAI;AAAA,gBACN,kCAAkC;AAAA,cACtC;AAAA,UACR;AAAA,QACJ;AAEA,cAAM,SAAS;AACf,cAAM,KAAK,GAAG,SAAS;AACvB,eAAO;AAAA,MACX;AAAA,IACJ;AAAA,IACA,EAAE,eAAe,MAAM,MAAM,MAAM,QAAQ;AAAA,EAC/C;AACJ;",
|
|
6
|
+
"names": ["match", "visit", "match", "match", "visit"]
|
|
7
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import * as Ast from "@unified-latex/unified-latex-types";
|
|
2
|
+
import { Region } from "./regions";
|
|
3
|
+
/**
|
|
4
|
+
* Find all contiguous segments in the array that are between start and end blocks.
|
|
5
|
+
* The `start` and `end` are functions that determine when a region starts and ends.
|
|
6
|
+
*/
|
|
7
|
+
export declare function findRegionInArray(tree: Ast.Node[], start: (node: Ast.Node) => boolean, end: (node: Ast.Node) => boolean): Region[];
|
|
8
|
+
//# sourceMappingURL=find-region.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"find-region.d.ts","sourceRoot":"","sources":["../../libs/find-region.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,GAAG,MAAM,oCAAoC,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AAEnC;;;GAGG;AACH,wBAAgB,iBAAiB,CAC7B,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,EAChB,KAAK,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,KAAK,OAAO,EAClC,GAAG,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,KAAK,OAAO,GACjC,MAAM,EAAE,CAoBV"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export declare type Region = {
|
|
2
|
+
start: number;
|
|
3
|
+
end: number;
|
|
4
|
+
};
|
|
5
|
+
/**
|
|
6
|
+
* Given `regions`, a list of `Region`s (not necessarily ordered, possibly overlapping), return a list of in-order,
|
|
7
|
+
* non-overlapping regions and a corresponding list containing a set of the original `Region`s that the new region
|
|
8
|
+
* is a subset of.
|
|
9
|
+
*/
|
|
10
|
+
export declare function refineRegions(regions: Region[]): {
|
|
11
|
+
regions: Region[];
|
|
12
|
+
regionsContainedIn: Set<Region>[];
|
|
13
|
+
};
|
|
14
|
+
/**
|
|
15
|
+
* Split an array up into the disjoint regions specified by `regionsRecord`.
|
|
16
|
+
* Returned is a list of tuples, the first item being the key of `regionsRecord` if there
|
|
17
|
+
* was a corresponding region, or `null` if there was no corresponding region.
|
|
18
|
+
*
|
|
19
|
+
* This function assumes that the regions in `regionsRecord` are disjoint and fully contained
|
|
20
|
+
* within the bounds of `array`.
|
|
21
|
+
*/
|
|
22
|
+
export declare function splitByRegions<T extends unknown, RegionRecord extends Record<string, Region[]>>(array: T[], regionsRecord: RegionRecord): [keyof RegionRecord | null, T[]][];
|
|
23
|
+
//# sourceMappingURL=regions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regions.d.ts","sourceRoot":"","sources":["../../libs/regions.ts"],"names":[],"mappings":"AAEA,oBAAY,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC;AAEpD;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG;IAC9C,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;CACrC,CAuDA;AAED;;;;;;;GAOG;AACH,wBAAgB,cAAc,CAC1B,CAAC,SAAS,OAAO,EACjB,YAAY,SAAS,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,EAC/C,KAAK,EAAE,CAAC,EAAE,EAAE,aAAa,EAAE,YAAY,sCA2BxC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import * as Ast from "@unified-latex/unified-latex-types";
|
|
2
|
+
/**
|
|
3
|
+
* Checks whether the array has a macro that could be reparsed given the `allowedTokens` but
|
|
4
|
+
* does not do any reparsing. This function can be used in auto-detection schemes to determine if
|
|
5
|
+
* macro names should actually be reparsed.
|
|
6
|
+
*/
|
|
7
|
+
export declare function hasReparsableMacroNamesInArray(tree: Ast.Node[], allowedTokens: Set<string>): boolean;
|
|
8
|
+
/**
|
|
9
|
+
* Checks whether `tree` has a macro that could be reparsed given the `allowedTokens` but
|
|
10
|
+
* does not do any reparsing. This function can be used in auto-detection schemes to determine if
|
|
11
|
+
* macro names should actually be reparsed.
|
|
12
|
+
*/
|
|
13
|
+
export declare function hasReparsableMacroNames(tree: Ast.Ast, allowedTokens: string | Set<string>): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Reparses all macro names in the array so that they may optionally include characters listed in `allowedTokens`.
|
|
16
|
+
* This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though
|
|
17
|
+
* `_` normally stops the parsing for a macro name).
|
|
18
|
+
*/
|
|
19
|
+
export declare function reparseMacroNamesInArray(tree: Ast.Node[], allowedTokens: Set<string>): void;
|
|
20
|
+
/**
|
|
21
|
+
* Reparses all macro names so that they may optionally include characters listed in `allowedTokens`.
|
|
22
|
+
* This is used, for example, when parsing expl3 syntax which allows `_` to be used in a macro name (even though
|
|
23
|
+
* `_` normally stops the parsing for a macro name). Thus, a macro `\foo_bar:Nn` would be parsed as having
|
|
24
|
+
* the name `foo_bar:Nn` rather than as `foo` followed by the strings `_`, `bar`, `:`, `Nn`.
|
|
25
|
+
*/
|
|
26
|
+
export declare function reparseMacroNames(tree: Ast.Ast, allowedTokens: string | Set<string>): void;
|
|
27
|
+
//# sourceMappingURL=reparse-macro-names.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reparse-macro-names.d.ts","sourceRoot":"","sources":["../../libs/reparse-macro-names.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,GAAG,MAAM,oCAAoC,CAAC;AA2B1D;;;;GAIG;AACH,wBAAgB,8BAA8B,CAC1C,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,EAChB,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,GAC3B,OAAO,CAkBT;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CACnC,IAAI,EAAE,GAAG,CAAC,GAAG,EACb,aAAa,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GACpC,OAAO,CA0BT;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CACpC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,EAChB,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,QAwD7B;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAC7B,IAAI,EAAE,GAAG,CAAC,GAAG,EACb,aAAa,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,QAsBtC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import * as Ast from "@unified-latex/unified-latex-types";
|
|
2
|
+
import { Region } from "./regions";
|
|
3
|
+
/**
|
|
4
|
+
* Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`.
|
|
5
|
+
* Returns an object containing regions where one or both syntaxes apply.
|
|
6
|
+
*/
|
|
7
|
+
export declare function findExpl3AndAtLetterRegionsInArray(tree: Ast.Node[]): {
|
|
8
|
+
explOnly: Region[];
|
|
9
|
+
atLetterOnly: Region[];
|
|
10
|
+
both: Region[];
|
|
11
|
+
};
|
|
12
|
+
/**
|
|
13
|
+
* Find regions between `\ExplSyntaxOn...\ExplSyntaxOff` and `\makeatletter...\makeatother`
|
|
14
|
+
* and reparse their contents so that the relevant characters (e.g., `@`, `_`, and `:`) become
|
|
15
|
+
* part of the macro names.
|
|
16
|
+
*/
|
|
17
|
+
export declare function reparseExpl3AndAtLetterRegions(tree: Ast.Ast): void;
|
|
18
|
+
//# sourceMappingURL=special-regions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"special-regions.d.ts","sourceRoot":"","sources":["../../libs/special-regions.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,GAAG,MAAM,oCAAoC,CAAC;AAG1D,OAAO,EAAiB,MAAM,EAAkB,MAAM,WAAW,CAAC;AAalE;;;GAGG;AACH,wBAAgB,kCAAkC,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG;IAClE,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,EAAE,CAAC;CAClB,CA2CA;AAMD;;;;GAIG;AACH,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,QAgD3D"}
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@unified-latex/unified-latex-util-catcode",
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "Tools for manipulating unified-latex ASTs",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"dependencies": {
|
|
8
|
+
"@unified-latex/unified-latex-types": "^1.1.0",
|
|
9
|
+
"@unified-latex/unified-latex-util-match": "^1.1.0",
|
|
10
|
+
"@unified-latex/unified-latex-util-visit": "^1.1.0"
|
|
11
|
+
},
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "git+https://github.com/siefkenj/unified-latex.git"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"pegjs",
|
|
18
|
+
"latex",
|
|
19
|
+
"parser",
|
|
20
|
+
"prettier",
|
|
21
|
+
"unified-latex",
|
|
22
|
+
"unified"
|
|
23
|
+
],
|
|
24
|
+
"author": "Jason Siefken",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"bugs": {
|
|
27
|
+
"url": "https://github.com/siefkenj/unified-latex/issues"
|
|
28
|
+
},
|
|
29
|
+
"homepage": "https://github.com/siefkenj/unified-latex#readme",
|
|
30
|
+
"exports": {
|
|
31
|
+
".": {
|
|
32
|
+
"import": "./index.js",
|
|
33
|
+
"require": "./index.cjs"
|
|
34
|
+
},
|
|
35
|
+
"./*js": "./*js",
|
|
36
|
+
"./*": {
|
|
37
|
+
"import": "./*/index.js",
|
|
38
|
+
"require": "./*/index.cjs",
|
|
39
|
+
"types": "./*/index.d.ts"
|
|
40
|
+
},
|
|
41
|
+
"./*/index": {
|
|
42
|
+
"import": "./*/index.js",
|
|
43
|
+
"require": "./*/index.cjs"
|
|
44
|
+
}
|
|
45
|
+
},
|
|
46
|
+
"files": [
|
|
47
|
+
"**/*ts",
|
|
48
|
+
"**/*js",
|
|
49
|
+
"**/*.map",
|
|
50
|
+
"**/*.json"
|
|
51
|
+
]
|
|
52
|
+
}
|