tex2typst 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +282 -0
- package/dist/index.js +209 -264
- package/dist/tex2typst.min.js +10 -11
- package/package.json +1 -1
- package/src/convert.ts +7 -22
- package/src/index.ts +3 -1
- package/src/lex.ts +189 -0
- package/src/map.ts +0 -2
- package/src/tex-semantic-analysis.ts +39 -37
- package/src/tex-tokenizer.ts +30 -35
- package/src/typst-semantic-analyais.ts +38 -0
- package/src/typst-tokenizer.ts +44 -40
- package/src/typst-types.ts +55 -0
- package/dist/parser.js +0 -23
- package/src/jslex.ts +0 -311
package/package.json
CHANGED
package/src/convert.ts
CHANGED
|
@@ -87,9 +87,6 @@ function tex_token_to_typst(token: TexToken, options: Tex2TypstOptions): TypstTo
|
|
|
87
87
|
if (token.value === '\\\\') {
|
|
88
88
|
// \\ -> \
|
|
89
89
|
return new TypstToken(TypstTokenType.CONTROL, '\\');
|
|
90
|
-
} else if (token.value === '\\!') {
|
|
91
|
-
// \! -> #h(-math.thin.amount)
|
|
92
|
-
return new TypstToken(TypstTokenType.SYMBOL, '#h(-math.thin.amount)');
|
|
93
90
|
} else if (token.value === '~') {
|
|
94
91
|
// ~ -> space.nobreak
|
|
95
92
|
const typst_symbol = symbolMap.get('~')!;
|
|
@@ -215,6 +212,13 @@ export function convert_tex_node_to_typst(abstractNode: TexNode, options: Tex2Ty
|
|
|
215
212
|
switch (abstractNode.type) {
|
|
216
213
|
case 'terminal': {
|
|
217
214
|
const node = abstractNode as TexTerminal;
|
|
215
|
+
// \! -> #h(-math.thin.amount)
|
|
216
|
+
if (node.head.eq(new TexToken(TexTokenType.CONTROL, '\\!'))) {
|
|
217
|
+
return new TypstFuncCall(
|
|
218
|
+
new TypstToken(TypstTokenType.SYMBOL, '#h'),
|
|
219
|
+
[new TypstToken(TypstTokenType.LITERAL, '-math.thin.amount').toNode()]
|
|
220
|
+
);
|
|
221
|
+
}
|
|
218
222
|
return tex_token_to_typst(node.head, options).toNode();
|
|
219
223
|
}
|
|
220
224
|
case 'text': {
|
|
@@ -752,25 +756,6 @@ export function convert_typst_node_to_tex(abstractNode: TypstNode, options: Typs
|
|
|
752
756
|
case 'terminal': {
|
|
753
757
|
const node = abstractNode as TypstTerminal;
|
|
754
758
|
if (node.head.type === TypstTokenType.SYMBOL) {
|
|
755
|
-
// special hook for eq.def
|
|
756
|
-
if (node.head.value === 'eq.def') {
|
|
757
|
-
return new TexFuncCall(new TexToken(TexTokenType.COMMAND, '\\overset'), [
|
|
758
|
-
new TexText(new TexToken(TexTokenType.LITERAL, 'def')),
|
|
759
|
-
new TexToken(TexTokenType.ELEMENT, '=').toNode()
|
|
760
|
-
]);
|
|
761
|
-
}
|
|
762
|
-
// special hook for comma
|
|
763
|
-
if(node.head.value === 'comma') {
|
|
764
|
-
return new TexToken(TexTokenType.ELEMENT, ',').toNode();
|
|
765
|
-
}
|
|
766
|
-
// special hook for dif
|
|
767
|
-
if(node.head.value === 'dif') {
|
|
768
|
-
return new TexFuncCall(new TexToken(TexTokenType.COMMAND, '\\mathrm'), [new TexToken(TexTokenType.ELEMENT, 'd').toNode()]);
|
|
769
|
-
}
|
|
770
|
-
// special hook for hyph and hyph.minus
|
|
771
|
-
if(node.head.value === 'hyph' || node.head.value === 'hyph.minus') {
|
|
772
|
-
return new TexText(new TexToken(TexTokenType.LITERAL, '-'));
|
|
773
|
-
}
|
|
774
759
|
// special hook for mathbb{R} <-- RR
|
|
775
760
|
if(/^([A-Z])\1$/.test(node.head.value)) {
|
|
776
761
|
return new TexFuncCall(new TexToken(TexTokenType.COMMAND, '\\mathbb'), [
|
package/src/index.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { parseTypst } from "./typst-parser";
|
|
|
8
8
|
import { TexWriter } from "./tex-writer";
|
|
9
9
|
import { shorthandMap } from "./typst-shorthands";
|
|
10
10
|
import { expand_tex_predefined_macros } from "./tex-semantic-analysis";
|
|
11
|
+
import { expand_typst_predefined_variables } from "./typst-semantic-analyais";
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
export function tex2typst(tex: string, options: Partial<Tex2TypstOptions> = {}): string {
|
|
@@ -53,7 +54,8 @@ export function typst2tex(typst: string, options: Partial<Typst2TexOptions> = {}
|
|
|
53
54
|
}
|
|
54
55
|
|
|
55
56
|
const typstTree = parseTypst(typst);
|
|
56
|
-
const
|
|
57
|
+
const preprocessedTypstTree = expand_typst_predefined_variables(typstTree);
|
|
58
|
+
const texTree = convert_typst_node_to_tex(preprocessedTypstTree, opt);
|
|
57
59
|
const writer = new TexWriter();
|
|
58
60
|
writer.append(texTree);
|
|
59
61
|
return writer.finalize();
|
package/src/lex.ts
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Last modified: 2026-05-30
|
|
3
|
+
* Adapted from jslex - A lexer in JavaScript. https://github.com/jimbojw/jslex
|
|
4
|
+
* Licensed under MIT license
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Action attached to a lexing rule: it receives the state describing a
 * candidate match and answers with a ScanResult.
 */
export type ScannerCallback<T> = (state: ScannerState) => ScanResult<T>;

/** End-of-input sentinel type; the sentinel value itself is `null`. */
type TypeEOF = null;
const EOF: TypeEOF = null;

/** A lexing rule: a regular expression paired with the action to run on a match. */
type IRule<T> = {
    re: RegExp;
    action: ScannerCallback<T>;
};

/** A candidate match: the rule that matched, the rule's index, and the raw regex match. */
type IMatch<T> = {
    index: number;
    rule: IRule<T>;
    reMatchArray: RegExpMatchArray;
};
|
|
23
|
+
|
|
24
|
+
/**
 * Ordering used to rank candidate matches.
 * The match with the longer matched text ranks first; when both matched texts
 * have the same length, the match with the smaller `index` ranks first.
 * @param m1 Left-hand side match.
 * @param m2 Right-hand side match.
 * @return Negative when m1 ranks ahead of m2, positive when it ranks behind, 0 on a tie.
 */
function matchcompare<T>(m1: IMatch<T>, m2: IMatch<T>): number {
    const lengthGap = m2.reMatchArray[0].length - m1.reMatchArray[0].length;
    return lengthGap !== 0 ? lengthGap : m1.index - m2.index;
}
|
|
39
|
+
|
|
40
|
+
/** Outcome reported by a rule's action for a candidate match. */
enum ScanResultStatus {
    ACCEPTED = 0,
    REJECTED = 1,
    ERROR = 2,
}

/**
 * Value returned by a rule's action: a status plus the token(s) produced.
 * Build instances through the static factories below.
 */
export class ScanResult<T> {
    /** Token, or list of tokens, produced by the action (an empty list when not accepted). */
    public result: T | T[];
    /** Whether the action accepted, rejected, or failed on the candidate match. */
    public status: ScanResultStatus;
    /** Diagnostic text supplied to `ScanResult.Error`; absent on every other result. */
    public message?: string;

    /**
     * @param status Outcome of the action.
     * @param result Token(s) produced by the action.
     * @param message Optional diagnostic text; stored only when provided.
     */
    constructor(status: ScanResultStatus, result: T | T[], message?: string) {
        this.result = result;
        this.status = status;
        // Only attach the field when a message exists, so results built without
        // one keep exactly the shape they had before this field was introduced.
        if (message !== undefined) {
            this.message = message;
        }
    }

    /** Creates an ACCEPTED result carrying the produced token or tokens. */
    public static Accepted<U>(result: U | U[]): ScanResult<U> {
        return new ScanResult<U>(ScanResultStatus.ACCEPTED, result);
    }

    /** Creates a REJECTED result with an empty token list. */
    public static Rejected<U>(): ScanResult<U> {
        return new ScanResult<U>(ScanResultStatus.REJECTED, []);
    }

    /**
     * Creates an ERROR result with an empty token list.
     * @param message Diagnostic text, kept on the returned result's `message` field.
     */
    public static Error<U>(message: string): ScanResult<U> {
        return new ScanResult<U>(ScanResultStatus.ERROR, [], message);
    }
}
|
|
67
|
+
|
|
68
|
+
/** Snapshot handed to a rule's action for one candidate match. */
interface ScannerState {
    /** Scanner position in the input at the time the action is invoked. */
    pos: number;
    /** The text matched by the rule's regular expression. */
    text: string;
    /** The raw regex match, giving access to capture groups. */
    reMatchArray: RegExpMatchArray;
}

/** Pulls tokens out of an input string one `scan()` call at a time. */
export class Scanner<T> {
    private readonly _input: string;
    private readonly rules: IRule<T>[];

    // position within input stream
    private _pos: number = 0;

    /**
     * @param input Text to scan.
     * @param rules Rules to try at each position.
     */
    constructor(input: string, rules: IRule<T>[]) {
        this._input = input;
        this.rules = rules;
    }

    /**
     * Grabs the next token: every rule is matched against the remaining input,
     * the non-empty matches are ranked with `matchcompare`, and their actions
     * are tried in that order until one reports ACCEPTED.
     * @return The token(s) produced by the accepting action, or EOF once the input is exhausted.
     * @throws Error when no rule yields a non-empty match whose action accepts.
     */
    public scan(): T | T[] | TypeEOF {
        if (this._pos >= this._input.length) {
            return EOF;
        }

        const remaining = this._input.substring(this._pos);

        const candidates: IMatch<T>[] = [];
        this.rules.forEach((rule, ruleIndex) => {
            const found = remaining.match(rule.re);
            // Empty matches are dropped: accepting one would not advance the scanner.
            if (found === null || found[0].length === 0) {
                return;
            }
            candidates.push({ index: ruleIndex, rule: rule, reMatchArray: found });
        });

        candidates.sort(matchcompare);

        for (const candidate of candidates) {
            const consumed = candidate.reMatchArray[0];
            const outcome = candidate.rule.action({
                pos: this._pos,
                text: consumed,
                reMatchArray: candidate.reMatchArray,
            });
            if (outcome.status !== ScanResultStatus.ACCEPTED) {
                // Not accepted: fall back to the next-ranked candidate.
                continue;
            }
            this._pos += consumed.length;
            return outcome.result;
        }

        // Reached both when nothing matched and when every action declined.
        throw new Error("No match found for input '" + remaining + "'");
    }
}
|
|
132
|
+
|
|
133
|
+
/** Rule-driven lexer: compiles a pattern → action map once, then tokenizes inputs with it. */
export class JSLex<T> {
    /** Compiled rules, in the iteration order of the map given to the constructor. */
    public readonly rules: IRule<T>[] = [];

    /**
     * @param ruleMap Maps a regular-expression source string to the action run on a match.
     * @throws Error when a key is not a valid regular expression.
     */
    constructor(ruleMap: Map<string, ScannerCallback<T>>) {
        for (const [pattern, action] of ruleMap.entries()) {
            let re: RegExp;
            try {
                // Wrap the pattern in a non-capturing group so that '^' anchors
                // every top-level alternative: "neg|norm" becomes /^(?:neg|norm)/
                // rather than /^neg|norm/, whose second branch could match
                // anywhere in the remaining input. A non-capturing group leaves
                // the numbering of the pattern's own capture groups unchanged.
                re = new RegExp('^(?:' + pattern + ')');
            } catch (err) {
                throw new Error("Invalid regexp '" + pattern + "' (" + (err as Error).message + ")");
            }
            this.rules.push({
                re: re,
                action: action
            });
        }
    }

    /**
     * Similar to lex's yylex() function: consumes all input, calling callback for each scan result.
     * @param input Text to lex.
     * @param callback Function to execute for each token (or token list) returned by the scanner.
     */
    public lex(input: string, callback: (arg0: T | T[]) => void) {
        const scanner = new Scanner(input, this.rules);
        while (true) {
            const token = scanner.scan();
            if (token === EOF) {
                break;
            }
            callback(token);
        }
    }

    /**
     * Consumes all input, collecting tokens along the way.
     * Scan results that are arrays are flattened into the returned list.
     * @param input Text to lex.
     * @return List of tokens in input order.
     */
    public collect(input: string): T[] {
        const tokens: T[] = [];
        this.lex(input, (item: T | T[]) => {
            if (Array.isArray(item)) {
                tokens.push(...item);
            } else {
                tokens.push(item);
            }
        });
        return tokens;
    }
}
|
package/src/map.ts
CHANGED
|
@@ -52,7 +52,6 @@ const symbolMap = new Map<string, string>([
|
|
|
52
52
|
['neq', 'eq.not'],
|
|
53
53
|
['dot', 'dot'],
|
|
54
54
|
['ddot', 'dot.double'],
|
|
55
|
-
['doteq', 'dot(eq)'],
|
|
56
55
|
['dots', 'dots.h'],
|
|
57
56
|
['vdots', 'dots.v'],
|
|
58
57
|
['ddots', 'dots.down'],
|
|
@@ -1136,7 +1135,6 @@ const reverseSymbolMap = new Map<string, string>();
|
|
|
1136
1135
|
for(const [key, value] of Array.from(symbolMap.entries()).reverse()) {
|
|
1137
1136
|
reverseSymbolMap.set(value, key);
|
|
1138
1137
|
}
|
|
1139
|
-
reverseSymbolMap.set('oo', 'infty');
|
|
1140
1138
|
|
|
1141
1139
|
// force override some one-to-multiple mappings
|
|
1142
1140
|
const typst_to_tex_map = new Map<string, string>([
|
|
@@ -1,37 +1,39 @@
|
|
|
1
|
-
import { parseTex } from "./tex-parser";
|
|
2
|
-
import { TexNode,
|
|
3
|
-
|
|
4
|
-
const TEX_PREDEFINED_MACROS: Map<string, string> = new Map([
|
|
5
|
-
// https://github.com/KaTeX/KaTeX/blob/434d4b8aef4c3311ebfd3405a9f0cce18ead953b/src/macros.ts#L351-L367
|
|
6
|
-
["\\varGamma", "\\mathit{\\Gamma}"],
|
|
7
|
-
["\\varDelta", "\\mathit{\\Delta}"],
|
|
8
|
-
["\\varTheta", "\\mathit{\\Theta}"],
|
|
9
|
-
["\\varLambda", "\\mathit{\\Lambda}"],
|
|
10
|
-
["\\varXi", "\\mathit{\\Xi}"],
|
|
11
|
-
["\\varPi", "\\mathit{\\Pi}"],
|
|
12
|
-
["\\varSigma", "\\mathit{\\Sigma}"],
|
|
13
|
-
["\\varUpsilon", "\\mathit{\\Upsilon}"],
|
|
14
|
-
["\\varPhi", "\\mathit{\\Phi}"],
|
|
15
|
-
["\\varPsi", "\\mathit{\\Psi}"],
|
|
16
|
-
["\\varOmega", "\\mathit{\\Omega}"],
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
1
|
+
import { parseTex } from "./tex-parser";
|
|
2
|
+
import { TexNode, TexTokenType } from "./tex-types";
|
|
3
|
+
|
|
4
|
+
/**
 * TeX commands that are expanded before conversion.
 * Each key is a command name; each value is TeX source that is parsed and
 * substituted for the command.
 */
const TEX_PREDEFINED_MACROS: Map<string, string> = new Map([
    // https://github.com/KaTeX/KaTeX/blob/434d4b8aef4c3311ebfd3405a9f0cce18ead953b/src/macros.ts#L351-L367
    ["\\varGamma", "\\mathit{\\Gamma}"],
    ["\\varDelta", "\\mathit{\\Delta}"],
    ["\\varTheta", "\\mathit{\\Theta}"],
    ["\\varLambda", "\\mathit{\\Lambda}"],
    ["\\varXi", "\\mathit{\\Xi}"],
    ["\\varPi", "\\mathit{\\Pi}"],
    ["\\varSigma", "\\mathit{\\Sigma}"],
    ["\\varUpsilon", "\\mathit{\\Upsilon}"],
    ["\\varPhi", "\\mathit{\\Phi}"],
    ["\\varPsi", "\\mathit{\\Psi}"],
    ["\\varOmega", "\\mathit{\\Omega}"],

    ["\\doteq", "\\dot{=}"],
]);

/**
 * Node-level transform: a terminal node whose head is a COMMAND token listed in
 * TEX_PREDEFINED_MACROS is replaced by the parse tree of the macro's expansion.
 * Every other node is returned unchanged.
 * @param node Node to inspect.
 * @returns The parsed expansion, or `node` itself when no macro applies.
 */
function _expand_tex_predefined_macros(node: TexNode): TexNode {
    // Guard clause instead of a switch whose "terminal" case fell through to `default`.
    if (node.type !== "terminal" || node.head.type !== TexTokenType.COMMAND) {
        return node;
    }
    // Single lookup; the table holds only strings, so `undefined` means "not a predefined macro".
    const target_str = TEX_PREDEFINED_MACROS.get(node.head.value);
    return target_str === undefined ? node : parseTex(target_str);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Expands the predefined TeX macros found in a tree by running the node-level
 * expander through `bottomTopTraversalTransform`.
 * NOTE(review): the traversal order is whatever `bottomTopTraversalTransform`
 * implements (presumably children before parents) — confirm in tex-types.
 * @param node Root of the TeX tree.
 * @returns The tree returned by the traversal.
 */
export function expand_tex_predefined_macros(node: TexNode): TexNode {
    const expanded = node.bottomTopTraversalTransform(_expand_tex_predefined_macros);
    return expanded;
}
|
package/src/tex-tokenizer.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { TexToken, TexTokenType } from "./tex-types";
|
|
2
|
-
import { JSLex,
|
|
2
|
+
import { JSLex, ScannerCallback, ScanResult } from "./lex";
|
|
3
3
|
|
|
4
4
|
export const TEX_UNARY_COMMANDS = [
|
|
5
5
|
'sqrt',
|
|
@@ -70,12 +70,12 @@ function unescape(str: string): string {
|
|
|
70
70
|
return str;
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
const rules_map = new Map<string,
|
|
73
|
+
const rules_map = new Map<string, ScannerCallback<TexToken>>([
|
|
74
74
|
// match `\begin{array}{cc}`
|
|
75
75
|
[
|
|
76
76
|
String.raw`\\begin{(array|subarry)}{(.+?)}`, (s) => {
|
|
77
|
-
const match = s.reMatchArray
|
|
78
|
-
return [
|
|
77
|
+
const match = s.reMatchArray;
|
|
78
|
+
return ScanResult.Accepted([
|
|
79
79
|
new TexToken(TexTokenType.COMMAND, '\\begin'),
|
|
80
80
|
new TexToken(TexTokenType.CONTROL, '{'),
|
|
81
81
|
new TexToken(TexTokenType.LITERAL, match[1]),
|
|
@@ -83,74 +83,69 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
|
|
|
83
83
|
new TexToken(TexTokenType.CONTROL, '{'),
|
|
84
84
|
new TexToken(TexTokenType.LITERAL, match[2]),
|
|
85
85
|
new TexToken(TexTokenType.CONTROL, '}'),
|
|
86
|
-
]
|
|
86
|
+
]);
|
|
87
87
|
}
|
|
88
88
|
],
|
|
89
89
|
[
|
|
90
90
|
String.raw`\\(text|operatorname\*?|textcolor|begin|end|hspace|array)\s*{(.+?)}`, (s) => {
|
|
91
|
-
const match = s.reMatchArray
|
|
92
|
-
return [
|
|
91
|
+
const match = s.reMatchArray;
|
|
92
|
+
return ScanResult.Accepted([
|
|
93
93
|
new TexToken(TexTokenType.COMMAND, '\\' + match[1]),
|
|
94
94
|
new TexToken(TexTokenType.CONTROL, '{'),
|
|
95
95
|
new TexToken(TexTokenType.LITERAL, unescape(match[2])),
|
|
96
96
|
new TexToken(TexTokenType.CONTROL, '}')
|
|
97
|
-
]
|
|
97
|
+
]);
|
|
98
98
|
}
|
|
99
99
|
],
|
|
100
|
-
[String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text
|
|
101
|
-
[String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text
|
|
102
|
-
[String.raw`\\[\\,:;!> ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text
|
|
103
|
-
[String.raw`~`, (s) => new TexToken(TexTokenType.CONTROL, s.text
|
|
104
|
-
[String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
|
|
105
|
-
[String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text
|
|
106
|
-
[String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
100
|
+
[String.raw`%[^\n]*`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.COMMENT, s.text.substring(1)))],
|
|
101
|
+
[String.raw`[{}_^&]`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.CONTROL, s.text)])],
|
|
102
|
+
[String.raw`\\[\\,:;!> ]`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.CONTROL, s.text)])],
|
|
103
|
+
[String.raw`~`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.CONTROL, s.text)])],
|
|
104
|
+
[String.raw`\r?\n`, (_s) => ScanResult.Accepted([new TexToken(TexTokenType.NEWLINE, "\n")])],
|
|
105
|
+
[String.raw`\s+`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.SPACE, s.text)])],
|
|
106
|
+
[String.raw`\\[{}%$&#_|]`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.ELEMENT, s.text)])],
|
|
107
107
|
// e.g. match `\frac13`, `\frac1 b`, `\frac a b`
|
|
108
108
|
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
|
|
109
|
-
const match = s.reMatchArray
|
|
109
|
+
const match = s.reMatchArray;
|
|
110
110
|
const command = match![1];
|
|
111
111
|
if (TEX_BINARY_COMMANDS.includes(command.substring(1))) {
|
|
112
112
|
const arg1 = match[2].trimStart();
|
|
113
113
|
const arg2 = match[3];
|
|
114
|
-
return [
|
|
114
|
+
return ScanResult.Accepted([
|
|
115
115
|
new TexToken(TexTokenType.COMMAND, command),
|
|
116
116
|
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
117
117
|
new TexToken(TexTokenType.ELEMENT, arg2),
|
|
118
|
-
];
|
|
118
|
+
]);
|
|
119
119
|
} else {
|
|
120
|
-
|
|
121
|
-
return [];
|
|
120
|
+
return ScanResult.Rejected();
|
|
122
121
|
}
|
|
123
122
|
}],
|
|
124
123
|
// e.g. match `\sqrt3`, `\sqrt a`
|
|
125
124
|
[String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
|
|
126
|
-
const match = s.reMatchArray
|
|
125
|
+
const match = s.reMatchArray;
|
|
127
126
|
const command = match[1];
|
|
128
127
|
if (TEX_UNARY_COMMANDS.includes(command.substring(1))) {
|
|
129
128
|
const arg1 = match[2].trimStart();
|
|
130
|
-
return [
|
|
129
|
+
return ScanResult.Accepted([
|
|
131
130
|
new TexToken(TexTokenType.COMMAND, command),
|
|
132
131
|
new TexToken(TexTokenType.ELEMENT, arg1),
|
|
133
|
-
];
|
|
132
|
+
]);
|
|
134
133
|
} else {
|
|
135
|
-
|
|
136
|
-
return [];
|
|
134
|
+
return ScanResult.Rejected();
|
|
137
135
|
}
|
|
138
136
|
}],
|
|
139
|
-
[String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text
|
|
137
|
+
[String.raw`\\[a-zA-Z]+`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.COMMAND, s.text))],
|
|
140
138
|
// Numbers like "123", "3.14"
|
|
141
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
142
|
-
[String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
143
|
-
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
139
|
+
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
140
|
+
[String.raw`[a-zA-Z]`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
141
|
+
[String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
144
142
|
// non-ASCII characters
|
|
145
|
-
[String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text
|
|
146
|
-
[String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text
|
|
143
|
+
[String.raw`[^\x00-\x7F]`, (s) => ScanResult.Accepted(new TexToken(TexTokenType.ELEMENT, s.text))],
|
|
144
|
+
[String.raw`.`, (s) => ScanResult.Accepted([new TexToken(TexTokenType.UNKNOWN, s.text)])],
|
|
147
145
|
]);
|
|
148
146
|
|
|
149
|
-
const spec = {
|
|
150
|
-
"start": rules_map
|
|
151
|
-
};
|
|
152
147
|
|
|
153
148
|
export function tokenize_tex(input: string): TexToken[] {
|
|
154
|
-
const lexer = new JSLex<TexToken>(
|
|
149
|
+
const lexer = new JSLex<TexToken>(rules_map);
|
|
155
150
|
return lexer.collect(input);
|
|
156
151
|
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { parseTypst } from "./typst-parser";
|
|
2
|
+
import { TypstNode, TypstTokenType } from "./typst-types";
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
/**
 * Typst symbols that are expanded before conversion.
 * Each key is a symbol name; each value is Typst source that is parsed and
 * substituted for the symbol.
 */
const TYPST_PREDEFINED_VARIABLES: Map<string, string> = new Map([
    ["dif", "upright(d)"],
    ["eq.def", 'limits(=)^"def"'],
    ["oo", "infinity"],
    ["comma", ","],
    ["hyph", '"-"'],
    ["hyph.minus", '"-"'],

    /*
    ["AA", "bb(A)"],
    ["BB", "bb(B)"],
    ["CC", "bb(C)"],
    */
]);

/**
 * Node-level transform: a terminal node whose head is a SYMBOL token listed in
 * TYPST_PREDEFINED_VARIABLES is replaced by the parse tree of its expansion.
 * Every other node is returned unchanged.
 * @param node Node to inspect.
 * @returns The parsed expansion, or `node` itself when no variable applies.
 */
function _expand_typst_predefined_variables(node: TypstNode): TypstNode {
    // Guard clause instead of a switch whose "terminal" case fell through to `default`.
    if (node.type !== "terminal" || node.head.type !== TypstTokenType.SYMBOL) {
        return node;
    }
    // Single lookup; the table holds only strings, so `undefined` means "not predefined".
    const target_str = TYPST_PREDEFINED_VARIABLES.get(node.head.value);
    return target_str === undefined ? node : parseTypst(target_str);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Expands the predefined Typst variables found in a tree by running the
 * node-level expander through `bottomTopTraversalTransform`.
 * NOTE(review): the traversal order is whatever `bottomTopTraversalTransform`
 * implements (presumably children before parents) — confirm in typst-types.
 * @param node Root of the Typst tree.
 * @returns The tree returned by the traversal.
 */
export function expand_typst_predefined_variables(node: TypstNode): TypstNode {
    const expanded = node.bottomTopTraversalTransform(_expand_typst_predefined_variables);
    return expanded;
}
|
package/src/typst-tokenizer.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { TypstToken } from "./typst-types";
|
|
2
2
|
import { TypstTokenType } from "./typst-types";
|
|
3
3
|
import { reverseShorthandMap } from "./typst-shorthands";
|
|
4
|
-
import { JSLex,
|
|
4
|
+
import { JSLex, ScannerCallback, ScanResult } from "./lex";
|
|
5
5
|
|
|
6
6
|
const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
|
|
7
7
|
|
|
@@ -20,67 +20,71 @@ function generate_regex_for_shorthands(): string {
|
|
|
20
20
|
|
|
21
21
|
const REGEX_SHORTHANDS = generate_regex_for_shorthands();
|
|
22
22
|
|
|
23
|
-
const rules_map = new Map<string,
|
|
24
|
-
[String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text
|
|
25
|
-
[String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
26
|
-
[String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text
|
|
27
|
-
[String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
|
|
28
|
-
[String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text
|
|
29
|
-
[String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
30
|
-
[
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
23
|
+
const rules_map = new Map<string, ScannerCallback<TypstToken>>([
|
|
24
|
+
[String.raw`//[^\n]*`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.COMMENT, s.text.substring(2)))],
|
|
25
|
+
[String.raw`/`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text))],
|
|
26
|
+
[String.raw`[_^&]`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.CONTROL, s.text))],
|
|
27
|
+
[String.raw`\r?\n`, (_s) => ScanResult.Accepted(new TypstToken(TypstTokenType.NEWLINE, "\n"))],
|
|
28
|
+
[String.raw`\s+`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.SPACE, s.text))],
|
|
29
|
+
[String.raw`\\[$&#_]`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text))],
|
|
30
|
+
[
|
|
31
|
+
String.raw`\\\n`,
|
|
32
|
+
(s) => ScanResult.Accepted([
|
|
33
|
+
new TypstToken(TypstTokenType.CONTROL, "\\"),
|
|
34
|
+
new TypstToken(TypstTokenType.NEWLINE, "\n")
|
|
35
|
+
])
|
|
36
|
+
],
|
|
36
37
|
[String.raw`\\\s`, (s) => {
|
|
37
|
-
return [
|
|
38
|
+
return ScanResult.Accepted([
|
|
38
39
|
new TypstToken(TypstTokenType.CONTROL, "\\"),
|
|
39
40
|
new TypstToken(TypstTokenType.SPACE, " "),
|
|
40
|
-
]
|
|
41
|
+
])
|
|
41
42
|
}],
|
|
42
43
|
// this backslash is dummy and will be ignored in later stages
|
|
43
|
-
[String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
|
|
44
|
+
[String.raw`\\\S`, (_s) => ScanResult.Accepted(new TypstToken(TypstTokenType.CONTROL, ""))],
|
|
44
45
|
[
|
|
45
46
|
String.raw`"([^"]|(\\"))*"`,
|
|
46
47
|
(s) => {
|
|
47
|
-
const text = s.text
|
|
48
|
+
const text = s.text.substring(1, s.text.length - 1);
|
|
48
49
|
// replace all escape characters with their actual characters
|
|
49
50
|
text.replaceAll('\\"', '"');
|
|
50
|
-
return new TypstToken(TypstTokenType.TEXT, text);
|
|
51
|
+
return ScanResult.Accepted(new TypstToken(TypstTokenType.TEXT, text));
|
|
51
52
|
}
|
|
52
53
|
],
|
|
53
54
|
[
|
|
54
55
|
REGEX_SHORTHANDS,
|
|
55
56
|
(s) => {
|
|
56
|
-
const shorthand = s.text
|
|
57
|
+
const shorthand = s.text;
|
|
57
58
|
const symbol = reverseShorthandMap.get(shorthand)!;
|
|
58
|
-
return new TypstToken(TypstTokenType.SYMBOL, symbol);
|
|
59
|
+
return ScanResult.Accepted(new TypstToken(TypstTokenType.SYMBOL, symbol));
|
|
59
60
|
}
|
|
60
61
|
],
|
|
61
|
-
[String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
62
|
-
[String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
63
|
-
[
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
62
|
+
[ String.raw`[0-9]+(\.[0-9]+)?`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text)) ],
|
|
63
|
+
[ String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text)) ],
|
|
64
|
+
[
|
|
65
|
+
String.raw`#h\((.+?)\)`,
|
|
66
|
+
(s) => {
|
|
67
|
+
const match = s.reMatchArray;
|
|
68
|
+
return ScanResult.Accepted([
|
|
69
|
+
new TypstToken(TypstTokenType.SYMBOL, "#h"),
|
|
70
|
+
new TypstToken(TypstTokenType.ELEMENT, "("),
|
|
71
|
+
new TypstToken(TypstTokenType.LITERAL, match[1]),
|
|
72
|
+
new TypstToken(TypstTokenType.ELEMENT, ")"),
|
|
73
|
+
]);
|
|
74
|
+
}
|
|
75
|
+
],
|
|
76
|
+
[String.raw`#none`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.NONE, s.text))],
|
|
77
|
+
[
|
|
78
|
+
String.raw`#none`,
|
|
79
|
+
(s) => ScanResult.Accepted(new TypstToken(TypstTokenType.NONE, s.text)),
|
|
80
|
+
],
|
|
73
81
|
[String.raw`#?[a-zA-Z\.]+`, (s) => {
|
|
74
|
-
return new TypstToken(s.text
|
|
82
|
+
return ScanResult.Accepted(new TypstToken(s.text.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text));
|
|
75
83
|
}],
|
|
76
|
-
[String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text
|
|
84
|
+
[String.raw`.`, (s) => ScanResult.Accepted(new TypstToken(TypstTokenType.ELEMENT, s.text))],
|
|
77
85
|
]);
|
|
78
86
|
|
|
79
|
-
const spec = {
|
|
80
|
-
"start": rules_map
|
|
81
|
-
};
|
|
82
|
-
|
|
83
87
|
export function tokenize_typst(input: string): TypstToken[] {
|
|
84
|
-
const lexer = new JSLex<TypstToken>(
|
|
88
|
+
const lexer = new JSLex<TypstToken>(rules_map);
|
|
85
89
|
return lexer.collect(input);
|
|
86
90
|
}
|