tex2typst 0.2.16 → 0.3.0-beta-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/tex-parser.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { symbolMap } from "./map";
2
- import { TexNode, TexSupsubData, TexTokenType } from "./types";
2
+ import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
3
+ import { isalpha, isdigit, assert } from "./util";
3
4
 
4
5
 
5
6
  const UNARY_COMMANDS = [
@@ -45,28 +46,9 @@ const BINARY_COMMANDS = [
45
46
  ]
46
47
 
47
48
 
48
- export class TexToken {
49
- type: TexTokenType;
50
- value: string;
51
-
52
- constructor(type: TexTokenType, value: string) {
53
- this.type = type;
54
- this.value = value;
55
- }
56
-
57
- public eq(token: TexToken): boolean {
58
- return this.type === token.type && this.value === token.value;
59
- }
60
- }
61
49
 
62
50
  const EMPTY_NODE: TexNode = new TexNode('empty', '');
63
51
 
64
- function assert(condition: boolean, message: string = ''): void {
65
- if (!condition) {
66
- throw new LatexParserError(message);
67
- }
68
- }
69
-
70
52
  function get_command_param_num(command: string): number {
71
53
  if (UNARY_COMMANDS.includes(command)) {
72
54
  return 1;
@@ -80,58 +62,10 @@ function get_command_param_num(command: string): number {
80
62
  const LEFT_CURLY_BRACKET: TexToken = new TexToken(TexTokenType.CONTROL, '{');
81
63
  const RIGHT_CURLY_BRACKET: TexToken = new TexToken(TexTokenType.CONTROL, '}');
82
64
 
83
- function find_closing_curly_bracket(tokens: TexToken[], start: number): number {
84
- assert(tokens[start].eq(LEFT_CURLY_BRACKET));
85
- let count = 1;
86
- let pos = start + 1;
87
-
88
- while (count > 0) {
89
- if (pos >= tokens.length) {
90
- throw new LatexParserError('Unmatched curly brackets');
91
- }
92
- if (tokens[pos].eq(LEFT_CURLY_BRACKET)) {
93
- count += 1;
94
- } else if (tokens[pos].eq(RIGHT_CURLY_BRACKET)) {
95
- count -= 1;
96
- }
97
- pos += 1;
98
- }
99
-
100
- return pos - 1;
101
- }
102
65
 
103
66
  const LEFT_SQUARE_BRACKET: TexToken = new TexToken(TexTokenType.ELEMENT, '[');
104
67
  const RIGHT_SQUARE_BRACKET: TexToken = new TexToken(TexTokenType.ELEMENT, ']');
105
68
 
106
- function find_closing_square_bracket(tokens: TexToken[], start: number): number {
107
- assert(tokens[start].eq(LEFT_SQUARE_BRACKET));
108
- let count = 1;
109
- let pos = start + 1;
110
-
111
- while (count > 0) {
112
- if (pos >= tokens.length) {
113
- throw new LatexParserError('Unmatched square brackets');
114
- }
115
- if (tokens[pos].eq(LEFT_SQUARE_BRACKET)) {
116
- count += 1;
117
- } else if (tokens[pos].eq(RIGHT_SQUARE_BRACKET)) {
118
- count -= 1;
119
- }
120
- pos += 1;
121
- }
122
-
123
- return pos - 1;
124
- }
125
-
126
-
127
- function isalpha(char: string): boolean {
128
- return 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'.includes(char);
129
- }
130
-
131
- function isdigit(char: string): boolean {
132
- return '0123456789'.includes(char);
133
- }
134
-
135
69
  function eat_whitespaces(tokens: TexToken[], start: number): TexToken[] {
136
70
  let pos = start;
137
71
  while (pos < tokens.length && [TexTokenType.SPACE, TexTokenType.NEWLINE].includes(tokens[pos].type)) {
@@ -143,7 +77,7 @@ function eat_whitespaces(tokens: TexToken[], start: number): TexToken[] {
143
77
 
144
78
  function eat_parenthesis(tokens: TexToken[], start: number): TexToken | null {
145
79
  const firstToken = tokens[start];
146
- if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
80
+ if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}', '.'].includes(firstToken.value)) {
147
81
  return firstToken;
148
82
  } else if (firstToken.type === TexTokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
149
83
  return firstToken;
@@ -170,20 +104,18 @@ function eat_command_name(latex: string, start: number): string {
170
104
  }
171
105
 
172
106
 
173
- const LEFT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\left');
174
- const RIGHT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\right');
175
-
176
- function find_closing_right_command(tokens: TexToken[], start: number): number {
107
+ function find_closing_match(tokens: TexToken[], start: number, leftToken: TexToken, rightToken: TexToken): number {
108
+ assert(tokens[start].eq(leftToken));
177
109
  let count = 1;
178
- let pos = start;
110
+ let pos = start + 1;
179
111
 
180
112
  while (count > 0) {
181
113
  if (pos >= tokens.length) {
182
114
  return -1;
183
115
  }
184
- if (tokens[pos].eq(LEFT_COMMAND)) {
116
+ if (tokens[pos].eq(leftToken)) {
185
117
  count += 1;
186
- } else if (tokens[pos].eq(RIGHT_COMMAND)) {
118
+ } else if (tokens[pos].eq(rightToken)) {
187
119
  count -= 1;
188
120
  }
189
121
  pos += 1;
@@ -193,27 +125,20 @@ function find_closing_right_command(tokens: TexToken[], start: number): number {
193
125
  }
194
126
 
195
127
 
128
+ const LEFT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\left');
129
+ const RIGHT_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\right');
130
+
131
+ function find_closing_right_command(tokens: TexToken[], start: number): number {
132
+ return find_closing_match(tokens, start, LEFT_COMMAND, RIGHT_COMMAND);
133
+ }
134
+
135
+
196
136
  const BEGIN_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\begin');
197
137
  const END_COMMAND: TexToken = new TexToken(TexTokenType.COMMAND, '\\end');
198
138
 
199
139
 
200
140
  function find_closing_end_command(tokens: TexToken[], start: number): number {
201
- let count = 1;
202
- let pos = start;
203
-
204
- while (count > 0) {
205
- if (pos >= tokens.length) {
206
- return -1;
207
- }
208
- if (tokens[pos].eq(BEGIN_COMMAND)) {
209
- count += 1;
210
- } else if (tokens[pos].eq(END_COMMAND)) {
211
- count -= 1;
212
- }
213
- pos += 1;
214
- }
215
-
216
- return pos - 1;
141
+ return find_closing_match(tokens, start, BEGIN_COMMAND, END_COMMAND);
217
142
  }
218
143
 
219
144
  function find_closing_curly_bracket_char(latex: string, start: number): number {
@@ -297,6 +222,7 @@ export function tokenize(latex: string): TexToken[] {
297
222
  if (['\\\\', '\\,'].includes(firstTwoChars)) {
298
223
  token = new TexToken(TexTokenType.CONTROL, firstTwoChars);
299
224
  } else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_', '\\|'].includes(firstTwoChars)) {
225
+ // \| is double vertical bar, not the same as just |
300
226
  token = new TexToken(TexTokenType.ELEMENT, firstTwoChars);
301
227
  } else {
302
228
  const command = eat_command_name(latex, pos + 1);
@@ -493,7 +419,7 @@ export class LatexParser {
493
419
  const controlChar = firstToken.value;
494
420
  switch (controlChar) {
495
421
  case '{':
496
- const posClosingBracket = find_closing_curly_bracket(tokens, start);
422
+ const posClosingBracket = find_closing_match(tokens, start, LEFT_CURLY_BRACKET, RIGHT_CURLY_BRACKET);
497
423
  const exprInside = tokens.slice(start + 1, posClosingBracket);
498
424
  return [this.parse(exprInside), posClosingBracket + 1];
499
425
  case '}':
@@ -502,12 +428,9 @@ export class LatexParser {
502
428
  return [new TexNode('control', '\\\\'), start + 1];
503
429
  case '\\,':
504
430
  return [new TexNode('control', '\\,'), start + 1];
505
- case '_': {
506
- return [ EMPTY_NODE, start];
507
- }
508
- case '^': {
431
+ case '_':
432
+ case '^':
509
433
  return [ EMPTY_NODE, start];
510
- }
511
434
  case '&':
512
435
  return [new TexNode('control', '&'), start + 1];
513
436
  default:
@@ -540,7 +463,7 @@ export class LatexParser {
540
463
  case 1: {
541
464
  if (command === '\\sqrt' && pos < tokens.length && tokens[pos].eq(LEFT_SQUARE_BRACKET)) {
542
465
  const posLeftSquareBracket = pos;
543
- const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
466
+ const posRightSquareBracket = find_closing_match(tokens, pos, LEFT_SQUARE_BRACKET, RIGHT_SQUARE_BRACKET);
544
467
  const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
545
468
  const exponent = this.parse(exprInside);
546
469
  const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
@@ -584,7 +507,7 @@ export class LatexParser {
584
507
  }
585
508
  pos++;
586
509
  const exprInsideStart = pos;
587
- const idx = find_closing_right_command(tokens, pos);
510
+ const idx = find_closing_right_command(tokens, start);
588
511
  if (idx === -1) {
589
512
  throw new LatexParserError('No matching \\right');
590
513
  }
@@ -627,7 +550,7 @@ export class LatexParser {
627
550
 
628
551
  const exprInsideStart = pos;
629
552
 
630
- const endIdx = find_closing_end_command(tokens, pos);
553
+ const endIdx = find_closing_end_command(tokens, start);
631
554
  if (endIdx === -1) {
632
555
  throw new LatexParserError('No matching \\end');
633
556
  }
@@ -0,0 +1,249 @@
1
+ import { array_includes, array_split } from "./generic";
2
+ import { reverseSymbolMap } from "./map";
3
+ import { TexNode, TexToken, TexSqrtData, TexSupsubData, TexTokenType, TypstNode, TypstSupsubData } from "./types";
4
+
5
// Typst function names that convert_typst_node_to_tex handles on its
// unary-function path (a TeX 'unaryFunc' node, with a special case for `lr`).
const TYPST_UNARY_FUNCTIONS: string[] = [
    'sqrt', 'bold', 'arrow', 'upright',
    'lr', 'op', 'macron',
    'dot', 'dot.double', 'hat', 'tilde',
    'overline', 'underline',
    'bb', 'cal', 'frak',
];

// Typst function names that convert_typst_node_to_tex handles on its
// binary-function path (`root`, `overbrace` and `underbrace` get special
// treatment there; the rest become a TeX 'binaryFunc' node).
const TYPST_BINARY_FUNCTIONS: string[] = ['frac', 'root', 'overbrace', 'underbrace'];
30
+
31
/**
 * Prefix `c` with a backslash when it is exactly `{`, `}` or `%`;
 * every other string is returned unchanged.
 */
function apply_escape_if_needed(c: string) {
    switch (c) {
        case '{':
        case '}':
        case '%':
            return '\\' + c;
        default:
            return c;
    }
}
37
+
38
+
39
/**
 * Collects the serialized tokens of TeX nodes in `queue` and renders them
 * into one string. Tokens are separated by a single space unless one of the
 * gluing rules in `writeBuffer` applies.
 */
export class TexWriter {
    protected buffer: string = "";
    queue: TexToken[] = [];

    /**
     * Append the text of `token` to the buffer, inserting one space in front
     * of it unless a gluing rule says the token attaches directly.
     */
    private writeBuffer(token: TexToken) {
        const text = token.toString();
        const written = this.buffer;

        const glue =
            // whitespace tokens never get an extra space in front
            token.type === TexTokenType.SPACE
            // first token of a clause: buffer ends with an opening bracket or a bar
            || /[{\(\[\|]$/.test(written)
            // opening the optional [] parameter of a command
            || (/\\\w+$/.test(written) && text === '[')
            // single punctuation / bracket / script characters
            || /^[\.,;:!\?\(\)\]{}_^]$/.test(text)
            || ['\\{', '\\}'].includes(text)
            // a prime
            || text === "'"
            // the token is the body of a subscript or superscript
            || written.endsWith('_') || written.endsWith('^')
            // whitespace is already present on either side
            || /\s$/.test(written)
            || /^\s/.test(text)
            // nothing has been written yet
            || written === ''
            // leading sign, e.g. produce "+1" instead of "+ 1"
            || /[\(\[{]\s*(-|\+)$/.test(written) || written === '-' || written === '+'
            // "&=" instead of "& ="
            || (written.endsWith('&') && text === '=');

        this.buffer = written + (glue ? '' : ' ') + text;
    }

    /**
     * Queue the serialized tokens of `node`. An 'ordgroup' containing an `&`
     * control node is first re-packed as a 'beginend' node named `aligned`.
     */
    public append(node: TexNode) {
        const ampersand = new TexNode('control', '&');
        const lineBreak = new TexNode('control', '\\\\');

        let toWrite = node;
        if (node.type === 'ordgroup' && array_includes(node.args!, ampersand)) {
            // rows are separated by `\\`, cells within a row by `&`
            const table: TexNode[][] = array_split(node.args!, lineBreak).map(
                (row) => array_split(row, ampersand).map((cell) => new TexNode('ordgroup', '', cell))
            );
            toWrite = new TexNode('beginend', 'aligned', [], table);
        }
        this.queue = this.queue.concat(toWrite.serialize());
    }

    /** Write every queued token to the buffer, then empty the queue. */
    protected flushQueue() {
        for (const token of this.queue) {
            this.writeBuffer(token);
        }
        this.queue = [];
    }

    /** Flush the queue and return everything written so far. */
    public finalize(): string {
        this.flushQueue();
        return this.buffer;
    }
}
108
+
109
/**
 * Translate a Typst AST node into a TeX AST node, recursing into children.
 *
 * @param node Typst node to translate.
 * @returns A newly built TeX node.
 * @throws Error when the node type, or the content of a 'control' node,
 *         has no translation here.
 */
export function convert_typst_node_to_tex(node: TypstNode): TexNode {
    // special hook for eq.def: a binary \overset node holding text `def` and `=`
    const eq_def = new TypstNode('symbol', 'eq.def');
    if (node.eq(eq_def)) {
        return new TexNode('binaryFunc', '\\overset', [
            new TexNode('text', 'def'),
            new TexNode('element', '='),
        ]);
    }

    switch (node.type) {
        case 'empty':
            return new TexNode('empty', '');
        case 'whitespace':
            return new TexNode('whitespace', node.content);
        case 'atom':
            // special hook for colon
            return node.content === ':'
                ? new TexNode('symbol', '\\colon')
                : new TexNode('element', node.content);
        case 'symbol':
            // special hook for comma
            return node.content === 'comma'
                ? new TexNode('element', ',')
                : new TexNode('symbol', typst_token_to_tex(node.content));
        case 'text':
            return new TexNode('text', node.content);
        case 'comment':
            return new TexNode('comment', node.content);
        case 'group': {
            const inner = node.args!.map(convert_typst_node_to_tex);
            // a parenthesis group gets explicit ( and ) elements around it
            const children = node.content === 'parenthesis'
                ? [new TexNode('element', '('), ...inner, new TexNode('element', ')')]
                : inner;
            return new TexNode('ordgroup', '', children);
        }
        case 'funcCall':
            return convert_typst_func_call_to_tex(node);
        case 'supsub': {
            const parts = node.data as TypstSupsubData;
            const base = convert_typst_node_to_tex(parts.base);
            const sup = parts.sup ? convert_typst_node_to_tex(parts.sup) : undefined;
            const sub = parts.sub ? convert_typst_node_to_tex(parts.sub) : undefined;
            return new TexNode('supsub', '', [], { base: base, sup: sup, sub: sub });
        }
        case 'matrix': {
            const rows = (node.data as TypstNode[][]).map(
                (row) => row.map(convert_typst_node_to_tex)
            );
            const body = new TexNode('beginend', 'matrix', [], rows);
            // the matrix environment is wrapped in \left( ... \right)
            return new TexNode('ordgroup', '', [
                new TexNode('element', '\\left('),
                body,
                new TexNode('element', '\\right)'),
            ]);
        }
        case 'control': {
            if (node.content === '\\') {
                return new TexNode('control', '\\\\');
            }
            if (node.content === '&') {
                return new TexNode('control', '&');
            }
            throw new Error('[convert_typst_node_to_tex] Unimplemented control: ' + node.content);
        }
        case 'fraction': {
            const [top, bottom] = node.args!;
            const top_tex = convert_typst_node_to_tex(top);
            const bottom_tex = convert_typst_node_to_tex(bottom);
            return new TexNode('binaryFunc', '\\frac', [top_tex, bottom_tex]);
        }
        default:
            throw new Error('[convert_typst_node_to_tex] Unimplemented type: ' + node.type);
    }
}

/** Translate a Typst 'funcCall' node; helper of convert_typst_node_to_tex. */
function convert_typst_func_call_to_tex(node: TypstNode): TexNode {
    const name = node.content;

    if (TYPST_UNARY_FUNCTIONS.includes(name)) {
        // special hook for lr: a group body is emitted as \left<d> ... \right<d>,
        // taking the delimiters from the group's first and last children
        if (name === 'lr' && node.args![0].type === 'group') {
            const items = node.args![0].args!;
            const opening = apply_escape_if_needed(items[0].content);
            const closing = apply_escape_if_needed(items[items.length - 1].content);
            return new TexNode('ordgroup', '', [
                new TexNode('element', '\\left' + opening),
                ...items.slice(1, items.length - 1).map(convert_typst_node_to_tex),
                new TexNode('element', '\\right' + closing),
            ]);
        }
        return new TexNode(
            'unaryFunc',
            typst_token_to_tex(name),
            node.args!.map(convert_typst_node_to_tex)
        );
    }

    if (TYPST_BINARY_FUNCTIONS.includes(name)) {
        // special hook for root: \sqrt with the converted degree attached as data
        if (name === 'root') {
            const [degree, radicand] = node.args!;
            const degree_tex: TexSqrtData = convert_typst_node_to_tex(degree);
            return new TexNode('unaryFunc', '\\sqrt', [convert_typst_node_to_tex(radicand)], degree_tex);
        }
        // special hook for overbrace and underbrace: the label becomes a
        // superscript (overbrace) or subscript (underbrace) of the braced body
        if (name === 'overbrace' || name === 'underbrace') {
            const [content, annotation] = node.args!;
            const base = new TexNode('unaryFunc', '\\' + name, [convert_typst_node_to_tex(content)]);
            const script = convert_typst_node_to_tex(annotation);
            const supsub = name === 'overbrace' ? { base, sup: script } : { base, sub: script };
            return new TexNode('supsub', '', [], supsub);
        }
        return new TexNode(
            'binaryFunc',
            typst_token_to_tex(name),
            node.args!.map(convert_typst_node_to_tex)
        );
    }

    // any other function: its name as a symbol followed by ( args )
    return new TexNode('ordgroup', '', [
        new TexNode('symbol', typst_token_to_tex(name)),
        new TexNode('element', '('),
        ...node.args!.map(convert_typst_node_to_tex),
        new TexNode('element', ')'),
    ]);
}
239
+
240
/**
 * Map a Typst token name to its TeX spelling.
 *
 * A single ASCII letter or digit is returned as is, `thin` becomes `\,`,
 * and anything else becomes a backslash command, named by
 * `reverseSymbolMap` when the token has an entry there and by the token
 * itself otherwise.
 */
export function typst_token_to_tex(token: string): string {
    const isSingleAlnum = /^[a-zA-Z0-9]$/.test(token);
    if (isSingleAlnum) {
        return token;
    }
    if (token === 'thin') {
        return '\\,';
    }
    const texName = reverseSymbolMap.has(token) ? reverseSymbolMap.get(token)! : token;
    return '\\' + texName;
}
package/src/tex2typst.ts CHANGED
@@ -2,8 +2,9 @@
2
2
  * This file is the entry point for bundling the .js file for the browser.
3
3
  */
4
4
 
5
- import { tex2typst } from './index';
5
+ import { tex2typst, typst2tex } from './index';
6
6
 
7
7
  if(typeof window !== 'undefined') {
8
8
  (window as any).tex2typst = tex2typst;
9
+ (window as any).typst2tex = typst2tex;
9
10
  }