tex2typst 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/tex-parser.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { symbolMap } from "./map";
2
2
  import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
3
- import { isalpha, isdigit, assert } from "./util";
4
-
3
+ import { assert } from "./util";
4
+ import { JSLex, Scanner } from "./jslex";
5
5
 
6
6
  const UNARY_COMMANDS = [
7
7
  'sqrt',
@@ -33,6 +33,8 @@ const UNARY_COMMANDS = [
33
33
  'vec',
34
34
  'widehat',
35
35
  'widetilde',
36
+ 'overleftarrow',
37
+ 'overrightarrow',
36
38
  ]
37
39
 
38
40
  const BINARY_COMMANDS = [
@@ -95,15 +97,6 @@ function eat_primes(tokens: TexToken[], start: number): number {
95
97
  }
96
98
 
97
99
 
98
- function eat_command_name(latex: string, start: number): string {
99
- let pos = start;
100
- while (pos < latex.length && isalpha(latex[pos])) {
101
- pos += 1;
102
- }
103
- return latex.substring(start, pos);
104
- }
105
-
106
-
107
100
  function find_closing_match(tokens: TexToken[], start: number, leftToken: TexToken, rightToken: TexToken): number {
108
101
  assert(tokens[start].eq(leftToken));
109
102
  let count = 1;
@@ -141,135 +134,49 @@ function find_closing_end_command(tokens: TexToken[], start: number): number {
141
134
  return find_closing_match(tokens, start, BEGIN_COMMAND, END_COMMAND);
142
135
  }
143
136
 
144
- function find_closing_curly_bracket_char(latex: string, start: number): number {
145
- assert(latex[start] === '{');
146
- let count = 1;
147
- let pos = start + 1;
148
137
 
149
- while (count > 0) {
150
- if (pos >= latex.length) {
151
- throw new LatexParserError('Unmatched curly brackets');
152
- }
153
- if(pos + 1 < latex.length && (['\\{', '\\}'].includes(latex.substring(pos, pos + 2)))) {
154
- pos += 2;
155
- continue;
156
- }
157
- if (latex[pos] === '{') {
158
- count += 1;
159
- } else if (latex[pos] === '}') {
160
- count -= 1;
161
- }
162
- pos += 1;
138
/**
 * Replace backslash-escaped TeX special characters with the bare character.
 *
 * Handles `\{`, `\}`, `\\`, `\$`, `\&`, `\#`, `\_` and `\%`. The string is
 * scanned once from left to right, so the output of one replacement is never
 * unescaped a second time (e.g. `\\$` becomes `\$`, not `$`).
 *
 * @param str - text that may contain backslash escapes
 * @returns the text with each recognised escape replaced by its character
 */
function unescape(str: string): string {
    // A single global pass: the capture group is the escaped character itself.
    return str.replace(/\\([{}\\$&#_%])/g, '$1');
}
167
145
 
168
-
169
- export function tokenize(latex: string): TexToken[] {
170
- const tokens: TexToken[] = [];
171
- let pos = 0;
172
-
173
- while (pos < latex.length) {
174
- const firstChar = latex[pos];
175
- let token: TexToken;
176
- switch (firstChar) {
177
- case '%': {
178
- let newPos = pos + 1;
179
- while (newPos < latex.length && latex[newPos] !== '\n') {
180
- newPos += 1;
181
- }
182
- token = new TexToken(TexTokenType.COMMENT, latex.slice(pos + 1, newPos));
183
- pos = newPos;
184
- break;
185
- }
186
- case '{':
187
- case '}':
188
- case '_':
189
- case '^':
190
- case '&':
191
- token = new TexToken(TexTokenType.CONTROL, firstChar);
192
- pos++;
193
- break;
194
- case '\n':
195
- token = new TexToken(TexTokenType.NEWLINE, firstChar);
196
- pos++;
197
- break;
198
- case '\r': {
199
- if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
200
- token = new TexToken(TexTokenType.NEWLINE, '\n');
201
- pos += 2;
202
- } else {
203
- token = new TexToken(TexTokenType.NEWLINE, '\n');
204
- pos ++;
205
- }
206
- break;
207
- }
208
- case ' ': {
209
- let newPos = pos;
210
- while (newPos < latex.length && latex[newPos] === ' ') {
211
- newPos += 1;
212
- }
213
- token = new TexToken(TexTokenType.SPACE, latex.slice(pos, newPos));
214
- pos = newPos;
215
- break;
216
- }
217
- case '\\': {
218
- if (pos + 1 >= latex.length) {
219
- throw new LatexParserError('Expecting command name after \\');
220
- }
221
- const firstTwoChars = latex.slice(pos, pos + 2);
222
- if (['\\\\', '\\,'].includes(firstTwoChars)) {
223
- token = new TexToken(TexTokenType.CONTROL, firstTwoChars);
224
- } else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_', '\\|'].includes(firstTwoChars)) {
225
- // \| is double vertical bar, not the same as just |
226
- token = new TexToken(TexTokenType.ELEMENT, firstTwoChars);
227
- } else {
228
- const command = eat_command_name(latex, pos + 1);
229
- token = new TexToken(TexTokenType.COMMAND, '\\' + command);
230
- }
231
- pos += token.value.length;
232
- break;
233
- }
234
- default: {
235
- if (isdigit(firstChar)) {
236
- let newPos = pos;
237
- while (newPos < latex.length && isdigit(latex[newPos])) {
238
- newPos += 1;
239
- }
240
- token = new TexToken(TexTokenType.ELEMENT, latex.slice(pos, newPos));
241
- } else if (isalpha(firstChar)) {
242
- token = new TexToken(TexTokenType.ELEMENT, firstChar);
243
- } else if ('+-*/=\'<>!.,;:?()[]|'.includes(firstChar)) {
244
- token = new TexToken(TexTokenType.ELEMENT, firstChar)
245
- } else {
246
- token = new TexToken(TexTokenType.UNKNOWN, firstChar);
247
- }
248
- pos += token.value.length;
249
- }
250
- }
251
-
252
- tokens.push(token);
253
-
254
- if (token.type === TexTokenType.COMMAND && ['\\text', '\\operatorname', '\\begin', '\\end'].includes(token.value)) {
255
- if (pos >= latex.length || latex[pos] !== '{') {
256
- throw new LatexParserError(`No content for ${token.value} command`);
257
- }
258
- tokens.push(new TexToken(TexTokenType.CONTROL, '{'));
259
- const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
260
- pos++;
261
- let textInside = latex.slice(pos, posClosingBracket);
262
- // replace all escape characters with their actual characters
263
- const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
264
- for (const char of chars) {
265
- textInside = textInside.replaceAll('\\' + char, char);
266
- }
267
- tokens.push(new TexToken(TexTokenType.TEXT, textInside));
268
- tokens.push(new TexToken(TexTokenType.CONTROL, '}'));
269
- pos = posClosingBracket + 1;
146
/**
 * Tokenizer rules for TeX input. Each entry maps a regular-expression source
 * string to an action that builds the token(s) for the matched text.
 *
 * NOTE(review): how competing rules are prioritised (declaration order vs.
 * longest match) is decided by JSLex, which is not visible here — confirm
 * before reordering entries.
 */
const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
    [
        // \text{...}, \operatorname{...}, \begin{...}, \end{...}: the braces and
        // their content are consumed together and emitted as a TEXT token.
        // Inside the braces a backslash always consumes the next character, so
        // an escaped `\}` does not terminate the argument; the argument may be
        // empty. Nested unescaped braces are not balanced: the match ends at
        // the first unescaped `}`.
        String.raw`\\(text|operatorname|begin|end)\{(\\[\s\S]|[^\\}])*\}`, (s) => {
            const text = s.text()!;
            const open = text.indexOf('{');
            const command = text.substring(0, open);
            // The match always ends with the closing brace, so lastIndexOf finds it.
            const text_inside = text.substring(open + 1, text.lastIndexOf('}'));
            return [
                new TexToken(TexTokenType.COMMAND, command),
                new TexToken(TexTokenType.CONTROL, '{'),
                new TexToken(TexTokenType.TEXT, unescape(text_inside)),
                new TexToken(TexTokenType.CONTROL, '}')
            ]
        }
    ],
    // Comment: everything after `%` up to (not including) the line break.
    [String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
    [String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
    // Both LF and CRLF are normalised to "\n".
    [String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
    [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
    // `\\` and `\,` are control sequences.
    [String.raw`\\[\\,]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
    // Escaped special characters are kept verbatim as elements.
    [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text()!)],
    [String.raw`[0-9]+`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
    // Fallback for any other single character.
    [String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
]);
172
+
173
// Lexer specification handed to JSLex: the "start" entry holds the TeX rule table.
const spec = { "start": rules_map };

/**
 * Split a TeX source string into tokens.
 *
 * A new JSLex instance is built from `spec` on every call and its
 * `collect` result is returned unchanged.
 *
 * @param input - TeX source text
 * @returns the tokens produced by the lexer
 */
export function tokenize_tex(input: string): TexToken[] {
    const tex_lexer = new JSLex<TexToken>(spec);
    const tokens = tex_lexer.collect(input);
    return tokens;
}
274
181
 
275
182
 
@@ -633,7 +540,7 @@ function passExpandCustomTexMacros(tokens: TexToken[], customTexMacros: {[key: s
633
540
  let out_tokens: TexToken[] = [];
634
541
  for (const token of tokens) {
635
542
  if (token.type === TexTokenType.COMMAND && customTexMacros[token.value]) {
636
- const expanded_tokens = tokenize(customTexMacros[token.value]);
543
+ const expanded_tokens = tokenize_tex(customTexMacros[token.value]);
637
544
  out_tokens = out_tokens.concat(expanded_tokens);
638
545
  } else {
639
546
  out_tokens.push(token);
@@ -644,7 +551,7 @@ function passExpandCustomTexMacros(tokens: TexToken[], customTexMacros: {[key: s
644
551
 
645
552
  export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
646
553
  const parser = new LatexParser();
647
- let tokens = tokenize(tex);
554
+ let tokens = tokenize_tex(tex);
648
555
  tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
649
556
  tokens = passExpandCustomTexMacros(tokens, customTexMacros);
650
557
  return parser.parse(tokens);
package/src/types.ts CHANGED
@@ -352,7 +352,7 @@ export type TypstArrayData = TypstNode[][];
352
352
  type TypstNodeType = 'atom' | 'symbol' | 'text' | 'control' | 'comment' | 'whitespace'
353
353
  | 'empty' | 'group' | 'supsub' | 'funcCall' | 'fraction' | 'align' | 'matrix' | 'unknown';
354
354
 
355
- export type TypstPrimitiveValue = string | boolean | null;
355
+ export type TypstPrimitiveValue = string | boolean | null | TypstToken;
356
356
  export type TypstNamedParams = { [key: string]: TypstPrimitiveValue };
357
357
 
358
358
  // #none
@@ -389,8 +389,10 @@ export class TypstNode {
389
389
  /**
   * Options for converting TeX to Typst. Every field is optional; the
   * trailing comment on a field states its documented default.
   */
  export interface Tex2TypstOptions {
    nonStrict?: boolean; // default is true
    preferTypstIntrinsic?: boolean; // default is true
    preferShorthands?: boolean; // default is true
    keepSpaces?: boolean; // default is false
    fracToSlash?: boolean; // default is true
    inftyToOo?: boolean; // default is false
    // Maps a TeX command token (as tokenized, i.e. including the leading
    // backslash) to the TeX source that replaces it.
    customTexMacros?: { [key: string]: string };
    // TODO: custom typst functions
  }
@@ -1,7 +1,14 @@
1
1
 
2
2
  import { array_find } from "./generic";
3
3
  import { TYPST_NONE, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
4
- import { assert, isalpha, isdigit } from "./util";
4
+ import { assert, isalpha } from "./util";
5
+ import { reverseShorthandMap } from "./typst-shorthands";
6
+ import { JSLex, Scanner } from "./jslex";
7
+
8
+
9
// Shared node of type 'empty' with empty content.
const TYPST_EMPTY_NODE = new TypstNode('empty', '');

// The shorthand strings known to the tokenizer: the keys of reverseShorthandMap.
const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
5
12
 
6
13
  // TODO: In Typst, y' ' is not the same as y''.
7
14
  // The parser should be able to parse the former correctly.
@@ -14,137 +21,76 @@ function eat_primes(tokens: TypstToken[], start: number): number {
14
21
  }
15
22
 
16
23
 
17
- function eat_identifier_name(typst: string, start: number): string {
18
- let pos = start;
19
- while (pos < typst.length && (isalpha(typst[pos]) || (typst[pos] === '.'))) {
20
- pos += 1;
21
- }
22
- return typst.substring(start, pos);
24
/**
 * Build the regular-expression source that matches any Typst shorthand.
 *
 * Every shorthand is escaped so that all regex metacharacters are taken
 * literally, and the alternatives are ordered from longest to shortest so a
 * shorthand is never shadowed by one of its own prefixes (e.g. `<==` before
 * `<=`). The sort is stable, so shorthands of equal length keep their
 * original relative order.
 *
 * @returns a parenthesised alternation such as `(<==>|<-->|...)`
 */
function generate_regex_for_shorthands(): string {
    const alternatives = [...TYPST_SHORTHANDS]
        .sort((a, b) => b.length - a.length)
        // `$&` in the replacement stands for the matched metacharacter.
        .map((s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    return `(${alternatives.join('|')})`;
}
24
34
 
25
35
 
26
- const TYPST_EMPTY_NODE = new TypstNode('empty', '');
27
-
28
-
29
- export function tokenize_typst(typst: string): TypstToken[] {
30
- const tokens: TypstToken[] = [];
31
-
32
- let pos = 0;
33
-
34
- while (pos < typst.length) {
35
- const firstChar = typst[pos];
36
- let token: TypstToken;
37
- switch (firstChar) {
38
- case '_':
39
- case '^':
40
- case '&':
41
- token = new TypstToken(TypstTokenType.CONTROL, firstChar);
42
- pos++;
43
- break;
44
- case '\n':
45
- token = new TypstToken(TypstTokenType.NEWLINE, firstChar);
46
- pos++;
47
- break;
48
- case '\r': {
49
- if (pos + 1 < typst.length && typst[pos + 1] === '\n') {
50
- token = new TypstToken(TypstTokenType.NEWLINE, '\n');
51
- pos += 2;
52
- } else {
53
- token = new TypstToken(TypstTokenType.NEWLINE, '\n');
54
- pos++;
55
- }
56
- break;
57
- }
58
- case ' ': {
59
- let newPos = pos;
60
- while (newPos < typst.length && typst[newPos] === ' ') {
61
- newPos++;
62
- }
63
- token = new TypstToken(TypstTokenType.SPACE, typst.substring(pos, newPos));
64
- pos = newPos;
65
- break;
66
- }
67
- case '/': {
68
- if (pos < typst.length && typst[pos + 1] === '/') {
69
- let newPos = pos + 2;
70
- while (newPos < typst.length && typst[newPos] !== '\n') {
71
- newPos++;
72
- }
73
- token = new TypstToken(TypstTokenType.COMMENT, typst.slice(pos + 2, newPos));
74
- pos = newPos;
75
- } else {
76
- token = new TypstToken(TypstTokenType.ELEMENT, '/');
77
- pos++;
78
- }
79
- break;
80
- }
81
- case '\\': {
82
- if (pos + 1 >= typst.length) {
83
- throw new Error('Expecting a character after \\');
84
- }
85
- const firstTwoChars = typst.substring(pos, pos + 2);
86
- if (['\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
87
- token = new TypstToken(TypstTokenType.ELEMENT, firstTwoChars);
88
- pos += 2;
89
- } else if (['\\\n', '\\ '].includes(firstTwoChars)) {
90
- token = new TypstToken(TypstTokenType.CONTROL, '\\');
91
- pos += 1;
92
- } else {
93
- // this backslash is dummy and will be ignored in later stages
94
- token = new TypstToken(TypstTokenType.CONTROL, '');
95
- pos++;
96
- }
97
- break;
98
- }
99
- case '"': {
100
- let newPos = pos + 1;
101
- while (newPos < typst.length) {
102
- if (typst[newPos] === '"' && typst[newPos - 1] !== '\\') {
103
- break;
104
- }
105
- newPos++;
106
- }
107
- let text = typst.substring(pos + 1, newPos);
108
- // replace all escape characters with their actual characters
109
- const chars = ['"', '\\'];
110
- for (const char of chars) {
111
- text = text.replaceAll('\\' + char, char);
112
- }
113
- token = new TypstToken(TypstTokenType.TEXT, text);
114
- pos = newPos + 1;
115
- break;
116
- }
117
- default: {
118
- if (isdigit(firstChar)) {
119
- let newPos = pos;
120
- while (newPos < typst.length && isdigit(typst[newPos])) {
121
- newPos += 1;
122
- }
123
- if(newPos < typst.length && typst[newPos] === '.') {
124
- newPos += 1;
125
- while (newPos < typst.length && isdigit(typst[newPos])) {
126
- newPos += 1;
127
- }
128
- }
129
- token = new TypstToken(TypstTokenType.ELEMENT, typst.slice(pos, newPos));
130
- } else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
131
- token = new TypstToken(TypstTokenType.ELEMENT, firstChar)
132
- } else if (isalpha(firstChar)) {
133
- const identifier = eat_identifier_name(typst, pos);
134
- const _type = identifier.length === 1 ? TypstTokenType.ELEMENT : TypstTokenType.SYMBOL;
135
- token = new TypstToken(_type, identifier);
136
- } else {
137
- token = new TypstToken(TypstTokenType.ELEMENT, firstChar);
138
- }
139
- pos += token.value.length;
140
- }
36
// Regex source matching any multi-character Typst shorthand.
const REGEX_SHORTHANDS = generate_regex_for_shorthands();

/**
 * Tokenizer rules for Typst math input. Each entry maps a regular-expression
 * source string to an action that builds the token(s) for the matched text.
 *
 * NOTE(review): how competing rules are prioritised (declaration order vs.
 * longest match) is decided by JSLex, which is not visible here — confirm
 * before reordering entries.
 */
const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
    // Line comment: the token value excludes the leading `//`.
    [String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
    [String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    [String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
    // Both LF and CRLF are normalised to "\n".
    [String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
    [String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
    // Escaped special characters are kept verbatim as elements.
    [String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    // Backslash followed by a line break: a control backslash plus the newline.
    [String.raw`\\\n`, (_s) => {
        return [
            new TypstToken(TypstTokenType.CONTROL, "\\"),
            new TypstToken(TypstTokenType.NEWLINE, "\n"),
        ]
    }],
    // Backslash followed by other whitespace: a control backslash plus one space.
    [String.raw`\\\s`, (_s) => {
        return [
            new TypstToken(TypstTokenType.CONTROL, "\\"),
            new TypstToken(TypstTokenType.SPACE, " "),
        ]
    }],
    // this backslash is dummy and will be ignored in later stages
    [String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
    [
        // Quoted text. The escaped-quote alternative is tried first; otherwise
        // `[^"]` would consume the backslash and the following quote would end
        // the string prematurely.
        String.raw`"((\\")|[^"])*"`,
        (s) => {
            const quoted = s.text()!;
            // Drop the surrounding quotes, then replace escaped quotes with
            // plain ones. replaceAll returns a new string, so its result must
            // be the value that is used.
            const text = quoted.substring(1, quoted.length - 1).replaceAll('\\"', '"');
            return new TypstToken(TypstTokenType.TEXT, text);
        }
    ],
    [
        // A shorthand such as `->` is emitted as the symbol name it stands for.
        REGEX_SHORTHANDS,
        (s) => {
            const shorthand = s.text()!;
            const symbol = reverseShorthandMap.get(shorthand)!;
            return new TypstToken(TypstTokenType.SYMBOL, symbol);
        }
    ],
    // Integer or decimal number.
    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
    // Identifier (letters and dots): one character is an element, longer is a symbol.
    [String.raw`[a-zA-Z\.]+`, (s) => {
        const identifier = s.text()!;
        const type = identifier.length === 1 ? TypstTokenType.ELEMENT : TypstTokenType.SYMBOL;
        return new TypstToken(type, identifier);
    }],
    // Fallback for any other single character.
    [String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
]);
83
+
84
// Lexer specification handed to JSLex: the "start" entry holds the Typst rule table.
const spec = { "start": rules_map };

/**
 * Split a Typst math source string into tokens.
 *
 * A new JSLex instance is built from `spec` on every call and its
 * `collect` result is returned unchanged.
 *
 * @param input - Typst source text
 * @returns the tokens produced by the lexer
 */
export function tokenize_typst(input: string): TypstToken[] {
    const typst_lexer = new JSLex<TypstToken>(spec);
    const tokens = typst_lexer.collect(input);
    return tokens;
}
147
92
 
93
+
148
94
  function find_closing_match(tokens: TypstToken[], start: number): number {
149
95
  assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET]));
150
96
  let count = 1;
@@ -0,0 +1,51 @@
1
/**
 * Typst symbol name -> math shorthand.
 * Entries are listed from the longest shorthand to the shortest.
 */
export const shorthandMap = new Map<string, string>([
    ['arrow.l.r.double.long', '<==>'],
    ['arrow.l.r.long', '<-->'],
    ['arrow.r.bar', '|->'],
    ['arrow.r.double.bar', '|=>'],
    ['arrow.r.double.long', '==>'],
    ['arrow.r.long', '-->'],
    ['arrow.r.long.squiggly', '~~>'],
    ['arrow.r.tail', '>->'],
    ['arrow.r.twohead', '->>'],
    ['arrow.l.double.long', '<=='],
    ['arrow.l.long', '<--'],
    ['arrow.l.long.squiggly', '<~~'],
    ['arrow.l.tail', '<-<'],
    ['arrow.l.twohead', '<<-'],
    ['arrow.l.r', '<->'],
    ['arrow.l.r.double', '<=>'],
    ['colon.double.eq', '::='],
    ['dots.h', '...'],
    ['gt.triple', '>>>'],
    ['lt.triple', '<<<'],
    ['arrow.r', '->'],
    ['arrow.r.double', '=>'],
    ['arrow.r.squiggly', '~>'],
    ['arrow.l', '<-'],
    ['arrow.l.squiggly', '<~'],
    ['bar.v.double', '||'],
    ['bracket.l.double', '[|'],
    ['bracket.r.double', '|]'],
    ['colon.eq', ':='],
    ['eq.colon', '=:'],
    ['eq.not', '!='],
    ['gt.double', '>>'],
    ['gt.eq', '>='],
    ['lt.double', '<<'],
    ['lt.eq', '<='],
    ['ast.op', '*'],
    ['minus', '-'],
    ['tilde.op', '~'],
]);

/**
 * Math shorthand -> Typst symbol name, in the same order as shorthandMap.
 * Single-character shorthands ('-', '~', '*') are left out.
 */
export const reverseShorthandMap = new Map<string, string>(
    Array.from(shorthandMap, ([name, shorthand]): [string, string] => [shorthand, name])
        .filter(([shorthand]) => shorthand.length > 1),
);
@@ -1,24 +1,10 @@
1
1
  import { TexNode, TypstNode, TypstPrimitiveValue, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
2
-
3
-
4
- // symbols that are supported by Typst but not by KaTeX
5
- export const TYPST_INTRINSIC_SYMBOLS = [
6
- 'dim',
7
- 'id',
8
- 'im',
9
- 'mod',
10
- 'Pr',
11
- 'sech',
12
- 'csch',
13
- // 'sgn
14
- ];
15
-
2
+ import { shorthandMap } from "./typst-shorthands";
16
3
 
17
4
  function is_delimiter(c: TypstNode): boolean {
18
5
  return c.type === 'atom' && ['(', ')', '[', ']', '{', '}', '|', '⌊', '⌋', '⌈', '⌉'].includes(c.content);
19
6
  }
20
7
 
21
-
22
8
  const TYPST_LEFT_PARENTHESIS: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '(');
23
9
  const TYPST_RIGHT_PARENTHESIS: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ')');
24
10
  const TYPST_COMMA: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ',');
@@ -35,6 +21,8 @@ function typst_primitive_to_string(value: TypstPrimitiveValue) {
35
21
  default:
36
22
  if (value === null) {
37
23
  return '#none';
24
+ } else if (value instanceof TypstToken) {
25
+ return value.toString();
38
26
  }
39
27
  throw new TypstWriterError(`Invalid primitive value: ${value}`, value);
40
28
  }
@@ -50,20 +38,29 @@ export class TypstWriterError extends Error {
50
38
  }
51
39
  }
52
40
 
41
/**
 * Settings passed to the TypstWriter constructor, which copies each field
 * onto the writer instance. All fields are required.
 */
export interface TypstWriterOptions {
    nonStrict: boolean;
    // When true, a symbol whose name has an entry in shorthandMap is written
    // as that shorthand instead of the name.
    preferShorthands: boolean;
    keepSpaces: boolean;
    // When true, the symbol `infinity` is written as `oo`.
    inftyToOo: boolean;
}
47
+
53
48
  export class TypstWriter {
54
49
  private nonStrict: boolean;
55
- private preferTypstIntrinsic: boolean;
50
+ private preferShorthands: boolean;
56
51
  private keepSpaces: boolean;
52
+ private inftyToOo: boolean;
57
53
 
58
54
  protected buffer: string = "";
59
55
  protected queue: TypstToken[] = [];
60
56
 
61
57
  private insideFunctionDepth = 0;
62
58
 
63
- constructor(nonStrict: boolean, preferTypstIntrinsic: boolean, keepSpaces: boolean) {
64
- this.nonStrict = nonStrict;
65
- this.preferTypstIntrinsic = preferTypstIntrinsic;
66
- this.keepSpaces = keepSpaces;
59
/** Create a writer, copying every setting from `opt` onto the instance. */
constructor(opt: TypstWriterOptions) {
    const { nonStrict, preferShorthands, keepSpaces, inftyToOo } = opt;
    this.nonStrict = nonStrict;
    this.preferShorthands = preferShorthands;
    this.keepSpaces = keepSpaces;
    this.inftyToOo = inftyToOo;
}
68
65
 
69
66
 
@@ -123,9 +120,19 @@ export class TypstWriter {
123
120
  }
124
121
  break;
125
122
  }
126
- case 'symbol':
127
- this.queue.push(new TypstToken(TypstTokenType.SYMBOL, node.content));
123
+ case 'symbol': {
124
+ let content = node.content;
125
+ if(this.preferShorthands) {
126
+ if (shorthandMap.has(content)) {
127
+ content = shorthandMap.get(content)!;
128
+ }
129
+ }
130
+ if (this.inftyToOo && content === 'infinity') {
131
+ content = 'oo';
132
+ }
133
+ this.queue.push(new TypstToken(TypstTokenType.SYMBOL, content));
128
134
  break;
135
+ }
129
136
  case 'text':
130
137
  this.queue.push(new TypstToken(TypstTokenType.TEXT, node.content));
131
138
  break;
@@ -0,0 +1,33 @@
1
+ import urllib.request
2
+ import html
3
+ from bs4 import BeautifulSoup
4
+
5
+
6
# Scrape the math shorthands from the Typst symbol reference page and print
# them as `['<symbol name>', '<shorthand>'],` lines, longest shorthand first.
if __name__ == '__main__':
    shorthand_map = []

    url = "https://typst.app/docs/reference/symbols/"
    # Bound the request so a stalled connection cannot hang the script forever.
    with urllib.request.urlopen(url, timeout=30) as response:
        html_text = response.read().decode('utf-8')

    soup = BeautifulSoup(html_text, 'html.parser')

    # <ul class="symbol-grid">
    ul_list = soup.find_all('ul', class_='symbol-grid')
    if len(ul_list) < 2:
        # Fail with a readable message instead of a bare IndexError when the
        # page layout is not the one this script expects.
        raise SystemExit(
            f"expected at least 2 <ul class=\"symbol-grid\"> elements at {url}, "
            f"found {len(ul_list)}"
        )
    # ul_list[0] is the markup shorthand grid; only the math grid is used.
    ul_shorthands_math = ul_list[1]

    id_prefix = 'symbol-'
    for li in ul_shorthands_math.find_all('li'):
        # e.g. <li id="symbol-arrow.r" data-math-shorthand="-&gt;"><button>...</button></li>
        # ==> typst = "arrow.r"
        # ==> shorthand = "->"
        typst = li['id'][len(id_prefix):]
        shorthand = html.unescape(li['data-math-shorthand'])
        shorthand_map.append((typst, shorthand))

    # Sort by length of shorthand, order from longest to shortest
    shorthand_map.sort(key=lambda x: len(x[1]), reverse=True)
    for typst, shorthand in shorthand_map:
        # Escape backslashes and single quotes so the printed line is always a
        # valid single-quoted string literal.
        escaped = shorthand.replace('\\', '\\\\').replace("'", "\\'")
        print(f"['{typst}', '{escaped}'],")
@@ -1,12 +1,13 @@
1
- import requests
1
+ import urllib.request
2
2
  from bs4 import BeautifulSoup
3
3
 
4
-
5
4
  if __name__ == '__main__':
6
5
  symbol_map = {}
7
6
 
8
7
  url = "https://typst.app/docs/reference/symbols/sym/"
9
- html_text = requests.get(url).text
8
+ with urllib.request.urlopen(url) as response:
9
+ html_text = response.read().decode('utf-8')
10
+
10
11
  soup = BeautifulSoup(html_text, 'html.parser')
11
12
  # <ul class="symbol-grid">
12
13
  ul = soup.find('ul', class_='symbol-grid')