npm - tex2typst - Versions diffs - 0.3.17 → 0.3.19 - Mend

tex2typst 0.3.17 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/README.md +2 -2
package/dist/index.js +230 -172
package/dist/tex-parser.d.ts +0 -1
package/dist/tex-tokenizer.d.ts +4 -0
package/dist/tex2typst.min.js +11 -11
package/dist/types.d.ts +13 -7
package/dist/typst-parser.d.ts +0 -1
package/dist/typst-tokenizer.d.ts +2 -0
package/dist/typst-writer.d.ts +3 -1
package/package.json +1 -1
package/src/convert.ts +129 -69
package/src/index.ts +1 -1
package/src/map.ts +2 -0
package/src/tex-parser.ts +6 -137
package/src/tex-tokenizer.ts +138 -0
package/src/types.ts +20 -7
package/src/typst-parser.ts +1 -74
package/src/typst-tokenizer.ts +76 -0
package/src/typst-writer.ts +36 -18
package/TODO.md +0 -1
package/docs/api-reference.md +0 -64
package/tools/make-shorthand-map.py +0 -33
package/tools/make-symbol-map.py +0 -35

package/src/convert.ts CHANGED

@@ -22,9 +22,6 @@ function tex_token_to_typst(token: string): string {
         return token;
     } else if (token === '/') {
         return '\\/';
-    } else if (token === '\\|') {
-        // \| in LaTeX is double vertical bar looks like ||
-        return 'parallel';
     } else if (token === '\\\\') {
         return '\\';
     } else if (['\\$', '\\#', '\\&', '\\_'].includes(token)) {
@@ -49,40 +46,52 @@ function tex_token_to_typst(token: string): string {
 function convert_overset(node: TexNode, options: Tex2TypstOptions): TypstNode {
     const [sup, base] = node.args!;
-    const is_def = (n: TexNode): boolean => {
-        if (n.eq(new TexNode('text', 'def'))) {
-            return true;
-        }
-        // \overset{def}{=} is also considered as eq.def
-        if (n.type === 'ordgroup' && n.args!.length === 3) {
-            const [a1, a2, a3] = n.args!;
-            const d = new TexNode('element', 'd');
-            const e = new TexNode('element', 'e');
-            const f = new TexNode('element', 'f');
-            if (a1.eq(d) && a2.eq(e) && a3.eq(f)) {
+    if (options.optimize) {
+        const is_def = (n: TexNode): boolean => {
+            if (n.eq(new TexNode('text', 'def'))) {
                 return true;
             }
+            // \overset{def}{=} is also considered as eq.def
+            if (n.type === 'ordgroup' && n.args!.length === 3) {
+                const [a1, a2, a3] = n.args!;
+                const d = new TexNode('element', 'd');
+                const e = new TexNode('element', 'e');
+                const f = new TexNode('element', 'f');
+                if (a1.eq(d) && a2.eq(e) && a3.eq(f)) {
+                    return true;
+                }
+            }
+            return false;
+        };
+        const is_eq = (n: TexNode): boolean => n.eq(new TexNode('element', '='));
+        if (is_def(sup) && is_eq(base)) {
+            return new TypstNode('symbol', 'eq.def');
         }
-        return false;
-    };
-    const is_eq = (n: TexNode): boolean => n.eq(new TexNode('element', '='));
-    if (is_def(sup) && is_eq(base)) {
-        return new TypstNode('symbol', 'eq.def');
     }
     const limits_call = new TypstNode(
         'funcCall',
         'limits',
         [convert_tex_node_to_typst(base, options)]
     );
-    return new TypstNode(
-        'supsub',
-        '',
-        [],
-        {
+    return new TypstNode('supsub', '', [], {
             base: limits_call,
             sup: convert_tex_node_to_typst(sup, options),
-        }
+    });
+}
+// \underset{X}{Y} -> limits(Y)_X
+function convert_underset(node: TexNode, options: Tex2TypstOptions): TypstNode {
+    const [sub, base] = node.args!;
+    const limits_call = new TypstNode(
+        'funcCall',
+        'limits',
+        [convert_tex_node_to_typst(base, options)]
     );
+    return new TypstNode('supsub', '', [], {
+            base: limits_call,
+            sub: convert_tex_node_to_typst(sub, options),
+    });
 }
@@ -117,7 +126,7 @@ export function convert_tex_node_to_typst(node: TexNode, options: Tex2TypstOptio
         case 'supsub': {
             let { base, sup, sub } = node.data as TexSupsubData;
-            // Special logic for overbrace
+            // special hook for overbrace
             if (base && base.type === 'unaryFunc' && base.content === '\\overbrace' && sup) {
                 return new TypstNode(
                     'funcCall',
@@ -150,40 +159,61 @@ export function convert_tex_node_to_typst(node: TexNode, options: Tex2TypstOptio
             return new TypstNode('supsub', '', [], data);
         }
         case 'leftright': {
-            const [left, body, right] = node.args!;
-            // These pairs will be handled by Typst compiler by default. No need to add lr()
-            const group: TypstNode = new TypstNode(
+            const [left, _body, right] = node.args!;
+            const [typ_left, typ_body, typ_right] = node.args!.map((n) => convert_tex_node_to_typst(n, options));
+            if (options.optimize) {
+                // optimization off: "lr(bar.v.double a + 1/2 bar.v.double)"
+                // optimization on : "norm(a + 1/2)"
+                if (left.content === '\\|' && right.content === '\\|') {
+                    return new TypstNode('funcCall', 'norm', [typ_body]);
+                }
+                // These pairs will be handled by Typst compiler by default. No need to add lr()
+                if ([
+                    "[]", "()", "\\{\\}",
+                    "\\lfloor\\rfloor",
+                    "\\lceil\\rceil",
+                    "\\lfloor\\rceil",
+                ].includes(left.content + right.content)) {
+                    return new TypstNode('group', '', [typ_left, typ_body, typ_right]);
+                }
+            }
+            const group = new TypstNode(
                 'group',
                 '',
-                node.args!.map((n) => convert_tex_node_to_typst(n, options))
+                [typ_left, typ_body, typ_right]
             );
-            if ([
-                "[]", "()", "\\{\\}",
-                "\\lfloor\\rfloor",
-                "\\lceil\\rceil",
-                "\\lfloor\\rceil",
-            ].includes(left.content + right.content)) {
-                return group;
-            }
-            // "\left\{ A \right." -> "{A"
-            // "\left. A \right\}" -> "lr( A} )"
+            // "\left\{ a + \frac{1}{3} \right." -> "lr(\{ a + 1/3)"
+            // "\left. a + \frac{1}{3} \right\}" -> "lr( a + \frac{1}{3} \})"
+            // Note that: In lr(), if one side of delimiter doesn't present (i.e. derived from "\\left." or "\\right."),
+            // "(", ")", "{", "[", should be escaped with "\" to be the other side of delimiter.
+            // Simple "lr({ a+1/3)" doesn't compile in Typst.
+            const escape_curly_or_paren = function(s: string): string {
+                if (["(", ")", "{", "["].includes(s)) {
+                    return "\\" + s;
+                } else {
+                    return s;
+                }
+            };
             if (right.content === '.') {
-                group.args!.pop();
-                return group;
+                typ_left.content = escape_curly_or_paren(typ_left.content);
+                group.args = [typ_left, typ_body];
             } else if (left.content === '.') {
-                group.args!.shift();
-                return new TypstNode('funcCall', 'lr', [group]);
+                typ_right.content = escape_curly_or_paren(typ_right.content);
+                group.args = [typ_body, typ_right];
             }
-            return new TypstNode(
-                'funcCall',
-                'lr',
-                [group]
-            );
+            return new TypstNode('funcCall', 'lr', [group]);
         }
         case 'binaryFunc': {
             if (node.content === '\\overset') {
                 return convert_overset(node, options);
             }
+            if (node.content === '\\underset') {
+                return convert_underset(node, options);
+            }
             // \frac{a}{b} -> a / b
             if (node.content === '\\frac') {
                 if (options.fracToSlash) {
@@ -246,17 +276,14 @@ export function convert_tex_node_to_typst(node: TexNode, options: Tex2TypstOptio
             }
             // \operatorname{opname} -> op("opname")
             if (node.content === '\\operatorname') {
-                const text = arg0.content;
-                if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
-                    return new TypstNode('symbol', text);
-                } else {
-                    return new TypstNode(
-                        'funcCall',
-                        'op',
-                        [arg0]
-                    );
+                if (options.optimize) {
+                    const text = arg0.content;
+                    if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
+                        return new TypstNode('symbol', text);
+                    }
                 }
+                return new TypstNode('funcCall', 'op', [arg0]);
             }
             // \hspace{1cm} -> #h(1cm)
             // TODO: reverse conversion support for this
@@ -409,6 +436,8 @@ const TYPST_UNARY_FUNCTIONS: string[] = [
     'frak',
     'floor',
     'ceil',
+    'norm',
+    'limits',
 ];
 const TYPST_BINARY_FUNCTIONS: string[] = [
@@ -428,8 +457,6 @@ function apply_escape_if_needed(c: string) {
 function typst_token_to_tex(token: string): string {
     if (/^[a-zA-Z0-9]$/.test(token)) {
         return token;
-    } else if (token === 'thin') {
-        return '\\,';
     } else if (reverseSymbolMap.has(token)) {
         return '\\' + reverseSymbolMap.get(token)!;
     }
@@ -491,6 +518,8 @@ export function convert_typst_node_to_tex(node: TypstNode): TexNode {
                         let left_delim = apply_escape_if_needed(data.leftDelim);
                         assert(data.rightDelim !== null, "leftDelim has value but rightDelim not");
                         let right_delim = apply_escape_if_needed(data.rightDelim!);
+                        // TODO: should be TeXNode('leftright', ...)
+                        // But currently writer will output `\left |` while people commonly prefer `\left|`.
                         return new TexNode('ordgroup', '', [
                             new TexNode('element', '\\left' + left_delim),
                             ...node.args!.map(convert_typst_node_to_tex),
@@ -500,17 +529,29 @@ export function convert_typst_node_to_tex(node: TypstNode): TexNode {
                         return new TexNode('ordgroup', '', node.args!.map(convert_typst_node_to_tex));
                     }
                 }
+                // special hook for norm
+                // `\| a  \|` <- `norm(a)`
+                // `\left\| a + \frac{1}{3} \right\|` <- `norm(a + 1/3)`
+                if (node.content === 'norm') {
+                    const arg0 = node.args![0];
+                    const tex_node_type = node.isOverHigh() ? 'leftright' : 'ordgroup';
+                    return new TexNode(tex_node_type, '', [
+                        new TexNode('symbol', "\\|"),
+                        convert_typst_node_to_tex(arg0),
+                        new TexNode('symbol', "\\|")
+                    ]);
+                }
                 // special hook for floor, ceil
-                // Typst "floor(a) + ceil(b)" should converts to Tex "\lfloor a \rfloor + \lceil b \rceil"
+                // `\lfloor a \rfloor` <- `floor(a)`
+                // `\lceil a \rceil` <- `ceil(a)`
+                // `\left\lfloor a \right\rfloor` <- `floor(a)`
+                // `\left\lceil a \right\rceil` <- `ceil(a)`
                 if (node.content === 'floor' || node.content === 'ceil') {
-                    let left = "\\l" + node.content;
-                    let right = "\\r" + node.content;
+                    const left = "\\l" + node.content;
+                    const right = "\\r" + node.content;
                     const arg0 = node.args![0];
-                    if (arg0.isOverHigh()) {
-                        left = "\\left" + left;
-                        right = "\\right" + right;
-                    }
-                    return new TexNode('ordgroup', '', [
+                    const tex_node_type = node.isOverHigh() ? 'leftright' : 'ordgroup';
+                    return new TexNode(tex_node_type, '', [
                         new TexNode('symbol', left),
                         convert_typst_node_to_tex(arg0),
                         new TexNode('symbol', right)
@@ -552,15 +593,34 @@ export function convert_typst_node_to_tex(node: TypstNode): TexNode {
         }
         case 'supsub': {
             const { base, sup, sub } = node.data as TypstSupsubData;
-            const base_tex = convert_typst_node_to_tex(base);
             let sup_tex: TexNode | undefined;
             let sub_tex: TexNode | undefined;
             if (sup) {
                 sup_tex = convert_typst_node_to_tex(sup);
             }
             if (sub) {
                 sub_tex = convert_typst_node_to_tex(sub);
             }
+            // special hook for limits
+            // `limits(+)^a` -> `\overset{a}{+}`
+            // `limits(+)_a` -> `\underset{a}{+}`
+            // `limits(+)_a^b` -> `\overset{b}{\underset{a}{+}}`
+            if (base.eq(new TypstNode('funcCall', 'limits'))) {
+                const body_in_limits = convert_typst_node_to_tex(base.args![0]);
+                if (sup_tex !== undefined && sub_tex === undefined) {
+                    return new TexNode('binaryFunc', '\\overset', [sup_tex, body_in_limits]);
+                } else if (sup_tex === undefined && sub_tex !== undefined) {
+                    return new TexNode('binaryFunc', '\\underset', [sub_tex, body_in_limits]);
+                } else {
+                    const underset_call = new TexNode('binaryFunc', '\\underset', [sub_tex!, body_in_limits]);
+                    return new TexNode('binaryFunc', '\\overset', [sup_tex!, underset_call]);
+                }
+            }
+            const base_tex = convert_typst_node_to_tex(base);
             const res = new TexNode('supsub', '', [], {
                 base: base_tex,
                 sup: sup_tex,

package/src/index.ts CHANGED

@@ -11,11 +11,11 @@ import { shorthandMap } from "./typst-shorthands";
 export function tex2typst(tex: string, options?: Tex2TypstOptions): string {
     const opt: Tex2TypstOptions = {
         nonStrict: true,
-        preferTypstIntrinsic: true,
         preferShorthands: true,
         keepSpaces: false,
         fracToSlash: true,
         inftyToOo: false,
+        optimize: true,
         nonAsciiWrapper: "",
         customTexMacros: {}
     };

package/src/map.ts CHANGED

@@ -1,6 +1,8 @@
 const symbolMap = new Map<string, string>([
     ['displaystyle', 'display'],
+    ['|', 'bar.v.double'],
+    ['!', '#h(-math.thin.amount)'],
     [',', 'thin'],
     [':', 'med'],
     [';', 'thick'],

package/src/tex-parser.ts CHANGED

@@ -1,53 +1,8 @@
 import { symbolMap } from "./map";
 import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
 import { assert } from "./util";
-import { JSLex, Scanner } from "./jslex";
 import { array_find } from "./generic";
-const UNARY_COMMANDS = [
-    'sqrt',
-    'text',
-    'bar',
-    'bold',
-    'boldsymbol',
-    'ddot',
-    'dot',
-    'hat',
-    'mathbb',
-    'mathbf',
-    'mathcal',
-    'mathfrak',
-    'mathit',
-    'mathrm',
-    'mathscr',
-    'mathsf',
-    'mathtt',
-    'operatorname',
-    'overbrace',
-    'overline',
-    'pmb',
-    'rm',
-    'tilde',
-    'underbrace',
-    'underline',
-    'vec',
-    'widehat',
-    'widetilde',
-    'overleftarrow',
-    'overrightarrow',
-    'hspace',
-]
-const BINARY_COMMANDS = [
-    'frac',
-    'tfrac',
-    'binom',
-    'dbinom',
-    'dfrac',
-    'tbinom',
-    'overset',
-]
+import { TEX_BINARY_COMMANDS, TEX_UNARY_COMMANDS, tokenize_tex } from "./tex-tokenizer";
 const IGNORED_COMMANDS = [
     'bigl', 'bigr',
@@ -59,9 +14,9 @@ const IGNORED_COMMANDS = [
 const EMPTY_NODE: TexNode = new TexNode('empty', '');
 function get_command_param_num(command: string): number {
-    if (UNARY_COMMANDS.includes(command)) {
+    if (TEX_UNARY_COMMANDS.includes(command)) {
         return 1;
-    } else if (BINARY_COMMANDS.includes(command)) {
+    } else if (TEX_BINARY_COMMANDS.includes(command)) {
         return 2;
     } else {
         return 0;
@@ -86,7 +41,7 @@ function eat_whitespaces(tokens: TexToken[], start: number): TexToken[] {
 function eat_parenthesis(tokens: TexToken[], start: number): TexToken | null {
     const firstToken = tokens[start];
-    if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}', '.'].includes(firstToken.value)) {
+    if (firstToken.type === TexTokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}', '.', '\\|'].includes(firstToken.value)) {
         return firstToken;
     } else if (firstToken.type === TexTokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
         return firstToken;
@@ -142,93 +97,6 @@ function find_closing_end_command(tokens: TexToken[], start: number): number {
 }
-function unescape(str: string): string {
-    const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
-    for (const char of chars) {
-        str = str.replaceAll('\\' + char, char);
-    }
-    return str;
-}
-const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
-    [
-        String.raw`\\(text|operatorname|begin|end|hspace){.+?}`, (s) => {
-            const text = s.text()!;
-            const command = text.substring(0, text.indexOf('{'));
-            const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
-            return [
-                new TexToken(TexTokenType.COMMAND, command),
-                new TexToken(TexTokenType.CONTROL, '{'),
-                new TexToken(TexTokenType.TEXT, unescape(text_inside)),
-                new TexToken(TexTokenType.CONTROL, '}')
-            ]
-        }
-    ],
-    [String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
-    [String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
-    [String.raw`\\[\\,:; ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
-    [String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
-    [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
-    [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
-    [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
-        const text = s.text()!;
-        const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
-        const match = text.match(regex);
-        assert(match !== null);
-        const command = match![1];
-        if (BINARY_COMMANDS.includes(command.substring(1))) {
-            const arg1 = match![2].trimStart();
-            const arg2 = match![3];
-            return [
-                new TexToken(TexTokenType.COMMAND, command),
-                new TexToken(TexTokenType.ELEMENT, arg1),
-                new TexToken(TexTokenType.ELEMENT, arg2),
-            ];
-        } else {
-            s.reject();
-            return [];
-        }
-   }],
-    [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
-        const text = s.text()!;
-        const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
-        const match = text.match(regex);
-        assert(match !== null);
-        const command = match![1];
-        if (UNARY_COMMANDS.includes(command.substring(1))) {
-            const arg1 = match![2].trimStart();
-            return [
-                new TexToken(TexTokenType.COMMAND, command),
-                new TexToken(TexTokenType.ELEMENT, arg1),
-            ];
-        } else {
-            s.reject();
-            return [];
-        }
-    }],
-    [String.raw`\\[a-zA-Z]+`, (s) => {
-        const command = s.text()!;
-        return [ new TexToken(TexTokenType.COMMAND, command), ];
-    }],
-    // Numbers like "123", "3.14"
-    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
-    [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
-    [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
-    // non-ASCII characters
-    [String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
-    [String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
-]);
-const spec = {
-    "start": rules_map
-};
-export function tokenize_tex(input: string): TexToken[] {
-    const lexer = new JSLex<TexToken>(spec);
-    return lexer.collect(input);
-}
 export class LatexParserError extends Error {
     constructor(message: string) {
         super(message);
@@ -394,6 +262,7 @@ export class LatexParser {
                     case '}':
                         throw new LatexParserError("Unmatched '}'");
                     case '\\\\':
+                    case '\\!':
                     case '\\,':
                     case '\\:':
                     case '\\;':
@@ -552,7 +421,7 @@ export class LatexParser {
         while (pos < tokens.length) {
             const whitespaceCount = eat_whitespaces(tokens, pos).length;
             pos += whitespaceCount;
             if (pos >= tokens.length || !tokens[pos].eq(LEFT_CURLY_BRACKET)) {
                 break;
             }

package/src/tex-tokenizer.ts ADDED

@@ -0,0 +1,138 @@
+import { TexToken, TexTokenType } from "./types";
+import { assert } from "./util";
+import { JSLex, Scanner } from "./jslex";
+export const TEX_UNARY_COMMANDS = [
+    'sqrt',
+    'text',
+    'bar',
+    'bold',
+    'boldsymbol',
+    'ddot',
+    'dot',
+    'hat',
+    'mathbb',
+    'mathbf',
+    'mathcal',
+    'mathfrak',
+    'mathit',
+    'mathrm',
+    'mathscr',
+    'mathsf',
+    'mathtt',
+    'operatorname',
+    'overbrace',
+    'overline',
+    'pmb',
+    'rm',
+    'tilde',
+    'underbrace',
+    'underline',
+    'vec',
+    'widehat',
+    'widetilde',
+    'overleftarrow',
+    'overrightarrow',
+    'hspace',
+]
+export const TEX_BINARY_COMMANDS = [
+    'frac',
+    'tfrac',
+    'binom',
+    'dbinom',
+    'dfrac',
+    'tbinom',
+    'overset',
+    'underset',
+]
+function unescape(str: string): string {
+    const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
+    for (const char of chars) {
+        str = str.replaceAll('\\' + char, char);
+    }
+    return str;
+}
+const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
+    [
+        String.raw`\\(text|operatorname|begin|end|hspace){.+?}`, (s) => {
+            const text = s.text()!;
+            const command = text.substring(0, text.indexOf('{'));
+            const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
+            return [
+                new TexToken(TexTokenType.COMMAND, command),
+                new TexToken(TexTokenType.CONTROL, '{'),
+                new TexToken(TexTokenType.TEXT, unescape(text_inside)),
+                new TexToken(TexTokenType.CONTROL, '}')
+            ]
+        }
+    ],
+    [String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
+    [String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
+    [String.raw`\\[\\,:;! ]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
+    [String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
+    [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
+    [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    // e.g. match `\frac13`, `\frac1 b`, `\frac a b`
+    [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
+        const text = s.text()!;
+        const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
+        const match = text.match(regex);
+        assert(match !== null);
+        const command = match![1];
+        if (TEX_BINARY_COMMANDS.includes(command.substring(1))) {
+            const arg1 = match![2].trimStart();
+            const arg2 = match![3];
+            return [
+                new TexToken(TexTokenType.COMMAND, command),
+                new TexToken(TexTokenType.ELEMENT, arg1),
+                new TexToken(TexTokenType.ELEMENT, arg2),
+            ];
+        } else {
+            s.reject();
+            return [];
+        }
+    }],
+    // e.g. match `\sqrt3`, `\sqrt a`
+    [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
+        const text = s.text()!;
+        const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
+        const match = text.match(regex);
+        assert(match !== null);
+        const command = match![1];
+        if (TEX_UNARY_COMMANDS.includes(command.substring(1))) {
+            const arg1 = match![2].trimStart();
+            return [
+                new TexToken(TexTokenType.COMMAND, command),
+                new TexToken(TexTokenType.ELEMENT, arg1),
+            ];
+        } else {
+            s.reject();
+            return [];
+        }
+    }],
+    [String.raw`\\[a-zA-Z]+`, (s) => {
+        const command = s.text()!;
+        return [ new TexToken(TexTokenType.COMMAND, command), ];
+    }],
+    // Numbers like "123", "3.14"
+    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    // non-ASCII characters
+    [String.raw`[^\x00-\x7F]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
+]);
+const spec = {
+    "start": rules_map
+};
+export function tokenize_tex(input: string): TexToken[] {
+    const lexer = new JSLex<TexToken>(spec);
+    return lexer.collect(input);
+}