npm - tex2typst - Versions diffs - 0.3.6 → 0.3.8 - Mend

tex2typst 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/src/tex-parser.ts (CHANGED)

@@ -48,7 +48,12 @@ const BINARY_COMMANDS = [
     'overset',
 ]
+const IGNORED_COMMANDS = [
+    'bigl', 'bigr',
+    'biggl', 'biggr',
+    'Bigl', 'Bigr',
+    'Biggl', 'Biggr',
+];
 const EMPTY_NODE: TexNode = new TexNode('empty', '');
@@ -164,7 +169,46 @@ const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[
     [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
     [String.raw`\\[\\,:;]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
     [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
-    [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text()!)],
+    [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`, (s) => {
+        const text = s.text()!;
+        const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])\s*([0-9a-zA-Z])`);
+        const match = text.match(regex);
+        assert(match !== null);
+        const command = match![1];
+        if (BINARY_COMMANDS.includes(command.substring(1))) {
+            const arg1 = match![2].trimStart();
+            const arg2 = match![3];
+            return [
+                new TexToken(TexTokenType.COMMAND, command),
+                new TexToken(TexTokenType.ELEMENT, arg1),
+                new TexToken(TexTokenType.ELEMENT, arg2),
+            ];
+        } else {
+            s.reject();
+            return [];
+        }
+   }],
+    [String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`, (s) => {
+        const text = s.text()!;
+        const regex = RegExp(String.raw`(\\[a-zA-Z]+)(\s*\d|\s+[a-zA-Z])`);
+        const match = text.match(regex);
+        assert(match !== null);
+        const command = match![1];
+        if (UNARY_COMMANDS.includes(command.substring(1))) {
+            const arg1 = match![2].trimStart();
+            return [
+                new TexToken(TexTokenType.COMMAND, command),
+                new TexToken(TexTokenType.ELEMENT, arg1),
+            ];
+        } else {
+            s.reject();
+            return [];
+        }
+    }],
+    [String.raw`\\[a-zA-Z]+`, (s) => {
+        const command = s.text()!;
+        return [ new TexToken(TexTokenType.COMMAND, command), ];
+    }],
     [String.raw`[0-9]+`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
     [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
     [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
@@ -296,6 +340,9 @@ export class LatexParser {
     }
     parseNextExprWithoutSupSub(tokens: TexToken[], start: number): ParseResult {
+        if (start >= tokens.length) {
+            return [EMPTY_NODE, start];
+        }
         const firstToken = tokens[start];
         switch (firstToken.type) {
             case TexTokenType.ELEMENT:
@@ -308,6 +355,10 @@ export class LatexParser {
             case TexTokenType.NEWLINE:
                 return [new TexNode('whitespace', firstToken.value), start + 1];
             case TexTokenType.COMMAND:
+                const commandName = firstToken.value.slice(1);
+                if (IGNORED_COMMANDS.includes(commandName)) {
+                    return this.parseNextExprWithoutSupSub(tokens, start + 1);
+                }
                 if (firstToken.eq(BEGIN_COMMAND)) {
                     return this.parseBeginEndExpr(tokens, start);
                 } else if (firstToken.eq(LEFT_COMMAND)) {
@@ -376,7 +427,7 @@ export class LatexParser {
                         throw new LatexParserError('No matching right square bracket for [');
                     }
                     const [exponent, _] = this.parseGroup(tokens, posLeftSquareBracket + 1, posRightSquareBracket);
-                    const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
+                    const [arg1, newPos] = this.parseNextArg(tokens, posRightSquareBracket + 1);
                     return [new TexNode('unaryFunc', command, [arg1], exponent), newPos];
                 } else if (command === '\\text') {
                     if (pos + 2 >= tokens.length) {
@@ -388,12 +439,12 @@ export class LatexParser {
                     const text = tokens[pos + 1].value;
                     return [new TexNode('text', text), pos + 3];
                 }
-                let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
+                let [arg1, newPos] = this.parseNextArg(tokens, pos);
                 return [new TexNode('unaryFunc', command, [arg1]), newPos];
             }
             case 2: {
-                const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
-                const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
+                const [arg1, pos1] = this.parseNextArg(tokens, pos);
+                const [arg2, pos2] = this.parseNextArg(tokens, pos1);
                 return [new TexNode('binaryFunc', command, [arg1, arg2]), pos2];
             }
             default:
@@ -401,6 +452,30 @@ export class LatexParser {
         }
     }
+    /*
+    Extract a non-space argument from the token stream.
+    So that `\frac{12} 3` is parsed as
+        TexCommand{ content: '\frac', args: ['12', '3'] }
+        rather than
+        TexCommand{ content: '\frac', args: ['12', ' '] }, TexElement{ content: '3' }
+    */
+    parseNextArg(tokens: TexToken[], start: number): ParseResult {
+        let pos = start;
+        let arg: TexNode | null = null;
+        while (pos < tokens.length) {
+            let node: TexNode;
+            [node, pos] = this.parseNextExprWithoutSupSub(tokens, pos);
+            if (node.type !== 'whitespace') {
+                arg = node;
+                break;
+            }
+        }
+        if (arg === null) {
+            throw new LatexParserError('Expecting argument but token stream ended');
+        }
+        return [arg, pos];
+    }
     parseLeftRightExpr(tokens: TexToken[], start: number): ParseResult {
         assert(tokens[start].eq(LEFT_COMMAND));

package/src/types.ts (CHANGED)

@@ -348,6 +348,10 @@ export interface TypstSupsubData {
 }
 export type TypstArrayData = TypstNode[][];
+export interface TypstLrData {
+    leftDelim: string | null;
+    rightDelim: string | null;
+}
 type TypstNodeType = 'atom' | 'symbol' | 'text' | 'control' | 'comment' | 'whitespace'
             | 'empty' | 'group' | 'supsub' | 'funcCall' | 'fraction' | 'align' | 'matrix' | 'cases' | 'unknown';
@@ -364,12 +368,12 @@ export class TypstNode {
     type: TypstNodeType;
     content: string;
     args?: TypstNode[];
-    data?: TypstSupsubData | TypstArrayData;
+    data?: TypstSupsubData | TypstArrayData | TypstLrData;
     // Some Typst functions accept additional options. e.g. mat() has option "delim", op() has option "limits"
     options?: TypstNamedParams;
     constructor(type: TypstNodeType, content: string, args?: TypstNode[],
-            data?: TypstSupsubData | TypstArrayData) {
+            data?: TypstSupsubData | TypstArrayData| TypstLrData) {
         this.type = type;
         this.content = content;
         this.args = args;

package/src/typst-parser.ts (CHANGED)

@@ -1,6 +1,6 @@
 import { array_find } from "./generic";
-import { TYPST_NONE, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
+import { TYPST_NONE, TypstLrData, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
 import { assert, isalpha } from "./util";
 import { reverseShorthandMap } from "./typst-shorthands";
 import { JSLex, Scanner } from "./jslex";
@@ -91,8 +91,9 @@ export function tokenize_typst(input: string): TypstToken[] {
 }
-function find_closing_match(tokens: TypstToken[], start: number): number {
-    assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET]));
+function _find_closing_match(tokens: TypstToken[], start: number,
+        leftBrackets: TypstToken[], rightBrackets: TypstToken[]): number {
+    assert(tokens[start].isOneOf(leftBrackets));
     let count = 1;
     let pos = start + 1;
@@ -100,10 +101,10 @@ function find_closing_match(tokens: TypstToken[], start: number): number {
         if (pos >= tokens.length) {
             throw new Error('Unmatched brackets');
         }
-        if (tokens[pos].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET])) {
-            count += 1;
-        } else if (tokens[pos].isOneOf([RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET])) {
+        if (tokens[pos].isOneOf(rightBrackets)) {
             count -= 1;
+        }else if (tokens[pos].isOneOf(leftBrackets)) {
+            count += 1;
         }
         pos += 1;
     }
@@ -111,6 +112,25 @@ function find_closing_match(tokens: TypstToken[], start: number): number {
     return pos - 1;
 }
+function find_closing_match(tokens: TypstToken[], start: number): number {
+    return _find_closing_match(
+        tokens,
+        start,
+        [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET],
+        [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET]
+    );
+}
+function find_closing_delim(tokens: TypstToken[], start: number): number {
+    return _find_closing_match(
+        tokens,
+        start,
+        [LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET, VERTICAL_BAR],
+        [RIGHT_PARENTHESES, RIGHT_BRACKET, RIGHT_CURLY_BRACKET, VERTICAL_BAR]
+    );
+}
 function find_closing_parenthesis(nodes: TypstNode[], start: number): number {
     const left_parenthesis = new TypstNode('atom', '(');
@@ -261,6 +281,7 @@ const LEFT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '[');
 const RIGHT_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ']');
 const LEFT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '{');
 const RIGHT_CURLY_BRACKET: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '}');
+const VERTICAL_BAR = new TypstToken(TypstTokenType.ELEMENT, '|');
 const COMMA = new TypstToken(TypstTokenType.ELEMENT, ',');
 const SEMICOLON = new TypstToken(TypstTokenType.ELEMENT, ';');
 const SINGLE_SPACE = new TypstToken(TypstTokenType.SPACE, ' ');
@@ -389,9 +410,13 @@ export class TypstParser {
                     casesNode.setOptions(named_params);
                     return [casesNode, newPos];
                 }
+                if (firstToken.value === 'lr') {
+                    const [args, newPos, lrData] = this.parseLrArguments(tokens, start + 1);
+                    const func_call = new TypstNode('funcCall', firstToken.value, args, lrData);
+                    return [func_call, newPos];
+                }
                 const [args, newPos] = this.parseArguments(tokens, start + 1);
-                const func_call = new TypstNode('funcCall', firstToken.value);
-                func_call.args = args;
+                const func_call = new TypstNode('funcCall', firstToken.value, args);
                 return [func_call, newPos];
             }
         }
@@ -405,6 +430,28 @@ export class TypstParser {
         return [this.parseCommaSeparatedArguments(tokens, start + 1, end), end + 1];
     }
+    // start: the position of the left parentheses
+    parseLrArguments(tokens: TypstToken[], start: number): [TypstNode[], number, TypstLrData] {
+        if (tokens[start + 1].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET, VERTICAL_BAR])) {
+            const end = find_closing_match(tokens, start);
+            const inner_start = start + 1;
+            const inner_end = find_closing_delim(tokens, inner_start);
+            const inner_args= this.parseCommaSeparatedArguments(tokens, inner_start + 1, inner_end);
+            return [
+                inner_args,
+                end + 1,
+                {leftDelim: tokens[inner_start].value, rightDelim: tokens[inner_end].value} as TypstLrData
+            ];
+        } else {
+            const [args, end] = this.parseArguments(tokens, start);
+            return [
+                args,
+                end,
+                {leftDelim: null, rightDelim: null} as TypstLrData,
+            ];
+        }
+    }
     // start: the position of the left parentheses
     parseGroupsOfArguments(tokens: TypstToken[], start: number, newline_token = SEMICOLON): [TypstNode[][], TypstNamedParams, number] {
         const end = find_closing_match(tokens, start);
@@ -472,7 +519,7 @@ export class TypstParser {
                 pos = next_stop + 1;
             }
         }
         return [matrix, named_params, end + 1];
     }
@@ -481,8 +528,7 @@ export class TypstParser {
         const args: TypstNode[] = [];
         let pos = start;
         while (pos < end) {
-            let arg = new TypstNode('group', '', []);
+            let nodes: TypstNode[] = [];
             while(pos < end) {
                 if(tokens[pos].eq(COMMA)) {
                     pos += 1;
@@ -493,14 +539,18 @@ export class TypstParser {
                 }
                 const [argItem, newPos] = this.parseNextExpr(tokens, pos);
                 pos = newPos;
-                arg.args!.push(argItem);
+                nodes.push(argItem);
             }
-            if(arg.args!.length === 0) {
+            let arg: TypstNode;
+            if (nodes.length === 0) {
                 arg = TYPST_EMPTY_NODE;
-            } else if (arg.args!.length === 1) {
-                arg = arg.args![0];
+            } else if (nodes.length === 1) {
+                arg = nodes[0];
+            } else {
+                arg = process_operators(nodes);
             }
             args.push(arg);
         }
         return args;

package/src/typst-writer.ts (CHANGED)

@@ -84,8 +84,6 @@ export class TypstWriter {
         no_need_space ||= /^[_^,;!]$/.test(str);
         // putting a prime
         no_need_space ||= str === "'";
-        // continue a number
-        no_need_space ||= /[0-9]$/.test(this.buffer) && /^[0-9]/.test(str);
         // leading sign. e.g. produce "+1" instead of " +1"
         no_need_space ||= /[\(\[{]\s*(-|\+)$/.test(this.buffer) || this.buffer === "-" || this.buffer === "+";
         // new line

package/tools/make-symbol-map.py (CHANGED)

@@ -26,5 +26,10 @@ if __name__ == '__main__':
             # We only keep the first one
             if latex not in symbol_map:
                 symbol_map[latex] = typst
-                # print(f"    ['{latex[1:]}', '{typst}'],")
-                print(f'{latex[1:]} = "{typst}"')
+    # sort the pairs with alphabetical order of latex
+    sorted_keys = sorted(list(symbol_map.keys()), key=str.lower)
+    sorted_symbol_map = [(key, symbol_map[key]) for key in sorted_keys]
+    for latex, typst in sorted_symbol_map:
+        print(f"    ['{latex[1:]}', '{typst}'],")
+        # print(f'{latex[1:]} = "{typst}"')