npm - tex2typst - Versions diffs - 0.2.6 → 0.2.8 - Mend

tex2typst 0.2.6 → 0.2.8

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/src/parser.ts CHANGED Viewed

@@ -1,4 +1,5 @@
-import { TexNode, TexSupsubData } from "./types";
+import { symbolMap } from "./map";
+import { TexNode, TexSupsubData, Token, TokenType } from "./types";
 const UNARY_COMMANDS = [
@@ -42,7 +43,7 @@ const BINARY_COMMANDS = [
     'tbinom',
 ]
-const EMPTY_NODE = { 'type': 'empty', 'content': '' }
+const EMPTY_NODE: TexNode = { type: 'empty', content: '' };
 function assert(condition: boolean, message: string = ''): void {
     if (!condition) {
@@ -60,8 +61,8 @@ function get_command_param_num(command: string): number {
     }
 }
-const LEFT_CURLY_BRACKET: Token = {type: 'control', value: '{'};
-const RIGHT_CURLY_BRACKET: Token = {type: 'control', value: '}'};
+const LEFT_CURLY_BRACKET: Token = {type: TokenType.CONTROL, value: '{'};
+const RIGHT_CURLY_BRACKET: Token = {type: TokenType.CONTROL, value: '}'};
 function find_closing_curly_bracket(tokens: Token[], start: number): number {
     assert(token_eq(tokens[start], LEFT_CURLY_BRACKET));
@@ -83,8 +84,8 @@ function find_closing_curly_bracket(tokens: Token[], start: number): number {
     return pos - 1;
 }
-const LEFT_SQUARE_BRACKET: Token = {type: 'element', value: '['};
-const RIGHT_SQUARE_BRACKET: Token = {type: 'element', value: ']'};
+const LEFT_SQUARE_BRACKET: Token = {type: TokenType.ELEMENT, value: '['};
+const RIGHT_SQUARE_BRACKET: Token = {type: TokenType.ELEMENT, value: ']'};
 function find_closing_square_bracket(tokens: Token[], start: number): number {
     assert(token_eq(tokens[start], LEFT_SQUARE_BRACKET));
@@ -117,7 +118,7 @@ function isdigit(char: string): boolean {
 function eat_whitespaces(tokens: Token[], start: number): Token[] {
     let pos = start;
-    while (pos < tokens.length && ['whitespace', 'newline'].includes(tokens[pos].type)) {
+    while (pos < tokens.length && [TokenType.WHITESPACE, TokenType.NEWLINE].includes(tokens[pos].type)) {
         pos++;
     }
     return tokens.slice(start, pos);
@@ -126,9 +127,9 @@ function eat_whitespaces(tokens: Token[], start: number): Token[] {
 function eat_parenthesis(tokens: Token[], start: number): Token | null {
     const firstToken = tokens[start];
-    if (firstToken.type === 'element' && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
+    if (firstToken.type === TokenType.ELEMENT && ['(', ')', '[', ']', '|', '\\{', '\\}'].includes(firstToken.value)) {
         return firstToken;
-    } else if (firstToken.type === 'command' && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
+    } else if (firstToken.type === TokenType.COMMAND && ['lfloor', 'rfloor', 'lceil', 'rceil', 'langle', 'rangle'].includes(firstToken.value.slice(1))) {
         return firstToken;
     } else {
         return null;
@@ -137,7 +138,7 @@ function eat_parenthesis(tokens: Token[], start: number): Token | null {
 function eat_primes(tokens: Token[], start: number): number {
     let pos = start;
-    while (pos < tokens.length && token_eq(tokens[pos], { type: 'element', value: "'" })) {
+    while (pos < tokens.length && token_eq(tokens[pos], { type: TokenType.ELEMENT, value: "'" })) {
         pos += 1;
     }
     return pos - start;
@@ -155,8 +156,8 @@ function eat_command_name(latex: string, start: number): string {
-const LEFT_COMMAND: Token = { type: 'command', value: '\\left' };
-const RIGHT_COMMAND: Token = { type: 'command', value: '\\right' };
+const LEFT_COMMAND: Token = { type: TokenType.COMMAND, value: '\\left' };
+const RIGHT_COMMAND: Token = { type: TokenType.COMMAND, value: '\\right' };
 function find_closing_right_command(tokens: Token[], start: number): number {
     let count = 1;
@@ -178,8 +179,8 @@ function find_closing_right_command(tokens: Token[], start: number): number {
 }
-const BEGIN_COMMAND: Token = { type: 'command', value: '\\begin' };
-const END_COMMAND: Token = { type: 'command', value: '\\end' };
+const BEGIN_COMMAND: Token = { type: TokenType.COMMAND, value: '\\begin' };
+const END_COMMAND: Token = { type: TokenType.COMMAND, value: '\\end' };
 function find_closing_end_command(tokens: Token[], start: number): number {
@@ -226,12 +227,7 @@ function find_closing_curly_bracket_char(latex: string, start: number): number {
 }
-interface Token {
-    type: 'element' | 'command' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'unknown';
-    value: string;
-}
-function tokenize(latex: string): Token[] {
+export function tokenize(latex: string): Token[] {
     const tokens: Token[] = [];
     let pos = 0;
@@ -244,7 +240,7 @@ function tokenize(latex: string): Token[] {
                 while (newPos < latex.length && latex[newPos] !== '\n') {
                     newPos += 1;
                 }
-                token = { type: 'comment', value: latex.slice(pos + 1, newPos) };
+                token = { type: TokenType.COMMENT, value: latex.slice(pos + 1, newPos) };
                 pos = newPos;
                 break;
             }
@@ -253,19 +249,19 @@ function tokenize(latex: string): Token[] {
             case '_':
             case '^':
             case '&':
-                token = { type: 'control', value: firstChar};
+                token = { type: TokenType.CONTROL, value: firstChar};
                 pos++;
                 break;
             case '\n':
-                token = { type: 'newline', value: firstChar};
+                token = { type: TokenType.NEWLINE, value: firstChar};
                 pos++;
                 break;
             case '\r': {
                 if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
-                    token = { type: 'newline', value: '\n' };
+                    token = { type: TokenType.NEWLINE, value: '\n' };
                     pos += 2;
                 } else {
-                    token = { type: 'newline', value: '\n' };
+                    token = { type: TokenType.NEWLINE, value: '\n' };
                     pos ++;
                 }
                 break;
@@ -275,7 +271,7 @@ function tokenize(latex: string): Token[] {
                 while (newPos < latex.length && latex[newPos] === ' ') {
                     newPos += 1;
                 }
-                token = {type: 'whitespace', value: latex.slice(pos, newPos)};
+                token = {type: TokenType.WHITESPACE, value: latex.slice(pos, newPos)};
                 pos = newPos;
                 break;
             }
@@ -285,12 +281,12 @@ function tokenize(latex: string): Token[] {
                 }
                 const firstTwoChars = latex.slice(pos, pos + 2);
                 if (['\\\\', '\\,'].includes(firstTwoChars)) {
-                    token = { type: 'control', value: firstTwoChars };
+                    token = { type: TokenType.CONTROL, value: firstTwoChars };
                 } else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
-                    token = { type: 'element', value: firstTwoChars };
+                    token = { type: TokenType.ELEMENT, value: firstTwoChars };
                 } else {
                     const command = eat_command_name(latex, pos + 1);
-                    token = { type: 'command', value: '\\' + command};
+                    token = { type: TokenType.COMMAND, value: '\\' + command};
                 }
                 pos += token.value.length;
                 break;
@@ -301,13 +297,13 @@ function tokenize(latex: string): Token[] {
                     while (newPos < latex.length && isdigit(latex[newPos])) {
                         newPos += 1;
                     }
-                    token = { type: 'element', value: latex.slice(pos, newPos) }
+                    token = { type: TokenType.ELEMENT, value: latex.slice(pos, newPos) }
                 } else if (isalpha(firstChar)) {
-                    token = { type: 'element', value: firstChar };
+                    token = { type: TokenType.ELEMENT, value: firstChar };
                 } else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
-                    token = { type: 'element', value: firstChar }
+                    token = { type: TokenType.ELEMENT, value: firstChar }
                 } else {
-                    token = { type: 'unknown', value: firstChar };
+                    token = { type: TokenType.UNKNOWN, value: firstChar };
                 }
                 pos += token.value.length;
             }
@@ -315,11 +311,11 @@ function tokenize(latex: string): Token[] {
         tokens.push(token);
-        if (token.type === 'command' && ['\\text', '\\begin', '\\end'].includes(token.value)) {
+        if (token.type === TokenType.COMMAND && ['\\text', '\\operatorname', '\\begin', '\\end'].includes(token.value)) {
             if (pos >= latex.length || latex[pos] !== '{') {
                 throw new LatexParserError(`No content for ${token.value} command`);
             }
-            tokens.push({ type: 'control', value: '{' });
+            tokens.push({ type: TokenType.CONTROL, value: '{' });
             const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
             pos++;
             let textInside = latex.slice(pos, posClosingBracket);
@@ -328,8 +324,8 @@ function tokenize(latex: string): Token[] {
             for (const char of chars) {
                 textInside = textInside.replaceAll('\\' + char, char);
             }
-            tokens.push({ type: 'text', value: textInside });
-            tokens.push({ type: 'control', value: '}' });
+            tokens.push({ type: TokenType.TEXT, value: textInside });
+            tokens.push({ type: TokenType.CONTROL, value: '}' });
             pos = posClosingBracket + 1;
         }
     }
@@ -351,8 +347,8 @@ export class LatexParserError extends Error {
 type ParseResult = [TexNode, number];
-const SUB_SYMBOL:Token = { type: 'control', value: '_' };
-const SUP_SYMBOL:Token = { type: 'control', value: '^' };
+const SUB_SYMBOL:Token = { type: TokenType.CONTROL, value: '_' };
+const SUP_SYMBOL:Token = { type: TokenType.CONTROL, value: '^' };
 export class LatexParser {
     space_sensitive: boolean;
@@ -443,7 +439,7 @@ export class LatexParser {
             if (num_prime > 0) {
                 res.sup = { type: 'ordgroup', content: '', args:  [] };
                 for (let i = 0; i < num_prime; i++) {
-                    res.sup.args!.push({ type: 'symbol', content: '\\prime' });
+                    res.sup.args!.push({ type: 'element', content: "'" });
                 }
                 if (sup) {
                     res.sup.args!.push(sup);
@@ -464,13 +460,17 @@ export class LatexParser {
         const firstToken = tokens[start];
         const tokenType = firstToken.type;
         switch (tokenType) {
-            case 'element':
-            case 'text':
-            case 'comment':
-            case 'whitespace':
-            case 'newline':
-                return [{ type: tokenType, content: firstToken.value }, start + 1];
-            case 'command':
+            case TokenType.ELEMENT:
+                return [{ type: 'element', content: firstToken.value }, start + 1];
+            case TokenType.TEXT:
+                return [{ type: 'text', content: firstToken.value }, start + 1];
+            case TokenType.COMMENT:
+                return [{ type: 'comment', content: firstToken.value }, start + 1];
+            case TokenType.WHITESPACE:
+                return [{ type: 'whitespace', content: firstToken.value }, start + 1];
+            case TokenType.NEWLINE:
+                return [{ type: 'newline', content: firstToken.value }, start + 1];
+            case TokenType.COMMAND:
                 if (token_eq(firstToken, BEGIN_COMMAND)) {
                     return this.parseBeginEndExpr(tokens, start);
                 } else if (token_eq(firstToken, LEFT_COMMAND)) {
@@ -478,7 +478,7 @@ export class LatexParser {
                 } else {
                     return this.parseCommandExpr(tokens, start);
                 }
-            case 'control':
+            case TokenType.CONTROL:
                 const controlChar = firstToken.value;
                 switch (controlChar) {
                     case '{':
@@ -492,22 +492,10 @@ export class LatexParser {
                     case '\\,':
                         return [{ type: 'control', content: '\\,' }, start + 1];
                     case '_': {
-                        let [sub, pos] = this.parseNextExpr(tokens, start + 1);
-                        let sup: TexNode | undefined = undefined;
-                        if (pos < tokens.length && token_eq(tokens[pos], SUP_SYMBOL)) {
-                            [sup, pos] = this.parseNextExpr(tokens, pos + 1);
-                        }
-                        const subData = { base: EMPTY_NODE, sub, sup };
-                        return [{ type: 'supsub', content: '', data: subData }, pos];
+                        return [ EMPTY_NODE, start];
                     }
                     case '^': {
-                        let [sup, pos] = this.parseNextExpr(tokens, start + 1);
-                        let sub: TexNode | undefined = undefined;
-                        if (pos < tokens.length && token_eq(tokens[pos], SUB_SYMBOL)) {
-                            [sub, pos] = this.parseNextExpr(tokens, pos + 1);
-                        }
-                        const supData = { base: EMPTY_NODE, sub, sup };
-                        return [{ type: 'supsub', content: '', data: supData }, pos];
+                        return [ EMPTY_NODE, start];
                     }
                     case '&':
                         return [{ type: 'control', content: '&' }, start + 1];
@@ -520,7 +508,7 @@ export class LatexParser {
     }
     parseCommandExpr(tokens: Token[], start: number): ParseResult {
-        assert(tokens[start].type === 'command');
+        assert(tokens[start].type === TokenType.COMMAND);
         const command = tokens[start].value; // command name starts with a \
@@ -530,35 +518,42 @@ export class LatexParser {
             throw new LatexParserError('Unexpected command: ' + command);
         }
         const paramNum = get_command_param_num(command.slice(1));
-        if (paramNum === 0) {
-            return [{ type: 'symbol', content: command }, pos];
-        } else if (paramNum === 1) {
-            if (command === '\\sqrt' && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
-                const posLeftSquareBracket = pos;
-                const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
-                const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
-                const exponent = this.parse(exprInside);
-                const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
-                return [{ type: 'unaryFunc', content: command, args: [arg1], data: exponent }, newPos];
-            } else if (command === '\\text') {
-                if (pos + 2 >= tokens.length) {
-                    throw new LatexParserError('Expecting content for \\text command');
+        switch (paramNum) {
+            case 0:
+                if (!symbolMap.has(command.slice(1))) {
+                    return [{ type: 'unknownMacro', content: command }, pos];
                 }
-                assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
-                assert(tokens[pos + 1].type === 'text');
-                assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
-                const text = tokens[pos + 1].value;
-                return [{ type: 'text', content: text }, pos + 3];
+                return [{ type: 'symbol', content: command }, pos];
+            case 1: {
+                if (command === '\\sqrt' && pos < tokens.length && token_eq(tokens[pos], LEFT_SQUARE_BRACKET)) {
+                    const posLeftSquareBracket = pos;
+                    const posRightSquareBracket = find_closing_square_bracket(tokens, pos);
+                    const exprInside = tokens.slice(posLeftSquareBracket + 1, posRightSquareBracket);
+                    const exponent = this.parse(exprInside);
+                    const [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, posRightSquareBracket + 1);
+                    return [{ type: 'unaryFunc', content: command, args: [arg1], data: exponent }, newPos];
+                } else if (command === '\\text') {
+                    if (pos + 2 >= tokens.length) {
+                        throw new LatexParserError('Expecting content for \\text command');
+                    }
+                    assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
+                    assert(tokens[pos + 1].type === TokenType.TEXT);
+                    assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
+                    const text = tokens[pos + 1].value;
+                    return [{ type: 'text', content: text }, pos + 3];
+                }
+                let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
+                return [{ type: 'unaryFunc', content: command, args: [arg1] }, newPos];
             }
-            let [arg1, newPos] = this.parseNextExprWithoutSupSub(tokens, pos);
-            return [{ type: 'unaryFunc', content: command, args: [arg1] }, newPos];
-        } else if (paramNum === 2) {
-            const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
-            const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
-            return [{ type: 'binaryFunc', content: command, args: [arg1, arg2] }, pos2];
-        } else {
-            throw new Error( 'Invalid number of parameters');
+            case 2: {
+                const [arg1, pos1] = this.parseNextExprWithoutSupSub(tokens, pos);
+                const [arg2, pos2] = this.parseNextExprWithoutSupSub(tokens, pos1);
+                return [{ type: 'binaryFunc', content: command, args: [arg1, arg2] }, pos2];
+            }
+            default:
+                throw new Error( 'Invalid number of parameters');
         }
     }
@@ -598,12 +593,12 @@ export class LatexParser {
         const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
         const body = this.parse(exprInside);
-        const args = [
+        const args: TexNode[] = [
             { type: 'element', content: leftDelimiter.value },
             body,
             { type: 'element', content: rightDelimiter.value }
         ]
-        const res = { type: 'leftright', content: '', args: args };
+        const res: TexNode = { type: 'leftright', content: '', args: args };
         return [res, pos];
     }
@@ -612,7 +607,7 @@ export class LatexParser {
         let pos = start + 1;
         assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
-        assert(tokens[pos + 1].type === 'text');
+        assert(tokens[pos + 1].type === TokenType.TEXT);
         assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
         const envName = tokens[pos + 1].value;
         pos += 3;
@@ -629,7 +624,7 @@ export class LatexParser {
         pos = endIdx + 1;
         assert(token_eq(tokens[pos], LEFT_CURLY_BRACKET));
-        assert(tokens[pos + 1].type === 'text');
+        assert(tokens[pos + 1].type === TokenType.TEXT);
         assert(token_eq(tokens[pos + 2], RIGHT_CURLY_BRACKET));
         if (tokens[pos + 1].value !== envName) {
             throw new LatexParserError('Mismatched \\begin and \\end environments');
@@ -638,11 +633,11 @@ export class LatexParser {
         const exprInside = tokens.slice(exprInsideStart, exprInsideEnd);
         // ignore whitespaces and '\n' before \end{envName}
-        while(exprInside.length > 0 && ['whitespace', 'newline'].includes(exprInside[exprInside.length - 1].type)) {
+        while(exprInside.length > 0 && [TokenType.WHITESPACE, TokenType.NEWLINE].includes(exprInside[exprInside.length - 1].type)) {
             exprInside.pop();
         }
         const body = this.parseAligned(exprInside);
-        const res = { type: 'beginend', content: envName, data: body };
+        const res: TexNode = { type: 'beginend', content: envName, data: body };
         return [res, pos];
     }
@@ -677,17 +672,40 @@ export class LatexParser {
     }
 }
-export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
-    const parser = new LatexParser();
-    const original_tokens = tokenize(tex);
-    let processed_tokens: Token[] = [];
-    for (const token of original_tokens) {
-        if (token.type === 'command' && customTexMacros[token.value]) {
+// Remove all whitespace before or after _ or ^
+function passIgnoreWhitespaceBeforeScriptMark(tokens: Token[]): Token[] {
+    const is_script_mark = (token: Token) => token_eq(token, SUB_SYMBOL) || token_eq(token, SUP_SYMBOL);
+    let out_tokens: Token[] = [];
+    for (let i = 0; i < tokens.length; i++) {
+        if (tokens[i].type === TokenType.WHITESPACE && i + 1 < tokens.length && is_script_mark(tokens[i + 1])) {
+            continue;
+        }
+        if (tokens[i].type === TokenType.WHITESPACE && i - 1 >= 0 && is_script_mark(tokens[i - 1])) {
+            continue;
+        }
+        out_tokens.push(tokens[i]);
+    }
+    return out_tokens;
+}
+// expand custom tex macros
+function passExpandCustomTexMacros(tokens: Token[], customTexMacros: {[key: string]: string}): Token[] {
+    let out_tokens: Token[] = [];
+    for (const token of tokens) {
+        if (token.type === TokenType.COMMAND && customTexMacros[token.value]) {
             const expanded_tokens = tokenize(customTexMacros[token.value]);
-            processed_tokens = processed_tokens.concat(expanded_tokens);
+            out_tokens = out_tokens.concat(expanded_tokens);
         } else {
-            processed_tokens.push(token);
+            out_tokens.push(token);
         }
     }
-    return parser.parse(processed_tokens);
+    return out_tokens;
+}
+export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
+    const parser = new LatexParser();
+    let tokens = tokenize(tex);
+    tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
+    tokens = passExpandCustomTexMacros(tokens, customTexMacros);
+    return parser.parse(tokens);
 }

package/src/types.ts CHANGED Viewed

@@ -1,3 +1,20 @@
+export enum TokenType {
+    ELEMENT,
+    COMMAND,
+    TEXT,
+    COMMENT,
+    WHITESPACE,
+    NEWLINE,
+    CONTROL,
+    UNKNOWN,
+}
+export interface Token {
+    type: TokenType;
+    value: string;
+}
 export interface TexSupsubData {
     base: TexNode;
     sup?: TexNode;
@@ -9,7 +26,8 @@ export type TexSqrtData = TexNode;
 export type TexArrayData = TexNode[][];
 export interface TexNode {
-    type: string;
+    type: 'element' | 'text' | 'comment' | 'whitespace' | 'newline' | 'control' | 'ordgroup' | 'supsub'
+             | 'unaryFunc' | 'binaryFunc' | 'leftright' | 'beginend' | 'symbol' | 'empty' | 'unknownMacro';
     content: string;
     args?: TexNode[];
     // position?: Position;
@@ -19,10 +37,20 @@ export interface TexNode {
     data?: TexSqrtData | TexSupsubData | TexArrayData;
 }
+export interface TypstSupsubData {
+    base: TypstNode;
+    sup?: TypstNode;
+    sub?: TypstNode;
+}
+export type TypstArrayData = TypstNode[][];
 export interface TypstNode {
-    type: 'atom' | 'symbol' | 'text' | 'softSpace' | 'comment' | 'newline',
+    type: 'atom' | 'symbol' | 'text' | 'softSpace' | 'comment' | 'newline'
+            | 'empty' | 'group' | 'supsub' | 'unaryFunc' | 'binaryFunc' | 'align' | 'matrix' | 'unknown';
     content: string;
     args?: TypstNode[];
+    data?: TypstSupsubData | TypstArrayData;
 }
 export interface Tex2TypstOptions {