npm - tex2typst - Versions diffs - 0.3.1 → 0.3.3 - Mend

tex2typst 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +5 -3
package/dist/index.js +454 -322
package/dist/jslex.d.ts +105 -0
package/dist/tex-parser.d.ts +1 -1
package/dist/tex2typst.min.js +13 -20
package/dist/types.d.ts +3 -1
package/dist/typst-parser.d.ts +1 -1
package/dist/typst-shorthands.d.ts +3 -0
package/dist/typst-writer.d.ts +9 -3
package/docs/api-reference.md +64 -0
package/package.json +1 -1
package/src/convert.ts +31 -18
package/src/index.ts +11 -14
package/src/jslex.ts +304 -0
package/src/map.ts +13 -36
package/src/tex-parser.ts +44 -137
package/src/types.ts +3 -1
package/src/typst-parser.ts +72 -126
package/src/typst-shorthands.ts +51 -0
package/src/typst-writer.ts +29 -22
package/tools/make-shorthand-map.py +33 -0
package/tools/make-symbol-map.py +4 -3

package/src/tex-parser.ts CHANGED

@@ -1,7 +1,7 @@
 import { symbolMap } from "./map";
 import { TexNode, TexSupsubData, TexToken, TexTokenType } from "./types";
-import { isalpha, isdigit, assert } from "./util";
+import { assert } from "./util";
+import { JSLex, Scanner } from "./jslex";
 const UNARY_COMMANDS = [
     'sqrt',
@@ -33,6 +33,8 @@ const UNARY_COMMANDS = [
     'vec',
     'widehat',
     'widetilde',
+    'overleftarrow',
+    'overrightarrow',
 ]
 const BINARY_COMMANDS = [
@@ -95,15 +97,6 @@ function eat_primes(tokens: TexToken[], start: number): number {
 }
-function eat_command_name(latex: string, start: number): string {
-    let pos = start;
-    while (pos < latex.length && isalpha(latex[pos])) {
-        pos += 1;
-    }
-    return latex.substring(start, pos);
-}
 function find_closing_match(tokens: TexToken[], start: number, leftToken: TexToken, rightToken: TexToken): number {
     assert(tokens[start].eq(leftToken));
     let count = 1;
@@ -141,135 +134,49 @@ function find_closing_end_command(tokens: TexToken[], start: number): number {
     return find_closing_match(tokens, start, BEGIN_COMMAND, END_COMMAND);
 }
-function find_closing_curly_bracket_char(latex: string, start: number): number {
-    assert(latex[start] === '{');
-    let count = 1;
-    let pos = start + 1;
-    while (count > 0) {
-        if (pos >= latex.length) {
-            throw new LatexParserError('Unmatched curly brackets');
-        }
-        if(pos + 1 < latex.length && (['\\{', '\\}'].includes(latex.substring(pos, pos + 2)))) {
-            pos += 2;
-            continue;
-        }
-        if (latex[pos] === '{') {
-            count += 1;
-        } else if (latex[pos] === '}') {
-            count -= 1;
-        }
-        pos += 1;
+function unescape(str: string): string {
+    const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
+    for (const char of chars) {
+        str = str.replaceAll('\\' + char, char);
     }
-    return pos - 1;
+    return str;
 }
-export function tokenize(latex: string): TexToken[] {
-    const tokens: TexToken[] = [];
-    let pos = 0;
-    while (pos < latex.length) {
-        const firstChar = latex[pos];
-        let token: TexToken;
-        switch (firstChar) {
-            case '%': {
-                let newPos = pos + 1;
-                while (newPos < latex.length && latex[newPos] !== '\n') {
-                    newPos += 1;
-                }
-                token = new TexToken(TexTokenType.COMMENT, latex.slice(pos + 1, newPos));
-                pos = newPos;
-                break;
-            }
-            case '{':
-            case '}':
-            case '_':
-            case '^':
-            case '&':
-                token = new TexToken(TexTokenType.CONTROL, firstChar);
-                pos++;
-                break;
-            case '\n':
-                token = new TexToken(TexTokenType.NEWLINE, firstChar);
-                pos++;
-                break;
-            case '\r': {
-                if (pos + 1 < latex.length && latex[pos + 1] === '\n') {
-                    token = new TexToken(TexTokenType.NEWLINE, '\n');
-                    pos += 2;
-                } else {
-                    token = new TexToken(TexTokenType.NEWLINE, '\n');
-                    pos ++;
-                }
-                break;
-            }
-            case ' ': {
-                let newPos = pos;
-                while (newPos < latex.length && latex[newPos] === ' ') {
-                    newPos += 1;
-                }
-                token = new TexToken(TexTokenType.SPACE, latex.slice(pos, newPos));
-                pos = newPos;
-                break;
-            }
-            case '\\': {
-                if (pos + 1 >= latex.length) {
-                    throw new LatexParserError('Expecting command name after \\');
-                }
-                const firstTwoChars = latex.slice(pos, pos + 2);
-                if (['\\\\', '\\,'].includes(firstTwoChars)) {
-                    token = new TexToken(TexTokenType.CONTROL, firstTwoChars);
-                } else if (['\\{','\\}', '\\%', '\\$', '\\&', '\\#', '\\_', '\\|'].includes(firstTwoChars)) {
-                    // \| is double vertical bar, not the same as just |
-                    token = new TexToken(TexTokenType.ELEMENT, firstTwoChars);
-                } else {
-                    const command = eat_command_name(latex, pos + 1);
-                    token = new TexToken(TexTokenType.COMMAND, '\\' + command);
-                }
-                pos += token.value.length;
-                break;
-            }
-            default: {
-                if (isdigit(firstChar)) {
-                    let newPos = pos;
-                    while (newPos < latex.length && isdigit(latex[newPos])) {
-                        newPos += 1;
-                    }
-                    token = new TexToken(TexTokenType.ELEMENT, latex.slice(pos, newPos));
-                } else if (isalpha(firstChar)) {
-                    token = new TexToken(TexTokenType.ELEMENT, firstChar);
-                } else if ('+-*/=\'<>!.,;:?()[]|'.includes(firstChar)) {
-                    token = new TexToken(TexTokenType.ELEMENT, firstChar)
-                } else {
-                    token = new TexToken(TexTokenType.UNKNOWN, firstChar);
-                }
-                pos += token.value.length;
-            }
-        }
-        tokens.push(token);
-        if (token.type === TexTokenType.COMMAND && ['\\text', '\\operatorname', '\\begin', '\\end'].includes(token.value)) {
-            if (pos >= latex.length || latex[pos] !== '{') {
-                throw new LatexParserError(`No content for ${token.value} command`);
-            }
-            tokens.push(new TexToken(TexTokenType.CONTROL, '{'));
-            const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
-            pos++;
-            let textInside = latex.slice(pos, posClosingBracket);
-            // replace all escape characters with their actual characters
-            const chars = ['{', '}', '\\', '$', '&', '#', '_', '%'];
-            for (const char of chars) {
-                textInside = textInside.replaceAll('\\' + char, char);
-            }
-            tokens.push(new TexToken(TexTokenType.TEXT, textInside));
-            tokens.push(new TexToken(TexTokenType.CONTROL, '}'));
-            pos = posClosingBracket + 1;
+const rules_map = new Map<string, (a: Scanner<TexToken>) => TexToken | TexToken[]>([
+    [
+        String.raw`\\(text|operatorname|begin|end){.+?}`, (s) => {
+            const text = s.text()!;
+            const command = text.substring(0, text.indexOf('{'));
+            const text_inside = text.substring(text.indexOf('{') + 1, text.lastIndexOf('}'));
+            return [
+                new TexToken(TexTokenType.COMMAND, command),
+                new TexToken(TexTokenType.CONTROL, '{'),
+                new TexToken(TexTokenType.TEXT, unescape(text_inside)),
+                new TexToken(TexTokenType.CONTROL, '}')
+            ]
         }
-    }
-    return tokens;
+    ],
+    [String.raw`%[^\n]*`, (s) => new TexToken(TexTokenType.COMMENT, s.text()!.substring(1))],
+    [String.raw`[{}_^&]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
+    [String.raw`\r?\n`, (_s) => new TexToken(TexTokenType.NEWLINE, "\n")],
+    [String.raw`\s+`, (s) => new TexToken(TexTokenType.SPACE, s.text()!)],
+    [String.raw`\\[\\,]`, (s) => new TexToken(TexTokenType.CONTROL, s.text()!)],
+    [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(TexTokenType.COMMAND, s.text()!)],
+    [String.raw`[0-9]+`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`[a-zA-Z]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(TexTokenType.ELEMENT, s.text()!)],
+    [String.raw`.`, (s) => new TexToken(TexTokenType.UNKNOWN, s.text()!)],
+]);
+const spec = {
+    "start": rules_map
+};
+export function tokenize_tex(input: string): TexToken[] {
+    const lexer = new JSLex<TexToken>(spec);
+    return lexer.collect(input);
 }
@@ -633,7 +540,7 @@ function passExpandCustomTexMacros(tokens: TexToken[], customTexMacros: {[key: s
     let out_tokens: TexToken[] = [];
     for (const token of tokens) {
         if (token.type === TexTokenType.COMMAND && customTexMacros[token.value]) {
-            const expanded_tokens = tokenize(customTexMacros[token.value]);
+            const expanded_tokens = tokenize_tex(customTexMacros[token.value]);
             out_tokens = out_tokens.concat(expanded_tokens);
         } else {
             out_tokens.push(token);
@@ -644,7 +551,7 @@ function passExpandCustomTexMacros(tokens: TexToken[], customTexMacros: {[key: s
 export function parseTex(tex: string, customTexMacros: {[key: string]: string}): TexNode {
     const parser = new LatexParser();
-    let tokens = tokenize(tex);
+    let tokens = tokenize_tex(tex);
     tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
     tokens = passExpandCustomTexMacros(tokens, customTexMacros);
     return parser.parse(tokens);

package/src/types.ts CHANGED

@@ -352,7 +352,7 @@ export type TypstArrayData = TypstNode[][];
 type TypstNodeType = 'atom' | 'symbol' | 'text' | 'control' | 'comment' | 'whitespace'
             | 'empty' | 'group' | 'supsub' | 'funcCall' | 'fraction' | 'align' | 'matrix' | 'unknown';
-export type TypstPrimitiveValue = string | boolean | null;
+export type TypstPrimitiveValue = string | boolean | null | TypstToken;
 export type TypstNamedParams = { [key: string]: TypstPrimitiveValue };
 // #none
@@ -389,8 +389,10 @@ export class TypstNode {
 export interface Tex2TypstOptions {
     nonStrict?: boolean; // default is true
     preferTypstIntrinsic?: boolean; // default is true,
+    preferShorthands?: boolean; // default is true
     keepSpaces?: boolean; // default is false
     fracToSlash?: boolean; // default is true
+    inftyToOo?: boolean; // default is false
     customTexMacros?: { [key: string]: string };
     // TODO: custom typst functions
 }

package/src/typst-parser.ts CHANGED

@@ -1,7 +1,14 @@
 import { array_find } from "./generic";
 import { TYPST_NONE, TypstNamedParams, TypstNode, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
-import { assert, isalpha, isdigit } from "./util";
+import { assert, isalpha } from "./util";
+import { reverseShorthandMap } from "./typst-shorthands";
+import { JSLex, Scanner } from "./jslex";
+const TYPST_EMPTY_NODE = new TypstNode('empty', '');
+const TYPST_SHORTHANDS = Array.from(reverseShorthandMap.keys());
 // TODO: In Typst, y' ' is not the same as y''.
 // The parser should be able to parse the former correctly.
@@ -14,137 +21,76 @@ function eat_primes(tokens: TypstToken[], start: number): number {
 }
-function eat_identifier_name(typst: string, start: number): string {
-    let pos = start;
-    while (pos < typst.length && (isalpha(typst[pos]) || (typst[pos] === '.'))) {
-        pos += 1;
-    }
-    return typst.substring(start, pos);
+function generate_regex_for_shorthands(): string {
+    const regex_list = TYPST_SHORTHANDS.map((s) => {
+        s = s.replaceAll('|', '\\|');
+        s = s.replaceAll('.', '\\.');
+        s = s.replaceAll('[', '\\[');
+        s = s.replaceAll(']', '\\]');
+        return s;
+    });
+    return `(${regex_list.join('|')})`;
 }
-const TYPST_EMPTY_NODE = new TypstNode('empty', '');
-export function tokenize_typst(typst: string): TypstToken[] {
-    const tokens: TypstToken[] = [];
-    let pos = 0;
-    while (pos < typst.length) {
-        const firstChar = typst[pos];
-        let token: TypstToken;
-        switch (firstChar) {
-            case '_':
-            case '^':
-            case '&':
-                token = new TypstToken(TypstTokenType.CONTROL, firstChar);
-                pos++;
-                break;
-            case '\n':
-                token = new TypstToken(TypstTokenType.NEWLINE, firstChar);
-                pos++;
-                break;
-            case '\r': {
-                if (pos + 1 < typst.length && typst[pos + 1] === '\n') {
-                    token = new TypstToken(TypstTokenType.NEWLINE, '\n');
-                    pos += 2;
-                } else {
-                    token = new TypstToken(TypstTokenType.NEWLINE, '\n');
-                    pos++;
-                }
-                break;
-            }
-            case ' ': {
-                let newPos = pos;
-                while (newPos < typst.length && typst[newPos] === ' ') {
-                    newPos++;
-                }
-                token = new TypstToken(TypstTokenType.SPACE, typst.substring(pos, newPos));
-                pos = newPos;
-                break;
-            }
-            case '/': {
-                if (pos < typst.length && typst[pos + 1] === '/') {
-                    let newPos = pos + 2;
-                    while (newPos < typst.length && typst[newPos] !== '\n') {
-                        newPos++;
-                    }
-                    token = new TypstToken(TypstTokenType.COMMENT, typst.slice(pos + 2, newPos));
-                    pos = newPos;
-                } else {
-                    token = new TypstToken(TypstTokenType.ELEMENT, '/');
-                    pos++;
-                }
-                break;
-            }
-            case '\\': {
-                if (pos + 1 >= typst.length) {
-                    throw new Error('Expecting a character after \\');
-                }
-                const firstTwoChars = typst.substring(pos, pos + 2);
-                if (['\\$', '\\&', '\\#', '\\_'].includes(firstTwoChars)) {
-                    token = new TypstToken(TypstTokenType.ELEMENT, firstTwoChars);
-                    pos += 2;
-                } else if (['\\\n', '\\ '].includes(firstTwoChars)) {
-                    token = new TypstToken(TypstTokenType.CONTROL, '\\');
-                    pos += 1;
-                } else {
-                    // this backslash is dummy and will be ignored in later stages
-                    token = new TypstToken(TypstTokenType.CONTROL, '');
-                    pos++;
-                }
-                break;
-            }
-            case '"': {
-                let newPos = pos + 1;
-                while (newPos < typst.length) {
-                    if (typst[newPos] === '"' && typst[newPos - 1] !== '\\') {
-                        break;
-                    }
-                    newPos++;
-                }
-                let text = typst.substring(pos + 1, newPos);
-                // replace all escape characters with their actual characters
-                const chars = ['"', '\\'];
-                for (const char of chars) {
-                    text = text.replaceAll('\\' + char, char);
-                }
-                token = new TypstToken(TypstTokenType.TEXT, text);
-                pos = newPos + 1;
-                break;
-            }
-            default: {
-                if (isdigit(firstChar)) {
-                    let newPos = pos;
-                    while (newPos < typst.length && isdigit(typst[newPos])) {
-                        newPos += 1;
-                    }
-                    if(newPos < typst.length && typst[newPos] === '.') {
-                        newPos += 1;
-                        while (newPos < typst.length && isdigit(typst[newPos])) {
-                            newPos += 1;
-                        }
-                    }
-                    token = new TypstToken(TypstTokenType.ELEMENT, typst.slice(pos, newPos));
-                } else if ('+-*/=\'<>!.,;?()[]|'.includes(firstChar)) {
-                    token = new TypstToken(TypstTokenType.ELEMENT, firstChar)
-                } else if (isalpha(firstChar)) {
-                    const identifier = eat_identifier_name(typst, pos);
-                    const _type = identifier.length === 1 ? TypstTokenType.ELEMENT : TypstTokenType.SYMBOL;
-                    token = new TypstToken(_type, identifier);
-                } else {
-                    token = new TypstToken(TypstTokenType.ELEMENT, firstChar);
-                }
-                pos += token.value.length;
-            }
+const REGEX_SHORTHANDS = generate_regex_for_shorthands();
+const rules_map = new Map<string, (a: Scanner<TypstToken>) => TypstToken | TypstToken[]>([
+    [String.raw`//[^\n]*`, (s) => new TypstToken(TypstTokenType.COMMENT, s.text()!.substring(2))],
+    [String.raw`/`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
+    [String.raw`[_^&]`, (s) => new TypstToken(TypstTokenType.CONTROL, s.text()!)],
+    [String.raw`\r?\n`, (_s) => new TypstToken(TypstTokenType.NEWLINE, "\n")],
+    [String.raw`\s+`, (s) => new TypstToken(TypstTokenType.SPACE, s.text()!)],
+    [String.raw`\\[$&#_]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
+    [String.raw`\\\n`, (s) => {
+        return [
+            new TypstToken(TypstTokenType.CONTROL, "\\"),
+            new TypstToken(TypstTokenType.NEWLINE, "\n"),
+        ]
+    }],
+    [String.raw`\\\s`, (s) => {
+        return [
+            new TypstToken(TypstTokenType.CONTROL, "\\"),
+            new TypstToken(TypstTokenType.SPACE, " "),
+        ]
+    }],
+    // this backslash is dummy and will be ignored in later stages
+    [String.raw`\\\S`, (_s) => new TypstToken(TypstTokenType.CONTROL, "")],
+    [
+        String.raw`"([^"]|(\\"))*"`,
+        (s) => {
+            const text = s.text()!.substring(1, s.text()!.length - 1);
+            // replace all escape characters with their actual characters
+            text.replaceAll('\\"', '"');
+            return new TypstToken(TypstTokenType.TEXT, text);
         }
-        tokens.push(token);
-    }
-    return tokens;
+    ],
+    [
+        REGEX_SHORTHANDS,
+        (s) => {
+            const shorthand = s.text()!;
+            const symbol = reverseShorthandMap.get(shorthand)!;
+            return new TypstToken(TypstTokenType.SYMBOL, symbol);
+        }
+    ],
+    [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
+    [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
+    [String.raw`[a-zA-Z\.]+`, (s) => {
+        return new TypstToken(s.text()!.length === 1? TypstTokenType.ELEMENT: TypstTokenType.SYMBOL, s.text()!);
+    }],
+    [String.raw`.`, (s) => new TypstToken(TypstTokenType.ELEMENT, s.text()!)],
+]);
+const spec = {
+    "start": rules_map
+};
+export function tokenize_typst(input: string): TypstToken[] {
+    const lexer = new JSLex<TypstToken>(spec);
+    return lexer.collect(input);
 }
 function find_closing_match(tokens: TypstToken[], start: number): number {
     assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET]));
     let count = 1;

package/src/typst-shorthands.ts ADDED

@@ -0,0 +1,51 @@
+const shorthandMap = new Map<string, string>([
+    ['arrow.l.r.double.long', '<==>'],
+    ['arrow.l.r.long', '<-->'],
+    ['arrow.r.bar', '|->'],
+    ['arrow.r.double.bar', '|=>'],
+    ['arrow.r.double.long', '==>'],
+    ['arrow.r.long', '-->'],
+    ['arrow.r.long.squiggly', '~~>'],
+    ['arrow.r.tail', '>->'],
+    ['arrow.r.twohead', '->>'],
+    ['arrow.l.double.long', '<=='],
+    ['arrow.l.long', '<--'],
+    ['arrow.l.long.squiggly', '<~~'],
+    ['arrow.l.tail', '<-<'],
+    ['arrow.l.twohead', '<<-'],
+    ['arrow.l.r', '<->'],
+    ['arrow.l.r.double', '<=>'],
+    ['colon.double.eq', '::='],
+    ['dots.h', '...'],
+    ['gt.triple', '>>>'],
+    ['lt.triple', '<<<'],
+    ['arrow.r', '->'],
+    ['arrow.r.double', '=>'],
+    ['arrow.r.squiggly', '~>'],
+    ['arrow.l', '<-'],
+    ['arrow.l.squiggly', '<~'],
+    ['bar.v.double', '||'],
+    ['bracket.l.double', '[|'],
+    ['bracket.r.double', '|]'],
+    ['colon.eq', ':='],
+    ['eq.colon', '=:'],
+    ['eq.not', '!='],
+    ['gt.double', '>>'],
+    ['gt.eq', '>='],
+    ['lt.double', '<<'],
+    ['lt.eq', '<='],
+    ['ast.op', '*'],
+    ['minus', '-'],
+    ['tilde.op', '~'],
+]);
+const reverseShorthandMap = new Map<string, string>();
+for (const [key, value] of shorthandMap.entries()) {
+    // filter out single character values ('-', '~', '*')
+    if(value.length > 1) {
+        reverseShorthandMap.set(value, key);
+    }
+}
+export { shorthandMap, reverseShorthandMap };

package/src/typst-writer.ts CHANGED

@@ -1,24 +1,10 @@
 import { TexNode, TypstNode, TypstPrimitiveValue, TypstSupsubData, TypstToken, TypstTokenType } from "./types";
-// symbols that are supported by Typst but not by KaTeX
-export const TYPST_INTRINSIC_SYMBOLS = [
-    'dim',
-    'id',
-    'im',
-    'mod',
-    'Pr',
-    'sech',
-    'csch',
-    // 'sgn
-];
+import { shorthandMap } from "./typst-shorthands";
 function is_delimiter(c: TypstNode): boolean {
     return c.type === 'atom' && ['(', ')', '[', ']', '{', '}', '|', '⌊', '⌋', '⌈', '⌉'].includes(c.content);
 }
 const TYPST_LEFT_PARENTHESIS: TypstToken = new TypstToken(TypstTokenType.ELEMENT, '(');
 const TYPST_RIGHT_PARENTHESIS: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ')');
 const TYPST_COMMA: TypstToken = new TypstToken(TypstTokenType.ELEMENT, ',');
@@ -35,6 +21,8 @@ function typst_primitive_to_string(value: TypstPrimitiveValue) {
         default:
             if (value === null) {
                 return '#none';
+            } else if (value instanceof TypstToken) {
+                return value.toString();
             }
             throw new TypstWriterError(`Invalid primitive value: ${value}`, value);
     }
@@ -50,20 +38,29 @@ export class TypstWriterError extends Error {
     }
 }
+export interface TypstWriterOptions {
+    nonStrict: boolean;
+    preferShorthands: boolean;
+    keepSpaces: boolean;
+    inftyToOo: boolean;
+}
 export class TypstWriter {
     private nonStrict: boolean;
-    private preferTypstIntrinsic: boolean;
+    private preferShorthands: boolean;
     private keepSpaces: boolean;
+    private inftyToOo: boolean;
     protected buffer: string = "";
     protected queue: TypstToken[] = [];
     private insideFunctionDepth = 0;
-    constructor(nonStrict: boolean, preferTypstIntrinsic: boolean, keepSpaces: boolean) {
-        this.nonStrict = nonStrict;
-        this.preferTypstIntrinsic = preferTypstIntrinsic;
-        this.keepSpaces = keepSpaces;
+    constructor(opt: TypstWriterOptions) {
+        this.nonStrict = opt.nonStrict;
+        this.preferShorthands = opt.preferShorthands;
+        this.keepSpaces = opt.keepSpaces;
+        this.inftyToOo = opt.inftyToOo;
     }
@@ -123,9 +120,19 @@ export class TypstWriter {
                 }
                 break;
             }
-            case 'symbol':
-                this.queue.push(new TypstToken(TypstTokenType.SYMBOL, node.content));
+            case 'symbol': {
+                let content = node.content;
+                if(this.preferShorthands) {
+                    if (shorthandMap.has(content)) {
+                        content = shorthandMap.get(content)!;
+                    }
+                }
+                if (this.inftyToOo && content === 'infinity') {
+                    content = 'oo';
+                }
+                this.queue.push(new TypstToken(TypstTokenType.SYMBOL, content));
                 break;
+            }
             case 'text':
                 this.queue.push(new TypstToken(TypstTokenType.TEXT, node.content));
                 break;

package/tools/make-shorthand-map.py ADDED

@@ -0,0 +1,33 @@
+import urllib.request
+import html
+from bs4 import BeautifulSoup
+if __name__ == '__main__':
+    shorthand_map = []
+    url = "https://typst.app/docs/reference/symbols/"
+    with urllib.request.urlopen(url) as response:
+        html_text = response.read().decode('utf-8')
+    soup = BeautifulSoup(html_text, 'html.parser')
+    # <ul class="symbol-grid">
+    ul_list = soup.find_all('ul', class_='symbol-grid')
+    # ul_shorthands_markup = ul_list[0]
+    ul_shorthands_math = ul_list[1]
+    li_list = ul_shorthands_math.find_all('li')
+    for li in li_list:
+        # e.g. <li id="symbol-arrow.r" data-math-shorthand="-&gt;"><button>...</button></li>
+        # ==> typst = "arrow.r"
+        # ==> shorthand = "->"
+        typst = li['id'][7:]
+        shorthand = html.unescape(li['data-math-shorthand'])
+        shorthand_map.append((typst, shorthand))
+    # Sort by length of shorthand, order from longest to shortest
+    shorthand_map.sort(key=lambda x: len(x[1]), reverse=True)
+    for typst, shorthand in shorthand_map:
+        print(f"['{typst}', '{shorthand}'],")

package/tools/make-symbol-map.py CHANGED

@@ -1,12 +1,13 @@
-import requests
+import urllib.request
 from bs4 import BeautifulSoup
 if __name__ == '__main__':
     symbol_map = {}
     url = "https://typst.app/docs/reference/symbols/sym/"
-    html_text = requests.get(url).text
+    with urllib.request.urlopen(url) as response:
+        html_text = response.read().decode('utf-8')
     soup = BeautifulSoup(html_text, 'html.parser')
     # <ul class="symbol-grid">
     ul = soup.find('ul', class_='symbol-grid')