tex2typst 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,7 +35,7 @@ import { tex2typst, typst2tex } from 'tex2typst';
35
35
  let tex = String.raw`e \overset{\text{def}}{=} \lim_{{n \to \infty}} \left(1 + \frac{1}{n}\right)^n`;
36
36
  let typst = tex2typst(tex);
37
37
  console.log(typst);
38
- // e eq.def lim_(n arrow.r infinity)(1 + 1/n)^n
38
+ // e eq.def lim_(n -> infinity)(1 + 1/n)^n
39
39
 
40
40
  let tex_recovered = typst2tex(typst);
41
41
  console.log(tex_recovered);
package/dist/index.js CHANGED
@@ -1382,15 +1382,248 @@ var TypstNode = class {
1382
1382
  function isalpha(char) {
1383
1383
  return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".includes(char);
1384
1384
  }
1385
- function isdigit(char) {
1386
- return "0123456789".includes(char);
1387
- }
1388
1385
  function assert(condition, message = "") {
1389
1386
  if (!condition) {
1390
1387
  throw new Error(message);
1391
1388
  }
1392
1389
  }
1393
1390
 
1391
+ // src/jslex.ts
1392
+ var EOF = {};
1393
/**
 * Sort comparator for candidate matches produced by Scanner.scan().
 * Longer matches sort first; among matches of equal length the rule that was
 * declared earlier (smaller `index`) sorts first.
 * @param {{len: number, index: number}} m1 First candidate.
 * @param {{len: number, index: number}} m2 Second candidate.
 * @return {number} Negative when m1 should come before m2, positive when after.
 */
function matchcompare(m1, m2) {
  return m1.len === m2.len ? m1.index - m2.index : m2.len - m1.len;
}
1400
/**
 * Cursor over one input string that hands out tokens one at a time.
 * Each call to scan() tries every rule of the current lexer state against the
 * remaining input, orders the candidates with matchcompare (longest first,
 * then declaration order) and runs the winning rule's action with this
 * scanner as argument.
 */
var Scanner = class {
  /**
   * @param {string} input Text to scan.
   * @param {object} lexer Object exposing `states` (array of state names) and
   *     `specification` (state name -> array of `{re, action}` rules).
   */
  constructor(input, lexer) {
    // position within input stream
    this._pos = 0;
    // current line number
    this._line = 0;
    // current column number
    this._col = 0;
    // number of extra characters read through input() past the current match
    this._offset = 0;
    // number of characters kept by less(), or null when less() was not called
    this._less = null;
    // set by reject(): makes scan() try the next candidate match
    this._go = false;
    // state requested through begin() during the current action, or null
    this._newstate = null;
    this._text = null;
    this._leng = null;
    this._input = input;
    this._lexer = lexer;
    this._state = lexer.states[0];
  }
  /**
   * Analogous to yytext and yyleng in lex - will be set during scan.
   */
  text() {
    return this._text;
  }
  leng() {
    return this._leng;
  }
  /**
   * Position in the stream, line number and column number of the match.
   */
  pos() {
    return this._pos;
  }
  line() {
    return this._line;
  }
  column() {
    return this._col;
  }
  /**
   * Analogous to input() in lex.
   * @return {string} The next character in the stream.
   */
  input() {
    return this._input.charAt(this._pos + this._leng + this._offset++);
  }
  /**
   * Similar to unput() in lex, but does not allow modifying the stream.
   * Gives back one character previously read with input(); never goes below 0.
   * @return {int} The offset position after the operation.
   */
  unput() {
    // The previous `this._offset = cond ? this._offset-- : 0` re-assigned the
    // pre-decrement value, so the offset never actually moved back.
    this._offset = this._offset > 0 ? this._offset - 1 : 0;
    return this._offset;
  }
  /**
   * Analogous to yyless(n) in lex - retains the first n characters from this pattern, and returns
   * the rest to the input stream, such that they will be used in the next pattern-matching operation.
   * @param {int} n Number of characters to retain.
   * @return {int} Length of the retained match text.
   */
  less(n) {
    this._less = n;
    this._offset = 0;
    this._text = this._text.substring(0, n);
    this._leng = this._text.length;
    return this._leng;
  }
  /**
   * Like less(), but instead of retaining the first n characters, it chops off the last n.
   * @param {int} n Number of characters to chop.
   * @return {int} Length of the retained match text.
   */
  pushback(n) {
    return this.less(this._leng - n);
  }
  /**
   * Similar to REJECT in lex, except it doesn't break the current execution context.
   * TIP: reject() should be the last instruction in a spec callback.
   */
  reject() {
    this._go = true;
  }
  /**
   * Analogous to BEGIN in lex - sets the named state (start condition).
   * The switch takes effect once the current action returns.
   * @param {string|int} state Name of state to switch to, or ordinal number (0 is first, etc).
   * @return {string} The new state on successful switch.
   * @throws {Error} When the state is neither a known name nor a valid ordinal.
   */
  begin(state) {
    if (this._lexer.specification[state]) {
      this._newstate = state;
      return this._newstate;
    }
    const byOrdinal = this._lexer.states[Number.parseInt(state, 10)];
    if (byOrdinal) {
      this._newstate = byOrdinal;
      return this._newstate;
    }
    throw new Error("Unknown state '" + state + "' requested");
  }
  /**
   * Simple accessor for reading in the current state.
   * @return {string} The current state.
   */
  state() {
    return this._state;
  }
  /**
   * Grabs the next token and fires the appropriate callback.
   * @return {T} Whatever the winning rule's action returned, or EOF once the
   *     whole input has been consumed.
   * @throws {Error} When no rule of the current state matches the remaining input.
   */
  scan() {
    if (this._pos >= this._input.length) {
      return EOF;
    }
    const str = this._input.substring(this._pos);
    const rules = this._lexer.specification[this._state];
    // Collect every rule that matches a non-empty prefix of the remaining input.
    const matches = [];
    for (let i = 0; i < rules.length; i++) {
      const rule = rules[i];
      const mt = str.match(rule.re);
      if (mt !== null && mt[0].length > 0) {
        matches.push({
          index: i,
          text: mt[0],
          len: mt[0].length,
          rule
        });
      }
    }
    if (matches.length === 0) {
      throw new Error("No match found for input '" + str + "'");
    }
    matches.sort(matchcompare);
    // Run candidates in priority order; an action that calls reject() sets
    // _go again, which moves on to the next candidate.
    this._go = true;
    let result;
    let m;
    for (let j = 0, n = matches.length; j < n && this._go; j++) {
      this._offset = 0;
      this._less = null;
      this._go = false;
      this._newstate = null;
      m = matches[j];
      this._text = m.text;
      this._leng = m.len;
      result = m.rule.action(this);
      if (this._newstate && this._newstate !== this._state) {
        this._state = this._newstate;
        break;
      }
    }
    // Advance past what was actually consumed: the (possibly shortened) match
    // plus any characters pulled in with input().
    const text = this._less === null ? m.text : m.text.substring(0, this._less);
    const len = text.length;
    this._pos += len + this._offset;
    const nlm = text.match(/\n/g);
    if (nlm !== null) {
      this._line += nlm.length;
      this._col = len - text.lastIndexOf("\n") - 1;
    } else {
      this._col += len;
    }
    return result;
  }
};
1559
/**
 * Lexer built from a specification object: each own key is a state name and
 * each value is a Map from regular-expression source text to the action run
 * when that expression matches. The first key is the initial state.
 */
var JSLex = class {
  /**
   * @param {Object<string, Map<string, function>>} spec3 State name -> rules.
   * @throws {Error} When a rule's pattern is not a valid regular expression.
   */
  constructor(spec3) {
    this.states = Object.keys(spec3);
    this.specification = {};
    for (const s of this.states) {
      const compiled = [];
      for (const [k, v] of spec3[s].entries()) {
        let re;
        try {
          // Group the pattern so that "^" anchors every top-level alternative,
          // not just the first one ("a|b" must not become "^a|b").
          re = new RegExp("^(?:" + k + ")");
        } catch (err) {
          throw new Error(
            "Invalid regexp '" + k + "' in state '" + s + "' (" + err.message + ")",
            { cause: err }
          );
        }
        compiled.push({
          re,
          action: v
        });
      }
      this.specification[s] = compiled;
    }
  }
  /**
   * Makes a new scanner object which is used to get tokens one at a time.
   * @param {string} input Input text to tokenize.
   * @return {Scanner} Scanner positioned at the start of the input.
   */
  scanner(input) {
    return new Scanner(input, this);
  }
  /**
   * Similar to lex's yylex() function, consumes all input, calling callback for each token.
   * Actions that return undefined produce no callback invocation.
   * @param {string} input Text to lex.
   * @param {function} callback Function to execute for each token.
   */
  lex(input, callback) {
    const scanner = this.scanner(input);
    for (;;) {
      const token = scanner.scan();
      if (token === EOF) {
        return;
      }
      if (token !== undefined) {
        callback(token);
      }
    }
  }
  /**
   * Consumes all input, collecting tokens along the way. An action that
   * returns an array contributes each of its elements as a separate token.
   * @param {string} input Text to lex.
   * @return {array} List of tokens.
   */
  collect(input) {
    const tokens = [];
    this.lex(input, (item) => {
      if (Array.isArray(item)) {
        tokens.push(...item);
      } else {
        tokens.push(item);
      }
    });
    return tokens;
  }
};
1626
+
1394
1627
  // src/tex-parser.ts
1395
1628
  var UNARY_COMMANDS = [
1396
1629
  "sqrt",
@@ -1420,7 +1653,9 @@ var UNARY_COMMANDS = [
1420
1653
  "underline",
1421
1654
  "vec",
1422
1655
  "widehat",
1423
- "widetilde"
1656
+ "widetilde",
1657
+ "overleftarrow",
1658
+ "overrightarrow"
1424
1659
  ];
1425
1660
  var BINARY_COMMANDS = [
1426
1661
  "frac",
@@ -1469,13 +1704,6 @@ function eat_primes(tokens, start) {
1469
1704
  }
1470
1705
  return pos - start;
1471
1706
  }
1472
- function eat_command_name(latex, start) {
1473
- let pos = start;
1474
- while (pos < latex.length && isalpha(latex[pos])) {
1475
- pos += 1;
1476
- }
1477
- return latex.substring(start, pos);
1478
- }
1479
1707
  function find_closing_match(tokens, start, leftToken, rightToken) {
1480
1708
  assert(tokens[start].eq(leftToken));
1481
1709
  let count = 1;
@@ -1503,126 +1731,46 @@ var END_COMMAND = new TexToken(1 /* COMMAND */, "\\end");
1503
1731
  function find_closing_end_command(tokens, start) {
1504
1732
  return find_closing_match(tokens, start, BEGIN_COMMAND, END_COMMAND);
1505
1733
  }
1506
- function find_closing_curly_bracket_char(latex, start) {
1507
- assert(latex[start] === "{");
1508
- let count = 1;
1509
- let pos = start + 1;
1510
- while (count > 0) {
1511
- if (pos >= latex.length) {
1512
- throw new LatexParserError("Unmatched curly brackets");
1513
- }
1514
- if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
1515
- pos += 2;
1516
- continue;
1517
- }
1518
- if (latex[pos] === "{") {
1519
- count += 1;
1520
- } else if (latex[pos] === "}") {
1521
- count -= 1;
1522
- }
1523
- pos += 1;
1734
/**
 * Removes the backslash from the escaped characters allowed inside
 * text-like TeX arguments: \{ \} \\ \$ \& \# \_ \%.
 * The replacement is done in a single left-to-right pass so that the output
 * of one unescape is never unescaped again (e.g. `\\$` yields `\$`, not `$`).
 * @param {string} str Raw text taken from between the braces.
 * @return {string} Text with the escapes resolved.
 */
function unescape(str) {
  return str.replace(/\\([{}\\$&#_%])/g, "$1");
}
1527
- function tokenize(latex) {
1528
- const tokens = [];
1529
- let pos = 0;
1530
- while (pos < latex.length) {
1531
- const firstChar = latex[pos];
1532
- let token;
1533
- switch (firstChar) {
1534
- case "%": {
1535
- let newPos = pos + 1;
1536
- while (newPos < latex.length && latex[newPos] !== "\n") {
1537
- newPos += 1;
1538
- }
1539
- token = new TexToken(3 /* COMMENT */, latex.slice(pos + 1, newPos));
1540
- pos = newPos;
1541
- break;
1542
- }
1543
- case "{":
1544
- case "}":
1545
- case "_":
1546
- case "^":
1547
- case "&":
1548
- token = new TexToken(6 /* CONTROL */, firstChar);
1549
- pos++;
1550
- break;
1551
- case "\n":
1552
- token = new TexToken(5 /* NEWLINE */, firstChar);
1553
- pos++;
1554
- break;
1555
- case "\r": {
1556
- if (pos + 1 < latex.length && latex[pos + 1] === "\n") {
1557
- token = new TexToken(5 /* NEWLINE */, "\n");
1558
- pos += 2;
1559
- } else {
1560
- token = new TexToken(5 /* NEWLINE */, "\n");
1561
- pos++;
1562
- }
1563
- break;
1564
- }
1565
- case " ": {
1566
- let newPos = pos;
1567
- while (newPos < latex.length && latex[newPos] === " ") {
1568
- newPos += 1;
1569
- }
1570
- token = new TexToken(4 /* SPACE */, latex.slice(pos, newPos));
1571
- pos = newPos;
1572
- break;
1573
- }
1574
- case "\\": {
1575
- if (pos + 1 >= latex.length) {
1576
- throw new LatexParserError("Expecting command name after \\");
1577
- }
1578
- const firstTwoChars = latex.slice(pos, pos + 2);
1579
- if (["\\\\", "\\,"].includes(firstTwoChars)) {
1580
- token = new TexToken(6 /* CONTROL */, firstTwoChars);
1581
- } else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_", "\\|"].includes(firstTwoChars)) {
1582
- token = new TexToken(0 /* ELEMENT */, firstTwoChars);
1583
- } else {
1584
- const command = eat_command_name(latex, pos + 1);
1585
- token = new TexToken(1 /* COMMAND */, "\\" + command);
1586
- }
1587
- pos += token.value.length;
1588
- break;
1589
- }
1590
- default: {
1591
- if (isdigit(firstChar)) {
1592
- let newPos = pos;
1593
- while (newPos < latex.length && isdigit(latex[newPos])) {
1594
- newPos += 1;
1595
- }
1596
- token = new TexToken(0 /* ELEMENT */, latex.slice(pos, newPos));
1597
- } else if (isalpha(firstChar)) {
1598
- token = new TexToken(0 /* ELEMENT */, firstChar);
1599
- } else if ("+-*/='<>!.,;:?()[]|".includes(firstChar)) {
1600
- token = new TexToken(0 /* ELEMENT */, firstChar);
1601
- } else {
1602
- token = new TexToken(7 /* UNKNOWN */, firstChar);
1603
- }
1604
- pos += token.value.length;
1605
- }
1606
- }
1607
- tokens.push(token);
1608
- if (token.type === 1 /* COMMAND */ && ["\\text", "\\operatorname", "\\begin", "\\end"].includes(token.value)) {
1609
- if (pos >= latex.length || latex[pos] !== "{") {
1610
- throw new LatexParserError(`No content for ${token.value} command`);
1611
- }
1612
- tokens.push(new TexToken(6 /* CONTROL */, "{"));
1613
- const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
1614
- pos++;
1615
- let textInside = latex.slice(pos, posClosingBracket);
1616
- const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
1617
- for (const char of chars) {
1618
- textInside = textInside.replaceAll("\\" + char, char);
1619
- }
1620
- tokens.push(new TexToken(2 /* TEXT */, textInside));
1621
- tokens.push(new TexToken(6 /* CONTROL */, "}"));
1622
- pos = posClosingBracket + 1;
1623
- }
1624
- }
1625
- return tokens;
1741
// Lexer rules for TeX input, as pattern source -> action. Among the rules that
// match, the lexer prefers the longest match and then the earliest rule, so
// the order below acts as the tie-breaker.
var rules_map = /* @__PURE__ */ new Map([
  [
    // \text{...}, \operatorname{...}, \begin{...}, \end{...}: the braced
    // argument is taken verbatim as one TEXT token. The argument may be empty,
    // may contain backslash-escaped characters (so "\}" does not close it) and
    // may contain one level of nested braces.
    String.raw`\\(text|operatorname|begin|end)\{(?:\\[\s\S]|[^{}\\]|\{(?:\\[\s\S]|[^{}\\])*\})*\}`,
    (s) => {
      const text = s.text();
      const open = text.indexOf("{");
      const command = text.substring(0, open);
      const text_inside = text.substring(open + 1, text.lastIndexOf("}"));
      return [
        new TexToken(1 /* COMMAND */, command),
        new TexToken(6 /* CONTROL */, "{"),
        new TexToken(2 /* TEXT */, unescape(text_inside)),
        new TexToken(6 /* CONTROL */, "}")
      ];
    }
  ],
  // "%" up to the end of the line; the token value excludes the "%" itself
  [String.raw`%[^\n]*`, (s) => new TexToken(3 /* COMMENT */, s.text().substring(1))],
  [String.raw`[{}_^&]`, (s) => new TexToken(6 /* CONTROL */, s.text())],
  // "\r\n" and "\n" are both reported as "\n"
  [String.raw`\r?\n`, (_s) => new TexToken(5 /* NEWLINE */, "\n")],
  [String.raw`\s+`, (s) => new TexToken(4 /* SPACE */, s.text())],
  [String.raw`\\[\\,]`, (s) => new TexToken(6 /* CONTROL */, s.text())],
  [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(1 /* COMMAND */, s.text())],
  [String.raw`[0-9]+`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  [String.raw`[a-zA-Z]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  // fallback for any other single character
  [String.raw`.`, (s) => new TexToken(7 /* UNKNOWN */, s.text())]
]);
1768
// Lexer specification for TeX input: a single state, "start", using rules_map.
var spec = { start: rules_map };
/**
 * Splits a TeX source string into tokens by running a JSLex lexer built from
 * `spec` over it.
 * @param {string} input TeX source text.
 * @return {array} Tokens in source order, as returned by JSLex#collect.
 */
function tokenize_tex(input) {
  return new JSLex(spec).collect(input);
}
1627
1775
  var LatexParserError = class extends Error {
1628
1776
  constructor(message) {
@@ -1938,7 +2086,7 @@ function passExpandCustomTexMacros(tokens, customTexMacros) {
1938
2086
  let out_tokens = [];
1939
2087
  for (const token of tokens) {
1940
2088
  if (token.type === 1 /* COMMAND */ && customTexMacros[token.value]) {
1941
- const expanded_tokens = tokenize(customTexMacros[token.value]);
2089
+ const expanded_tokens = tokenize_tex(customTexMacros[token.value]);
1942
2090
  out_tokens = out_tokens.concat(expanded_tokens);
1943
2091
  } else {
1944
2092
  out_tokens.push(token);
@@ -1948,7 +2096,7 @@ function passExpandCustomTexMacros(tokens, customTexMacros) {
1948
2096
  }
1949
2097
  function parseTex(tex, customTexMacros) {
1950
2098
  const parser = new LatexParser();
1951
- let tokens = tokenize(tex);
2099
+ let tokens = tokenize_tex(tex);
1952
2100
  tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
1953
2101
  tokens = passExpandCustomTexMacros(tokens, customTexMacros);
1954
2102
  return parser.parse(tokens);
@@ -2486,6 +2634,20 @@ function convert_tex_node_to_typst(node, options = {}) {
2486
2634
  if (node.content === "\\mathbb" && arg0.type === "atom" && /^[A-Z]$/.test(arg0.content)) {
2487
2635
  return new TypstNode("symbol", arg0.content + arg0.content);
2488
2636
  }
2637
+ if (node.content === "\\overrightarrow") {
2638
+ return new TypstNode(
2639
+ "funcCall",
2640
+ "arrow",
2641
+ [arg0]
2642
+ );
2643
+ }
2644
+ if (node.content === "\\overleftarrow") {
2645
+ return new TypstNode(
2646
+ "funcCall",
2647
+ "accent",
2648
+ [arg0, new TypstNode("symbol", "arrow.l")]
2649
+ );
2650
+ }
2489
2651
  if (node.content === "\\operatorname") {
2490
2652
  const body = node.args;
2491
2653
  if (body.length !== 1 || body[0].type !== "text") {
@@ -2772,139 +2934,67 @@ function eat_primes2(tokens, start) {
2772
2934
  }
2773
2935
  return pos - start;
2774
2936
  }
2775
- function eat_identifier_name(typst, start) {
2776
- let pos = start;
2777
- while (pos < typst.length && (isalpha(typst[pos]) || typst[pos] === ".")) {
2778
- pos += 1;
2779
- }
2780
- return typst.substring(start, pos);
2781
- }
2782
- function try_eat_shorthand(typst, start) {
2783
- for (const shorthand of TYPST_SHORTHANDS) {
2784
- if (typst.startsWith(shorthand, start)) {
2785
- return shorthand;
2786
- }
2787
- }
2788
- return null;
2937
/**
 * Builds the source of a regular expression that matches any one of the
 * entries of TYPST_SHORTHANDS literally. Every regex metacharacter in a
 * shorthand is escaped, and the alternatives keep the order of
 * TYPST_SHORTHANDS (the regex engine picks the first alternative that matches).
 * @return {string} A parenthesised alternation, e.g. "(->|\.\.\.)".
 */
function generate_regex_for_shorthands() {
  const escaped = TYPST_SHORTHANDS.map((shorthand) =>
    shorthand.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
  );
  return `(${escaped.join("|")})`;
}
2790
- function tokenize_typst(typst) {
2791
- const tokens = [];
2792
- let pos = 0;
2793
- while (pos < typst.length) {
2794
- const firstChar = typst[pos];
2795
- let token;
2796
- switch (firstChar) {
2797
- case "_":
2798
- case "^":
2799
- case "&":
2800
- token = new TypstToken(6 /* CONTROL */, firstChar);
2801
- pos++;
2802
- break;
2803
- case "\n":
2804
- token = new TypstToken(7 /* NEWLINE */, firstChar);
2805
- pos++;
2806
- break;
2807
- case "\r": {
2808
- if (pos + 1 < typst.length && typst[pos + 1] === "\n") {
2809
- token = new TypstToken(7 /* NEWLINE */, "\n");
2810
- pos += 2;
2811
- } else {
2812
- token = new TypstToken(7 /* NEWLINE */, "\n");
2813
- pos++;
2814
- }
2815
- break;
2816
- }
2817
- case " ": {
2818
- let newPos = pos;
2819
- while (newPos < typst.length && typst[newPos] === " ") {
2820
- newPos++;
2821
- }
2822
- token = new TypstToken(4 /* SPACE */, typst.substring(pos, newPos));
2823
- pos = newPos;
2824
- break;
2825
- }
2826
- case "/": {
2827
- if (pos < typst.length && typst[pos + 1] === "/") {
2828
- let newPos = pos + 2;
2829
- while (newPos < typst.length && typst[newPos] !== "\n") {
2830
- newPos++;
2831
- }
2832
- token = new TypstToken(3 /* COMMENT */, typst.slice(pos + 2, newPos));
2833
- pos = newPos;
2834
- } else {
2835
- token = new TypstToken(1 /* ELEMENT */, "/");
2836
- pos++;
2837
- }
2838
- break;
2839
- }
2840
- case "\\": {
2841
- if (pos + 1 >= typst.length) {
2842
- throw new Error("Expecting a character after \\");
2843
- }
2844
- const firstTwoChars = typst.substring(pos, pos + 2);
2845
- if (["\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
2846
- token = new TypstToken(1 /* ELEMENT */, firstTwoChars);
2847
- pos += 2;
2848
- } else if (["\\\n", "\\ "].includes(firstTwoChars)) {
2849
- token = new TypstToken(6 /* CONTROL */, "\\");
2850
- pos += 1;
2851
- } else {
2852
- token = new TypstToken(6 /* CONTROL */, "");
2853
- pos++;
2854
- }
2855
- break;
2856
- }
2857
- case '"': {
2858
- let newPos = pos + 1;
2859
- while (newPos < typst.length) {
2860
- if (typst[newPos] === '"' && typst[newPos - 1] !== "\\") {
2861
- break;
2862
- }
2863
- newPos++;
2864
- }
2865
- let text = typst.substring(pos + 1, newPos);
2866
- const chars = ['"', "\\"];
2867
- for (const char of chars) {
2868
- text = text.replaceAll("\\" + char, char);
2869
- }
2870
- token = new TypstToken(2 /* TEXT */, text);
2871
- pos = newPos + 1;
2872
- break;
2873
- }
2874
- default: {
2875
- const shorthand = try_eat_shorthand(typst, pos);
2876
- if (shorthand !== null) {
2877
- token = new TypstToken(0 /* SYMBOL */, reverseShorthandMap.get(shorthand));
2878
- pos += shorthand.length;
2879
- break;
2880
- }
2881
- if (isdigit(firstChar)) {
2882
- let newPos = pos;
2883
- while (newPos < typst.length && isdigit(typst[newPos])) {
2884
- newPos += 1;
2885
- }
2886
- if (newPos < typst.length && typst[newPos] === ".") {
2887
- newPos += 1;
2888
- while (newPos < typst.length && isdigit(typst[newPos])) {
2889
- newPos += 1;
2890
- }
2891
- }
2892
- token = new TypstToken(1 /* ELEMENT */, typst.slice(pos, newPos));
2893
- } else if ("+-*/='<>!.,;?()[]|".includes(firstChar)) {
2894
- token = new TypstToken(1 /* ELEMENT */, firstChar);
2895
- } else if (isalpha(firstChar)) {
2896
- const identifier = eat_identifier_name(typst, pos);
2897
- const _type = identifier.length === 1 ? 1 /* ELEMENT */ : 0 /* SYMBOL */;
2898
- token = new TypstToken(_type, identifier);
2899
- } else {
2900
- token = new TypstToken(1 /* ELEMENT */, firstChar);
2901
- }
2902
- pos += token.value.length;
2903
- }
2904
- }
2905
- tokens.push(token);
2906
- }
2907
- return tokens;
2947
// Alternation matching any Typst shorthand literally.
var REGEX_SHORTHANDS = generate_regex_for_shorthands();
// Lexer rules for Typst math input, as pattern source -> action. Among the
// rules that match, the lexer prefers the longest match and then the earliest
// rule, so the order below acts as the tie-breaker.
var rules_map2 = /* @__PURE__ */ new Map([
  // "//" up to the end of the line; the token value excludes the "//" itself
  [String.raw`//[^\n]*`, (s) => new TypstToken(3 /* COMMENT */, s.text().substring(2))],
  [String.raw`/`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  [String.raw`[_^&]`, (s) => new TypstToken(6 /* CONTROL */, s.text())],
  // "\r\n" and "\n" are both reported as "\n"
  [String.raw`\r?\n`, (_s) => new TypstToken(7 /* NEWLINE */, "\n")],
  [String.raw`\s+`, (s) => new TypstToken(4 /* SPACE */, s.text())],
  [String.raw`\\[$&#_]`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  [String.raw`\\\n`, (_s) => {
    return [
      new TypstToken(6 /* CONTROL */, "\\"),
      new TypstToken(7 /* NEWLINE */, "\n")
    ];
  }],
  [String.raw`\\\s`, (_s) => {
    return [
      new TypstToken(6 /* CONTROL */, "\\"),
      new TypstToken(4 /* SPACE */, " ")
    ];
  }],
  // this backslash is dummy and will be ignored in later stages
  [String.raw`\\\S`, (_s) => new TypstToken(6 /* CONTROL */, "")],
  [
    // Double-quoted string. A backslash always consumes the character after
    // it, so an escaped quote (\") does not terminate the string.
    String.raw`"(?:\\[\s\S]|[^"\\])*"`,
    (s) => {
      const quoted = s.text();
      // Drop the surrounding quotes, then resolve \" and \\ in one pass
      // (String#replace returns a new string; the result must be kept).
      const text = quoted.substring(1, quoted.length - 1).replace(/\\(["\\])/g, "$1");
      return new TypstToken(2 /* TEXT */, text);
    }
  ],
  [
    REGEX_SHORTHANDS,
    (s) => {
      const shorthand = s.text();
      const symbol = reverseShorthandMap.get(shorthand);
      return new TypstToken(0 /* SYMBOL */, symbol);
    }
  ],
  [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  // a single letter is an ELEMENT; longer (possibly dotted) names are SYMBOLs
  [String.raw`[a-zA-Z\.]+`, (s) => {
    return new TypstToken(s.text().length === 1 ? 1 /* ELEMENT */ : 0 /* SYMBOL */, s.text());
  }],
  // fallback for any other single character
  [String.raw`.`, (s) => new TypstToken(1 /* ELEMENT */, s.text())]
]);
2992
// Lexer specification for Typst input: a single state, "start", using rules_map2.
var spec2 = { start: rules_map2 };
/**
 * Splits a Typst math source string into tokens by running a JSLex lexer
 * built from `spec2` over it.
 * @param {string} input Typst source text.
 * @return {array} Tokens in source order, as returned by JSLex#collect.
 */
function tokenize_typst(input) {
  return new JSLex(spec2).collect(input);
}
2909
2999
  function find_closing_match2(tokens, start) {
2910
3000
  assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET2]));