tex2typst 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,5 +1,8 @@
1
1
  // src/map.ts
2
2
  var symbolMap = /* @__PURE__ */ new Map([
3
+ [",", "thin"],
4
+ [":", "med"],
5
+ [";", "thick"],
3
6
  ["cos", "cos"],
4
7
  ["sin", "sin"],
5
8
  ["tan", "tan"],
@@ -1382,15 +1385,248 @@ var TypstNode = class {
1382
1385
  function isalpha(char) {
1383
1386
  return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".includes(char);
1384
1387
  }
1385
- function isdigit(char) {
1386
- return "0123456789".includes(char);
1387
- }
1388
1388
  function assert(condition, message = "") {
1389
1389
  if (!condition) {
1390
1390
  throw new Error(message);
1391
1391
  }
1392
1392
  }
1393
1393
 
1394
+ // src/jslex.ts
1395
+ var EOF = {};
1396
+ function matchcompare(m1, m2) {
1397
+ if (m2.len !== m1.len) {
1398
+ return m2.len - m1.len;
1399
+ } else {
1400
+ return m1.index - m2.index;
1401
+ }
1402
+ }
1403
+ var Scanner = class {
1404
+ constructor(input, lexer) {
1405
+ // position within input stream
1406
+ this._pos = 0;
1407
+ // current line number
1408
+ this._line = 0;
1409
+ // current column number
1410
+ this._col = 0;
1411
+ this._offset = 0;
1412
+ this._less = null;
1413
+ this._go = false;
1414
+ this._newstate = null;
1415
+ this._text = null;
1416
+ this._leng = null;
1417
+ this._input = input;
1418
+ this._lexer = lexer;
1419
+ this._state = lexer.states[0];
1420
+ }
1421
+ /**
1422
+ * Analogous to yytext and yyleng in lex - will be set during scan.
1423
+ */
1424
+ text() {
1425
+ return this._text;
1426
+ }
1427
+ leng() {
1428
+ return this._leng;
1429
+ }
1430
+ /**
1431
+ * Position of in stream, line number and column number of match.
1432
+ */
1433
+ pos() {
1434
+ return this._pos;
1435
+ }
1436
+ line() {
1437
+ return this._line;
1438
+ }
1439
+ column() {
1440
+ return this._col;
1441
+ }
1442
+ /**
1443
+ * Analogous to input() in lex.
1444
+ * @return {string} The next character in the stream.
1445
+ */
1446
+ input() {
1447
+ return this._input.charAt(this._pos + this._leng + this._offset++);
1448
+ }
1449
+ /**
1450
+ * Similar to unput() in lex, but does not allow modifying the stream.
1451
+ * @return {int} The offset position after the operation.
1452
+ */
1453
+ unput() {
1454
+ return this._offset = this._offset > 0 ? this._offset-- : 0;
1455
+ }
1456
+ /**
1457
+ * Analogous to yyless(n) in lex - retains the first n characters from this pattern, and returns
1458
+ * the rest to the input stream, such that they will be used in the next pattern-matching operation.
1459
+ * @param {int} n Number of characters to retain.
1460
+ * @return {int} Length of the stream after the operation has completed.
1461
+ */
1462
+ less(n) {
1463
+ this._less = n;
1464
+ this._offset = 0;
1465
+ this._text = this._text.substring(0, n);
1466
+ return this._leng = this._text.length;
1467
+ }
1468
+ /**
1469
+ * Like less(), but instead of retaining the first n characters, it chops off the last n.
1470
+ * @param {int} n Number of characters to chop.
1471
+ * @return {int} Length of the stream after the operation has completed.
1472
+ */
1473
+ pushback(n) {
1474
+ return this.less(this._leng - n);
1475
+ }
1476
+ /**
1477
+ * Similar to REJECT in lex, except it doesn't break the current execution context.
1478
+ * TIP: reject() should be the last instruction in a spec callback.
1479
+ */
1480
+ reject() {
1481
+ this._go = true;
1482
+ }
1483
+ /**
1484
+ * Analogous to BEGIN in lex - sets the named state (start condition).
1485
+ * @param {string|int} state Name of state to switch to, or ordinal number (0 is first, etc).
1486
+ * @return {string} The new state on successful switch, throws exception on failure.
1487
+ */
1488
+ begin(state) {
1489
+ if (this._lexer.specification[state]) {
1490
+ return this._newstate = state;
1491
+ }
1492
+ const s = this._lexer.states[parseInt(state)];
1493
+ if (s) {
1494
+ return this._newstate = s;
1495
+ }
1496
+ throw "Unknown state '" + state + "' requested";
1497
+ }
1498
+ /**
1499
+ * Simple accessor for reading in the current state.
1500
+ * @return {string} The current state.
1501
+ */
1502
+ state() {
1503
+ return this._state;
1504
+ }
1505
+ /**
1506
+ * Scan method to be returned to caller - grabs the next token and fires appropriate calback.
1507
+ * @return {T} The next token extracted from the stream.
1508
+ */
1509
+ scan() {
1510
+ if (this._pos >= this._input.length) {
1511
+ return EOF;
1512
+ }
1513
+ const str = this._input.substring(this._pos);
1514
+ const rules = this._lexer.specification[this._state];
1515
+ const matches = [];
1516
+ for (let i = 0; i < rules.length; i++) {
1517
+ const rule = rules[i];
1518
+ const mt = str.match(rule.re);
1519
+ if (mt !== null && mt[0].length > 0) {
1520
+ matches.push({
1521
+ index: i,
1522
+ text: mt[0],
1523
+ len: mt[0].length,
1524
+ rule
1525
+ });
1526
+ }
1527
+ }
1528
+ if (matches.length === 0) {
1529
+ throw new Error("No match found for input '" + str + "'");
1530
+ }
1531
+ matches.sort(matchcompare);
1532
+ this._go = true;
1533
+ let result;
1534
+ let m;
1535
+ for (let j = 0, n = matches.length; j < n && this._go; j++) {
1536
+ this._offset = 0;
1537
+ this._less = null;
1538
+ this._go = false;
1539
+ this._newstate = null;
1540
+ m = matches[j];
1541
+ this._text = m.text;
1542
+ this._leng = m.len;
1543
+ result = m.rule.action(this);
1544
+ if (this._newstate && this._newstate != this._state) {
1545
+ this._state = this._newstate;
1546
+ break;
1547
+ }
1548
+ }
1549
+ const text = this._less === null ? m.text : m.text.substring(0, this._less);
1550
+ const len = text.length;
1551
+ this._pos += len + this._offset;
1552
+ const nlm = text.match(/\n/g);
1553
+ if (nlm !== null) {
1554
+ this._line += nlm.length;
1555
+ this._col = len - text.lastIndexOf("\n") - 1;
1556
+ } else {
1557
+ this._col += len;
1558
+ }
1559
+ return result;
1560
+ }
1561
+ };
1562
+ var JSLex = class {
1563
+ constructor(spec3) {
1564
+ this.states = Object.keys(spec3);
1565
+ this.specification = {};
1566
+ for (const s of this.states) {
1567
+ const rule_map = spec3[s];
1568
+ if (s in this.specification) {
1569
+ throw "Duplicate state declaration encountered for state '" + s + "'";
1570
+ }
1571
+ this.specification[s] = [];
1572
+ for (const [k, v] of rule_map.entries()) {
1573
+ let re;
1574
+ try {
1575
+ re = new RegExp("^" + k);
1576
+ } catch (err) {
1577
+ throw "Invalid regexp '" + k + "' in state '" + s + "' (" + err.message + ")";
1578
+ }
1579
+ this.specification[s].push({
1580
+ re,
1581
+ action: v
1582
+ });
1583
+ }
1584
+ }
1585
+ }
1586
+ /**
1587
+ * Scanner function - makes a new scanner object which is used to get tokens one at a time.
1588
+ * @param {string} input Input text to tokenize.
1589
+ * @return {function} Scanner function.
1590
+ */
1591
+ scanner(input) {
1592
+ return new Scanner(input, this);
1593
+ }
1594
+ /**
1595
+ * Similar to lex's yylex() function, consumes all input, calling calback for each token.
1596
+ * @param {string} input Text to lex.
1597
+ * @param {function} callback Function to execute for each token.
1598
+ */
1599
+ lex(input, callback) {
1600
+ const scanner = this.scanner(input);
1601
+ while (true) {
1602
+ const token = scanner.scan();
1603
+ if (token === EOF) {
1604
+ return;
1605
+ }
1606
+ if (token !== void 0) {
1607
+ callback(token);
1608
+ }
1609
+ }
1610
+ }
1611
+ /**
1612
+ * Consumes all input, collecting tokens along the way.
1613
+ * @param {string} input Text to lex.
1614
+ * @return {array} List of tokens, may contain an Error at the end.
1615
+ */
1616
+ collect(input) {
1617
+ const tokens = [];
1618
+ const callback = function(item) {
1619
+ if (Array.isArray(item)) {
1620
+ tokens.push(...item);
1621
+ } else {
1622
+ tokens.push(item);
1623
+ }
1624
+ };
1625
+ this.lex(input, callback);
1626
+ return tokens;
1627
+ }
1628
+ };
1629
+
1394
1630
  // src/tex-parser.ts
1395
1631
  var UNARY_COMMANDS = [
1396
1632
  "sqrt",
@@ -1420,7 +1656,10 @@ var UNARY_COMMANDS = [
1420
1656
  "underline",
1421
1657
  "vec",
1422
1658
  "widehat",
1423
- "widetilde"
1659
+ "widetilde",
1660
+ "overleftarrow",
1661
+ "overrightarrow",
1662
+ "hspace"
1424
1663
  ];
1425
1664
  var BINARY_COMMANDS = [
1426
1665
  "frac",
@@ -1469,13 +1708,6 @@ function eat_primes(tokens, start) {
1469
1708
  }
1470
1709
  return pos - start;
1471
1710
  }
1472
- function eat_command_name(latex, start) {
1473
- let pos = start;
1474
- while (pos < latex.length && isalpha(latex[pos])) {
1475
- pos += 1;
1476
- }
1477
- return latex.substring(start, pos);
1478
- }
1479
1711
  function find_closing_match(tokens, start, leftToken, rightToken) {
1480
1712
  assert(tokens[start].eq(leftToken));
1481
1713
  let count = 1;
@@ -1503,126 +1735,46 @@ var END_COMMAND = new TexToken(1 /* COMMAND */, "\\end");
1503
1735
  function find_closing_end_command(tokens, start) {
1504
1736
  return find_closing_match(tokens, start, BEGIN_COMMAND, END_COMMAND);
1505
1737
  }
1506
- function find_closing_curly_bracket_char(latex, start) {
1507
- assert(latex[start] === "{");
1508
- let count = 1;
1509
- let pos = start + 1;
1510
- while (count > 0) {
1511
- if (pos >= latex.length) {
1512
- throw new LatexParserError("Unmatched curly brackets");
1513
- }
1514
- if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
1515
- pos += 2;
1516
- continue;
1517
- }
1518
- if (latex[pos] === "{") {
1519
- count += 1;
1520
- } else if (latex[pos] === "}") {
1521
- count -= 1;
1522
- }
1523
- pos += 1;
1738
+ function unescape(str) {
1739
+ const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
1740
+ for (const char of chars) {
1741
+ str = str.replaceAll("\\" + char, char);
1524
1742
  }
1525
- return pos - 1;
1743
+ return str;
1526
1744
  }
1527
- function tokenize(latex) {
1528
- const tokens = [];
1529
- let pos = 0;
1530
- while (pos < latex.length) {
1531
- const firstChar = latex[pos];
1532
- let token;
1533
- switch (firstChar) {
1534
- case "%": {
1535
- let newPos = pos + 1;
1536
- while (newPos < latex.length && latex[newPos] !== "\n") {
1537
- newPos += 1;
1538
- }
1539
- token = new TexToken(3 /* COMMENT */, latex.slice(pos + 1, newPos));
1540
- pos = newPos;
1541
- break;
1542
- }
1543
- case "{":
1544
- case "}":
1545
- case "_":
1546
- case "^":
1547
- case "&":
1548
- token = new TexToken(6 /* CONTROL */, firstChar);
1549
- pos++;
1550
- break;
1551
- case "\n":
1552
- token = new TexToken(5 /* NEWLINE */, firstChar);
1553
- pos++;
1554
- break;
1555
- case "\r": {
1556
- if (pos + 1 < latex.length && latex[pos + 1] === "\n") {
1557
- token = new TexToken(5 /* NEWLINE */, "\n");
1558
- pos += 2;
1559
- } else {
1560
- token = new TexToken(5 /* NEWLINE */, "\n");
1561
- pos++;
1562
- }
1563
- break;
1564
- }
1565
- case " ": {
1566
- let newPos = pos;
1567
- while (newPos < latex.length && latex[newPos] === " ") {
1568
- newPos += 1;
1569
- }
1570
- token = new TexToken(4 /* SPACE */, latex.slice(pos, newPos));
1571
- pos = newPos;
1572
- break;
1573
- }
1574
- case "\\": {
1575
- if (pos + 1 >= latex.length) {
1576
- throw new LatexParserError("Expecting command name after \\");
1577
- }
1578
- const firstTwoChars = latex.slice(pos, pos + 2);
1579
- if (["\\\\", "\\,"].includes(firstTwoChars)) {
1580
- token = new TexToken(6 /* CONTROL */, firstTwoChars);
1581
- } else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_", "\\|"].includes(firstTwoChars)) {
1582
- token = new TexToken(0 /* ELEMENT */, firstTwoChars);
1583
- } else {
1584
- const command = eat_command_name(latex, pos + 1);
1585
- token = new TexToken(1 /* COMMAND */, "\\" + command);
1586
- }
1587
- pos += token.value.length;
1588
- break;
1589
- }
1590
- default: {
1591
- if (isdigit(firstChar)) {
1592
- let newPos = pos;
1593
- while (newPos < latex.length && isdigit(latex[newPos])) {
1594
- newPos += 1;
1595
- }
1596
- token = new TexToken(0 /* ELEMENT */, latex.slice(pos, newPos));
1597
- } else if (isalpha(firstChar)) {
1598
- token = new TexToken(0 /* ELEMENT */, firstChar);
1599
- } else if ("+-*/='<>!.,;:?()[]|".includes(firstChar)) {
1600
- token = new TexToken(0 /* ELEMENT */, firstChar);
1601
- } else {
1602
- token = new TexToken(7 /* UNKNOWN */, firstChar);
1603
- }
1604
- pos += token.value.length;
1605
- }
1606
- }
1607
- tokens.push(token);
1608
- if (token.type === 1 /* COMMAND */ && ["\\text", "\\operatorname", "\\begin", "\\end"].includes(token.value)) {
1609
- if (pos >= latex.length || latex[pos] !== "{") {
1610
- throw new LatexParserError(`No content for ${token.value} command`);
1611
- }
1612
- tokens.push(new TexToken(6 /* CONTROL */, "{"));
1613
- const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
1614
- pos++;
1615
- let textInside = latex.slice(pos, posClosingBracket);
1616
- const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
1617
- for (const char of chars) {
1618
- textInside = textInside.replaceAll("\\" + char, char);
1619
- }
1620
- tokens.push(new TexToken(2 /* TEXT */, textInside));
1621
- tokens.push(new TexToken(6 /* CONTROL */, "}"));
1622
- pos = posClosingBracket + 1;
1623
- }
1624
- }
1625
- return tokens;
1745
+ var rules_map = /* @__PURE__ */ new Map([
1746
+ [
1747
+ String.raw`\\(text|operatorname|begin|end|hspace){.+?}`,
1748
+ (s) => {
1749
+ const text = s.text();
1750
+ const command = text.substring(0, text.indexOf("{"));
1751
+ const text_inside = text.substring(text.indexOf("{") + 1, text.lastIndexOf("}"));
1752
+ return [
1753
+ new TexToken(1 /* COMMAND */, command),
1754
+ new TexToken(6 /* CONTROL */, "{"),
1755
+ new TexToken(2 /* TEXT */, unescape(text_inside)),
1756
+ new TexToken(6 /* CONTROL */, "}")
1757
+ ];
1758
+ }
1759
+ ],
1760
+ [String.raw`%[^\n]*`, (s) => new TexToken(3 /* COMMENT */, s.text().substring(1))],
1761
+ [String.raw`[{}_^&]`, (s) => new TexToken(6 /* CONTROL */, s.text())],
1762
+ [String.raw`\r?\n`, (_s) => new TexToken(5 /* NEWLINE */, "\n")],
1763
+ [String.raw`\s+`, (s) => new TexToken(4 /* SPACE */, s.text())],
1764
+ [String.raw`\\[\\,:;]`, (s) => new TexToken(6 /* CONTROL */, s.text())],
1765
+ [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
1766
+ [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(1 /* COMMAND */, s.text())],
1767
+ [String.raw`[0-9]+`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
1768
+ [String.raw`[a-zA-Z]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
1769
+ [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
1770
+ [String.raw`.`, (s) => new TexToken(7 /* UNKNOWN */, s.text())]
1771
+ ]);
1772
+ var spec = {
1773
+ "start": rules_map
1774
+ };
1775
+ function tokenize_tex(input) {
1776
+ const lexer = new JSLex(spec);
1777
+ return lexer.collect(input);
1626
1778
  }
1627
1779
  var LatexParserError = class extends Error {
1628
1780
  constructor(message) {
@@ -1755,9 +1907,10 @@ var LatexParser = class {
1755
1907
  case "}":
1756
1908
  throw new LatexParserError("Unmatched '}'");
1757
1909
  case "\\\\":
1758
- return [new TexNode("control", "\\\\"), start + 1];
1759
1910
  case "\\,":
1760
- return [new TexNode("control", "\\,"), start + 1];
1911
+ case "\\:":
1912
+ case "\\;":
1913
+ return [new TexNode("control", controlChar), start + 1];
1761
1914
  case "_":
1762
1915
  case "^":
1763
1916
  return [EMPTY_NODE, start];
@@ -1938,7 +2091,7 @@ function passExpandCustomTexMacros(tokens, customTexMacros) {
1938
2091
  let out_tokens = [];
1939
2092
  for (const token of tokens) {
1940
2093
  if (token.type === 1 /* COMMAND */ && customTexMacros[token.value]) {
1941
- const expanded_tokens = tokenize(customTexMacros[token.value]);
2094
+ const expanded_tokens = tokenize_tex(customTexMacros[token.value]);
1942
2095
  out_tokens = out_tokens.concat(expanded_tokens);
1943
2096
  } else {
1944
2097
  out_tokens.push(token);
@@ -1948,7 +2101,7 @@ function passExpandCustomTexMacros(tokens, customTexMacros) {
1948
2101
  }
1949
2102
  function parseTex(tex, customTexMacros) {
1950
2103
  const parser = new LatexParser();
1951
- let tokens = tokenize(tex);
2104
+ let tokens = tokenize_tex(tex);
1952
2105
  tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
1953
2106
  tokens = passExpandCustomTexMacros(tokens, customTexMacros);
1954
2107
  return parser.parse(tokens);
@@ -2486,22 +2639,40 @@ function convert_tex_node_to_typst(node, options = {}) {
2486
2639
  if (node.content === "\\mathbb" && arg0.type === "atom" && /^[A-Z]$/.test(arg0.content)) {
2487
2640
  return new TypstNode("symbol", arg0.content + arg0.content);
2488
2641
  }
2642
+ if (node.content === "\\overrightarrow") {
2643
+ return new TypstNode(
2644
+ "funcCall",
2645
+ "arrow",
2646
+ [arg0]
2647
+ );
2648
+ }
2649
+ if (node.content === "\\overleftarrow") {
2650
+ return new TypstNode(
2651
+ "funcCall",
2652
+ "accent",
2653
+ [arg0, new TypstNode("symbol", "arrow.l")]
2654
+ );
2655
+ }
2489
2656
  if (node.content === "\\operatorname") {
2490
- const body = node.args;
2491
- if (body.length !== 1 || body[0].type !== "text") {
2492
- throw new TypstWriterError(`Expecting body of \\operatorname to be text but got`, node);
2493
- }
2494
- const text = body[0].content;
2657
+ const text = arg0.content;
2495
2658
  if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
2496
2659
  return new TypstNode("symbol", text);
2497
2660
  } else {
2498
2661
  return new TypstNode(
2499
2662
  "funcCall",
2500
2663
  "op",
2501
- [new TypstNode("text", text)]
2664
+ [arg0]
2502
2665
  );
2503
2666
  }
2504
2667
  }
2668
+ if (node.content === "\\hspace") {
2669
+ const text = arg0.content;
2670
+ return new TypstNode(
2671
+ "funcCall",
2672
+ "#h",
2673
+ [new TypstNode("symbol", text)]
2674
+ );
2675
+ }
2505
2676
  return new TypstNode(
2506
2677
  "funcCall",
2507
2678
  tex_token_to_typst(node.content),
@@ -2550,8 +2721,9 @@ function convert_tex_node_to_typst(node, options = {}) {
2550
2721
  case "control":
2551
2722
  if (node.content === "\\\\") {
2552
2723
  return new TypstNode("symbol", "\\");
2553
- } else if (node.content === "\\,") {
2554
- return new TypstNode("symbol", "thin");
2724
+ } else if (symbolMap.has(node.content.substring(1))) {
2725
+ const typst_symbol = symbolMap.get(node.content.substring(1));
2726
+ return new TypstNode("symbol", typst_symbol);
2555
2727
  } else {
2556
2728
  throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
2557
2729
  }
@@ -2772,139 +2944,67 @@ function eat_primes2(tokens, start) {
2772
2944
  }
2773
2945
  return pos - start;
2774
2946
  }
2775
- function eat_identifier_name(typst, start) {
2776
- let pos = start;
2777
- while (pos < typst.length && (isalpha(typst[pos]) || typst[pos] === ".")) {
2778
- pos += 1;
2779
- }
2780
- return typst.substring(start, pos);
2947
+ function generate_regex_for_shorthands() {
2948
+ const regex_list = TYPST_SHORTHANDS.map((s) => {
2949
+ s = s.replaceAll("|", "\\|");
2950
+ s = s.replaceAll(".", "\\.");
2951
+ s = s.replaceAll("[", "\\[");
2952
+ s = s.replaceAll("]", "\\]");
2953
+ return s;
2954
+ });
2955
+ return `(${regex_list.join("|")})`;
2781
2956
  }
2782
- function try_eat_shorthand(typst, start) {
2783
- for (const shorthand of TYPST_SHORTHANDS) {
2784
- if (typst.startsWith(shorthand, start)) {
2785
- return shorthand;
2786
- }
2787
- }
2788
- return null;
2789
- }
2790
- function tokenize_typst(typst) {
2791
- const tokens = [];
2792
- let pos = 0;
2793
- while (pos < typst.length) {
2794
- const firstChar = typst[pos];
2795
- let token;
2796
- switch (firstChar) {
2797
- case "_":
2798
- case "^":
2799
- case "&":
2800
- token = new TypstToken(6 /* CONTROL */, firstChar);
2801
- pos++;
2802
- break;
2803
- case "\n":
2804
- token = new TypstToken(7 /* NEWLINE */, firstChar);
2805
- pos++;
2806
- break;
2807
- case "\r": {
2808
- if (pos + 1 < typst.length && typst[pos + 1] === "\n") {
2809
- token = new TypstToken(7 /* NEWLINE */, "\n");
2810
- pos += 2;
2811
- } else {
2812
- token = new TypstToken(7 /* NEWLINE */, "\n");
2813
- pos++;
2814
- }
2815
- break;
2816
- }
2817
- case " ": {
2818
- let newPos = pos;
2819
- while (newPos < typst.length && typst[newPos] === " ") {
2820
- newPos++;
2821
- }
2822
- token = new TypstToken(4 /* SPACE */, typst.substring(pos, newPos));
2823
- pos = newPos;
2824
- break;
2825
- }
2826
- case "/": {
2827
- if (pos < typst.length && typst[pos + 1] === "/") {
2828
- let newPos = pos + 2;
2829
- while (newPos < typst.length && typst[newPos] !== "\n") {
2830
- newPos++;
2831
- }
2832
- token = new TypstToken(3 /* COMMENT */, typst.slice(pos + 2, newPos));
2833
- pos = newPos;
2834
- } else {
2835
- token = new TypstToken(1 /* ELEMENT */, "/");
2836
- pos++;
2837
- }
2838
- break;
2839
- }
2840
- case "\\": {
2841
- if (pos + 1 >= typst.length) {
2842
- throw new Error("Expecting a character after \\");
2843
- }
2844
- const firstTwoChars = typst.substring(pos, pos + 2);
2845
- if (["\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
2846
- token = new TypstToken(1 /* ELEMENT */, firstTwoChars);
2847
- pos += 2;
2848
- } else if (["\\\n", "\\ "].includes(firstTwoChars)) {
2849
- token = new TypstToken(6 /* CONTROL */, "\\");
2850
- pos += 1;
2851
- } else {
2852
- token = new TypstToken(6 /* CONTROL */, "");
2853
- pos++;
2854
- }
2855
- break;
2856
- }
2857
- case '"': {
2858
- let newPos = pos + 1;
2859
- while (newPos < typst.length) {
2860
- if (typst[newPos] === '"' && typst[newPos - 1] !== "\\") {
2861
- break;
2862
- }
2863
- newPos++;
2864
- }
2865
- let text = typst.substring(pos + 1, newPos);
2866
- const chars = ['"', "\\"];
2867
- for (const char of chars) {
2868
- text = text.replaceAll("\\" + char, char);
2869
- }
2870
- token = new TypstToken(2 /* TEXT */, text);
2871
- pos = newPos + 1;
2872
- break;
2873
- }
2874
- default: {
2875
- const shorthand = try_eat_shorthand(typst, pos);
2876
- if (shorthand !== null) {
2877
- token = new TypstToken(0 /* SYMBOL */, reverseShorthandMap.get(shorthand));
2878
- pos += shorthand.length;
2879
- break;
2880
- }
2881
- if (isdigit(firstChar)) {
2882
- let newPos = pos;
2883
- while (newPos < typst.length && isdigit(typst[newPos])) {
2884
- newPos += 1;
2885
- }
2886
- if (newPos < typst.length && typst[newPos] === ".") {
2887
- newPos += 1;
2888
- while (newPos < typst.length && isdigit(typst[newPos])) {
2889
- newPos += 1;
2890
- }
2891
- }
2892
- token = new TypstToken(1 /* ELEMENT */, typst.slice(pos, newPos));
2893
- } else if ("+-*/='<>!.,;?()[]|".includes(firstChar)) {
2894
- token = new TypstToken(1 /* ELEMENT */, firstChar);
2895
- } else if (isalpha(firstChar)) {
2896
- const identifier = eat_identifier_name(typst, pos);
2897
- const _type = identifier.length === 1 ? 1 /* ELEMENT */ : 0 /* SYMBOL */;
2898
- token = new TypstToken(_type, identifier);
2899
- } else {
2900
- token = new TypstToken(1 /* ELEMENT */, firstChar);
2901
- }
2902
- pos += token.value.length;
2903
- }
2904
- }
2905
- tokens.push(token);
2906
- }
2907
- return tokens;
2957
+ var REGEX_SHORTHANDS = generate_regex_for_shorthands();
2958
+ var rules_map2 = /* @__PURE__ */ new Map([
2959
+ [String.raw`//[^\n]*`, (s) => new TypstToken(3 /* COMMENT */, s.text().substring(2))],
2960
+ [String.raw`/`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
2961
+ [String.raw`[_^&]`, (s) => new TypstToken(6 /* CONTROL */, s.text())],
2962
+ [String.raw`\r?\n`, (_s) => new TypstToken(7 /* NEWLINE */, "\n")],
2963
+ [String.raw`\s+`, (s) => new TypstToken(4 /* SPACE */, s.text())],
2964
+ [String.raw`\\[$&#_]`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
2965
+ [String.raw`\\\n`, (s) => {
2966
+ return [
2967
+ new TypstToken(6 /* CONTROL */, "\\"),
2968
+ new TypstToken(7 /* NEWLINE */, "\n")
2969
+ ];
2970
+ }],
2971
+ [String.raw`\\\s`, (s) => {
2972
+ return [
2973
+ new TypstToken(6 /* CONTROL */, "\\"),
2974
+ new TypstToken(4 /* SPACE */, " ")
2975
+ ];
2976
+ }],
2977
+ // this backslash is dummy and will be ignored in later stages
2978
+ [String.raw`\\\S`, (_s) => new TypstToken(6 /* CONTROL */, "")],
2979
+ [
2980
+ String.raw`"([^"]|(\\"))*"`,
2981
+ (s) => {
2982
+ const text = s.text().substring(1, s.text().length - 1);
2983
+ text.replaceAll('\\"', '"');
2984
+ return new TypstToken(2 /* TEXT */, text);
2985
+ }
2986
+ ],
2987
+ [
2988
+ REGEX_SHORTHANDS,
2989
+ (s) => {
2990
+ const shorthand = s.text();
2991
+ const symbol = reverseShorthandMap.get(shorthand);
2992
+ return new TypstToken(0 /* SYMBOL */, symbol);
2993
+ }
2994
+ ],
2995
+ [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
2996
+ [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
2997
+ [String.raw`[a-zA-Z\.]+`, (s) => {
2998
+ return new TypstToken(s.text().length === 1 ? 1 /* ELEMENT */ : 0 /* SYMBOL */, s.text());
2999
+ }],
3000
+ [String.raw`.`, (s) => new TypstToken(1 /* ELEMENT */, s.text())]
3001
+ ]);
3002
+ var spec2 = {
3003
+ "start": rules_map2
3004
+ };
3005
+ function tokenize_typst(input) {
3006
+ const lexer = new JSLex(spec2);
3007
+ return lexer.collect(input);
2908
3008
  }
2909
3009
  function find_closing_match2(tokens, start) {
2910
3010
  assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET2]));