tex2typst 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.js +373 -273
- package/dist/jslex.d.ts +105 -0
- package/dist/tex-parser.d.ts +1 -1
- package/dist/tex2typst.min.js +13 -20
- package/dist/typst-parser.d.ts +1 -1
- package/docs/api-reference.md +1 -1
- package/package.json +1 -1
- package/src/convert.ts +32 -8
- package/src/jslex.ts +304 -0
- package/src/map.ts +4 -0
- package/src/tex-parser.ts +48 -139
- package/src/typst-parser.ts +66 -140
package/dist/index.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
// src/map.ts
|
|
2
2
|
var symbolMap = /* @__PURE__ */ new Map([
|
|
3
|
+
[",", "thin"],
|
|
4
|
+
[":", "med"],
|
|
5
|
+
[";", "thick"],
|
|
3
6
|
["cos", "cos"],
|
|
4
7
|
["sin", "sin"],
|
|
5
8
|
["tan", "tan"],
|
|
@@ -1382,15 +1385,248 @@ var TypstNode = class {
|
|
|
1382
1385
|
/**
 * Tells whether `char` occurs inside the ASCII alphabet string
 * (lower-case letters followed by upper-case letters).
 * @param {string} char Text to look up; normally a single character.
 * @return {boolean} True when `char` is a substring of the alphabet string.
 */
function isalpha(char) {
  const ASCII_LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  const found = ASCII_LETTERS.includes(char);
  return found;
}
|
|
1385
|
-
function isdigit(char) {
|
|
1386
|
-
return "0123456789".includes(char);
|
|
1387
|
-
}
|
|
1388
1388
|
/**
 * Minimal runtime assertion helper.
 * @param {*} condition Value that is expected to be truthy.
 * @param {string} [message=""] Message of the Error thrown when `condition` is falsy.
 * @throws {Error} When `condition` is falsy.
 */
function assert(condition, message = "") {
|
|
1389
1389
|
if (!condition) {
|
|
1390
1390
|
throw new Error(message);
|
|
1391
1391
|
}
|
|
1392
1392
|
}
|
|
1393
1393
|
|
|
1394
|
+
// src/jslex.ts
|
|
1395
|
+
// Unique sentinel object: Scanner.scan() returns it once the whole input has
// been consumed, and JSLex.lex() compares against it by identity to stop.
var EOF = {};
|
|
1396
|
+
/**
 * Sort comparator for candidate matches: longer matches come first, and
 * matches of equal length keep the order of their rule index (lower first).
 * @param {{len: number, index: number}} m1 First candidate.
 * @param {{len: number, index: number}} m2 Second candidate.
 * @return {number} Negative when m1 sorts before m2, positive when after.
 */
function matchcompare(m1, m2) {
  // Same length: the rule declared earlier wins.
  if (m2.len === m1.len) {
    return m1.index - m2.index;
  }
  // Different length: the longer match wins.
  return m2.len - m1.len;
}
|
|
1403
|
+
/**
 * Cursor over one input string that yields a single token per scan() call.
 *
 * A Scanner is handed to every rule action so the action can inspect the
 * current match (text/leng), look ahead (input/unput), shorten the match
 * (less/pushback), decline it (reject) or switch lexer state (begin).
 */
var Scanner = class {
  /**
   * @param {string} input Text to tokenize.
   * @param {object} lexer Owning lexer; its `states` array and `specification` map are read.
   */
  constructor(input, lexer) {
    // position within input stream
    this._pos = 0;
    // current line number
    this._line = 0;
    // current column number
    this._col = 0;
    // number of characters read through input() beyond the current match
    this._offset = 0;
    // number of characters to keep from the match after less(), or null
    this._less = null;
    // true while scan() should go on to the next candidate rule
    this._go = false;
    // state requested through begin() by the current action, or null
    this._newstate = null;
    // text and length of the current match
    this._text = null;
    this._leng = null;
    this._input = input;
    this._lexer = lexer;
    // the first declared state is the initial state
    this._state = lexer.states[0];
  }
  /**
   * Analogous to yytext and yyleng in lex - will be set during scan.
   */
  text() {
    return this._text;
  }
  leng() {
    return this._leng;
  }
  /**
   * Position in the stream, line number and column number of the scanner.
   */
  pos() {
    return this._pos;
  }
  line() {
    return this._line;
  }
  column() {
    return this._col;
  }
  /**
   * Analogous to input() in lex.
   * @return {string} The next character in the stream.
   */
  input() {
    return this._input.charAt(this._pos + this._leng + this._offset++);
  }
  /**
   * Similar to unput() in lex, but does not allow modifying the stream.
   * Steps the look-ahead offset back by one, never below zero.
   * @return {int} The offset position after the operation.
   */
  unput() {
    // A post-decrement here would assign the old value back and never move.
    this._offset = this._offset > 0 ? this._offset - 1 : 0;
    return this._offset;
  }
  /**
   * Analogous to yyless(n) in lex - retains the first n characters from this pattern, and returns
   * the rest to the input stream, such that they will be used in the next pattern-matching operation.
   * @param {int} n Number of characters to retain.
   * @return {int} Length of the stream after the operation has completed.
   */
  less(n) {
    this._less = n;
    this._offset = 0;
    this._text = this._text.substring(0, n);
    return this._leng = this._text.length;
  }
  /**
   * Like less(), but instead of retaining the first n characters, it chops off the last n.
   * @param {int} n Number of characters to chop.
   * @return {int} Length of the stream after the operation has completed.
   */
  pushback(n) {
    return this.less(this._leng - n);
  }
  /**
   * Similar to REJECT in lex, except it doesn't break the current execution context.
   * TIP: reject() should be the last instruction in a spec callback.
   */
  reject() {
    this._go = true;
  }
  /**
   * Analogous to BEGIN in lex - sets the named state (start condition).
   * @param {string|int} state Name of state to switch to, or ordinal number (0 is first, etc).
   * @return {string} The new state on successful switch.
   * @throws {Error} When `state` is neither a known state name nor a valid ordinal.
   */
  begin(state) {
    if (this._lexer.specification[state]) {
      return this._newstate = state;
    }
    const s = this._lexer.states[Number.parseInt(state, 10)];
    if (s) {
      return this._newstate = s;
    }
    throw new Error("Unknown state '" + state + "' requested");
  }
  /**
   * Simple accessor for reading in the current state.
   * @return {string} The current state.
   */
  state() {
    return this._state;
  }
  /**
   * Scan method to be returned to caller - grabs the next token and fires appropriate callback.
   * Every rule of the current state is tried at the current position; candidates are
   * ordered with matchcompare and their actions run in that order until one does not reject().
   * @return {T} The next token extracted from the stream, or EOF when the input is exhausted.
   * @throws {Error} When no rule of the current state matches a non-empty prefix.
   */
  scan() {
    if (this._pos >= this._input.length) {
      return EOF;
    }
    const str = this._input.substring(this._pos);
    const rules = this._lexer.specification[this._state];
    const matches = [];
    for (let i = 0; i < rules.length; i++) {
      const rule = rules[i];
      const mt = str.match(rule.re);
      // Empty matches are ignored so that every scan makes progress.
      if (mt !== null && mt[0].length > 0) {
        matches.push({
          index: i,
          text: mt[0],
          len: mt[0].length,
          rule
        });
      }
    }
    if (matches.length === 0) {
      throw new Error("No match found for input '" + str + "'");
    }
    matches.sort(matchcompare);
    this._go = true;
    let result;
    let m;
    for (let j = 0, n = matches.length; j < n && this._go; j++) {
      // Reset the per-action bookkeeping before each candidate runs.
      this._offset = 0;
      this._less = null;
      this._go = false;
      this._newstate = null;
      m = matches[j];
      this._text = m.text;
      this._leng = m.len;
      result = m.rule.action(this);
      // Loose comparison is kept on purpose: begin() may store the state exactly
      // as the caller passed it, which can be a number equal to a string key.
      if (this._newstate && this._newstate != this._state) {
        this._state = this._newstate;
        break;
      }
    }
    // Advance past what the last action kept, plus anything it read with input().
    const text = this._less === null ? m.text : m.text.substring(0, this._less);
    const len = text.length;
    this._pos += len + this._offset;
    const nlm = text.match(/\n/g);
    if (nlm !== null) {
      this._line += nlm.length;
      this._col = len - text.lastIndexOf("\n") - 1;
    } else {
      this._col += len;
    }
    return result;
  }
};
|
|
1562
|
+
/**
 * Lexer built from a specification that maps each state name to an ordered
 * Map of `regexp source -> action`. Every pattern is compiled once, anchored
 * at the start of the remaining input.
 */
var JSLex = class {
  /**
   * @param {Object<string, Map<string, function>>} spec3 Rules per state; the first key is the initial state.
   * @throws {Error} When a pattern is not a valid regular expression.
   */
  constructor(spec3) {
    this.states = Object.keys(spec3);
    this.specification = {};
    for (const s of this.states) {
      const rule_map = spec3[s];
      // Own-property check only: `in` would also see Object.prototype keys and
      // wrongly reject a state that happens to be called e.g. "toString".
      if (Object.prototype.hasOwnProperty.call(this.specification, s)) {
        throw new Error("Duplicate state declaration encountered for state '" + s + "'");
      }
      this.specification[s] = [];
      for (const [k, v] of rule_map.entries()) {
        let re;
        try {
          // Anchor so that a rule can only match at the current scan position.
          re = new RegExp("^" + k);
        } catch (err) {
          throw new Error(
            "Invalid regexp '" + k + "' in state '" + s + "' (" + err.message + ")",
            { cause: err }
          );
        }
        this.specification[s].push({
          re,
          action: v
        });
      }
    }
  }
  /**
   * Scanner function - makes a new scanner object which is used to get tokens one at a time.
   * @param {string} input Input text to tokenize.
   * @return {Scanner} Scanner positioned at the start of `input`.
   */
  scanner(input) {
    return new Scanner(input, this);
  }
  /**
   * Similar to lex's yylex() function, consumes all input, calling callback for each token.
   * Actions that return undefined produce no callback invocation.
   * @param {string} input Text to lex.
   * @param {function} callback Function to execute for each token.
   */
  lex(input, callback) {
    const scanner = this.scanner(input);
    while (true) {
      const token = scanner.scan();
      if (token === EOF) {
        return;
      }
      if (token !== void 0) {
        callback(token);
      }
    }
  }
  /**
   * Consumes all input, collecting tokens along the way.
   * An action that returns an array contributes each of its elements.
   * @param {string} input Text to lex.
   * @return {array} List of tokens in input order.
   */
  collect(input) {
    const tokens = [];
    const callback = function(item) {
      if (Array.isArray(item)) {
        tokens.push(...item);
      } else {
        tokens.push(item);
      }
    };
    this.lex(input, callback);
    return tokens;
  }
};
|
|
1629
|
+
|
|
1394
1630
|
// src/tex-parser.ts
|
|
1395
1631
|
var UNARY_COMMANDS = [
|
|
1396
1632
|
"sqrt",
|
|
@@ -1420,7 +1656,10 @@ var UNARY_COMMANDS = [
|
|
|
1420
1656
|
"underline",
|
|
1421
1657
|
"vec",
|
|
1422
1658
|
"widehat",
|
|
1423
|
-
"widetilde"
|
|
1659
|
+
"widetilde",
|
|
1660
|
+
"overleftarrow",
|
|
1661
|
+
"overrightarrow",
|
|
1662
|
+
"hspace"
|
|
1424
1663
|
];
|
|
1425
1664
|
var BINARY_COMMANDS = [
|
|
1426
1665
|
"frac",
|
|
@@ -1469,13 +1708,6 @@ function eat_primes(tokens, start) {
|
|
|
1469
1708
|
}
|
|
1470
1709
|
return pos - start;
|
|
1471
1710
|
}
|
|
1472
|
-
function eat_command_name(latex, start) {
|
|
1473
|
-
let pos = start;
|
|
1474
|
-
while (pos < latex.length && isalpha(latex[pos])) {
|
|
1475
|
-
pos += 1;
|
|
1476
|
-
}
|
|
1477
|
-
return latex.substring(start, pos);
|
|
1478
|
-
}
|
|
1479
1711
|
function find_closing_match(tokens, start, leftToken, rightToken) {
|
|
1480
1712
|
assert(tokens[start].eq(leftToken));
|
|
1481
1713
|
let count = 1;
|
|
@@ -1503,126 +1735,46 @@ var END_COMMAND = new TexToken(1 /* COMMAND */, "\\end");
|
|
|
1503
1735
|
/**
 * Delegates to find_closing_match using the \begin / \end command tokens as
 * the opening and closing delimiters.
 * @param {Array} tokens Token list to search.
 * @param {number} start Index passed through to find_closing_match as the opening position.
 * @return {*} Whatever find_closing_match returns for this pair.
 */
function find_closing_end_command(tokens, start) {
  const closing = find_closing_match(tokens, start, BEGIN_COMMAND, END_COMMAND);
  return closing;
}
|
|
1506
|
-
function
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
while (count > 0) {
|
|
1511
|
-
if (pos >= latex.length) {
|
|
1512
|
-
throw new LatexParserError("Unmatched curly brackets");
|
|
1513
|
-
}
|
|
1514
|
-
if (pos + 1 < latex.length && ["\\{", "\\}"].includes(latex.substring(pos, pos + 2))) {
|
|
1515
|
-
pos += 2;
|
|
1516
|
-
continue;
|
|
1517
|
-
}
|
|
1518
|
-
if (latex[pos] === "{") {
|
|
1519
|
-
count += 1;
|
|
1520
|
-
} else if (latex[pos] === "}") {
|
|
1521
|
-
count -= 1;
|
|
1522
|
-
}
|
|
1523
|
-
pos += 1;
|
|
1738
|
+
/**
 * Removes the backslash in front of the characters { } \ $ & # _ %.
 *
 * The replacement is done in a single left-to-right pass so that the output
 * of one replacement is never re-read as a new escape: an escaped backslash
 * followed by "$" yields a backslash and "$", not a bare "$".
 * @param {string} str Text that may contain backslash escapes.
 * @return {string} The text with those escapes resolved.
 */
function unescape(str) {
  return str.replace(/\\([{}\\$&#_%])/g, "$1");
}
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
}
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
token = new TexToken(5 /* NEWLINE */, "\n");
|
|
1561
|
-
pos++;
|
|
1562
|
-
}
|
|
1563
|
-
break;
|
|
1564
|
-
}
|
|
1565
|
-
case " ": {
|
|
1566
|
-
let newPos = pos;
|
|
1567
|
-
while (newPos < latex.length && latex[newPos] === " ") {
|
|
1568
|
-
newPos += 1;
|
|
1569
|
-
}
|
|
1570
|
-
token = new TexToken(4 /* SPACE */, latex.slice(pos, newPos));
|
|
1571
|
-
pos = newPos;
|
|
1572
|
-
break;
|
|
1573
|
-
}
|
|
1574
|
-
case "\\": {
|
|
1575
|
-
if (pos + 1 >= latex.length) {
|
|
1576
|
-
throw new LatexParserError("Expecting command name after \\");
|
|
1577
|
-
}
|
|
1578
|
-
const firstTwoChars = latex.slice(pos, pos + 2);
|
|
1579
|
-
if (["\\\\", "\\,"].includes(firstTwoChars)) {
|
|
1580
|
-
token = new TexToken(6 /* CONTROL */, firstTwoChars);
|
|
1581
|
-
} else if (["\\{", "\\}", "\\%", "\\$", "\\&", "\\#", "\\_", "\\|"].includes(firstTwoChars)) {
|
|
1582
|
-
token = new TexToken(0 /* ELEMENT */, firstTwoChars);
|
|
1583
|
-
} else {
|
|
1584
|
-
const command = eat_command_name(latex, pos + 1);
|
|
1585
|
-
token = new TexToken(1 /* COMMAND */, "\\" + command);
|
|
1586
|
-
}
|
|
1587
|
-
pos += token.value.length;
|
|
1588
|
-
break;
|
|
1589
|
-
}
|
|
1590
|
-
default: {
|
|
1591
|
-
if (isdigit(firstChar)) {
|
|
1592
|
-
let newPos = pos;
|
|
1593
|
-
while (newPos < latex.length && isdigit(latex[newPos])) {
|
|
1594
|
-
newPos += 1;
|
|
1595
|
-
}
|
|
1596
|
-
token = new TexToken(0 /* ELEMENT */, latex.slice(pos, newPos));
|
|
1597
|
-
} else if (isalpha(firstChar)) {
|
|
1598
|
-
token = new TexToken(0 /* ELEMENT */, firstChar);
|
|
1599
|
-
} else if ("+-*/='<>!.,;:?()[]|".includes(firstChar)) {
|
|
1600
|
-
token = new TexToken(0 /* ELEMENT */, firstChar);
|
|
1601
|
-
} else {
|
|
1602
|
-
token = new TexToken(7 /* UNKNOWN */, firstChar);
|
|
1603
|
-
}
|
|
1604
|
-
pos += token.value.length;
|
|
1605
|
-
}
|
|
1606
|
-
}
|
|
1607
|
-
tokens.push(token);
|
|
1608
|
-
if (token.type === 1 /* COMMAND */ && ["\\text", "\\operatorname", "\\begin", "\\end"].includes(token.value)) {
|
|
1609
|
-
if (pos >= latex.length || latex[pos] !== "{") {
|
|
1610
|
-
throw new LatexParserError(`No content for ${token.value} command`);
|
|
1611
|
-
}
|
|
1612
|
-
tokens.push(new TexToken(6 /* CONTROL */, "{"));
|
|
1613
|
-
const posClosingBracket = find_closing_curly_bracket_char(latex, pos);
|
|
1614
|
-
pos++;
|
|
1615
|
-
let textInside = latex.slice(pos, posClosingBracket);
|
|
1616
|
-
const chars = ["{", "}", "\\", "$", "&", "#", "_", "%"];
|
|
1617
|
-
for (const char of chars) {
|
|
1618
|
-
textInside = textInside.replaceAll("\\" + char, char);
|
|
1619
|
-
}
|
|
1620
|
-
tokens.push(new TexToken(2 /* TEXT */, textInside));
|
|
1621
|
-
tokens.push(new TexToken(6 /* CONTROL */, "}"));
|
|
1622
|
-
pos = posClosingBracket + 1;
|
|
1623
|
-
}
|
|
1624
|
-
}
|
|
1625
|
-
return tokens;
|
|
1745
|
+
// Lexer rules for LaTeX input: regexp source -> action producing TexToken(s).
// JSLex anchors every pattern at the scan position; the longest match wins and
// ties go to the rule listed first.
var rules_map = /* @__PURE__ */ new Map([
  [
    // Commands whose braced argument is taken verbatim as text. `\{` and `\}`
    // inside the argument are consumed as a unit so that an escaped closing
    // brace does not end the argument early. Nested unescaped braces are not
    // balanced by this pattern.
    String.raw`\\(text|operatorname|begin|end|hspace)\{(?:\\[{}]|.)+?\}`,
    (s) => {
      const text = s.text();
      const open = text.indexOf("{");
      const command = text.substring(0, open);
      const text_inside = text.substring(open + 1, text.lastIndexOf("}"));
      return [
        new TexToken(1 /* COMMAND */, command),
        new TexToken(6 /* CONTROL */, "{"),
        new TexToken(2 /* TEXT */, unescape(text_inside)),
        new TexToken(6 /* CONTROL */, "}")
      ];
    }
  ],
  // Comment: everything after % up to the end of the line (the % is dropped).
  [String.raw`%[^\n]*`, (s) => new TexToken(3 /* COMMENT */, s.text().substring(1))],
  [String.raw`[{}_^&]`, (s) => new TexToken(6 /* CONTROL */, s.text())],
  // CRLF and LF are both reported as "\n".
  [String.raw`\r?\n`, (_s) => new TexToken(5 /* NEWLINE */, "\n")],
  [String.raw`\s+`, (s) => new TexToken(4 /* SPACE */, s.text())],
  // \\ \, \: \;
  [String.raw`\\[\\,:;]`, (s) => new TexToken(6 /* CONTROL */, s.text())],
  // Escaped literal characters such as \{ or \%.
  [String.raw`\\[{}%$&#_|]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  [String.raw`\\[a-zA-Z]+`, (s) => new TexToken(1 /* COMMAND */, s.text())],
  // A run of digits is one element; letters are one element each.
  [String.raw`[0-9]+`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  [String.raw`[a-zA-Z]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  [String.raw`[+\-*/='<>!.,;:?()\[\]|]`, (s) => new TexToken(0 /* ELEMENT */, s.text())],
  // Fallback for any other single character.
  [String.raw`.`, (s) => new TexToken(7 /* UNKNOWN */, s.text())]
]);
|
|
1772
|
+
// Lexer specification for LaTeX input: a single state named "start" whose
// rules are rules_map. JSLex treats the first key as the initial state.
var spec = {
|
|
1773
|
+
"start": rules_map
|
|
1774
|
+
};
|
|
1775
|
+
/**
 * Tokenizes LaTeX source by running a freshly built JSLex over it.
 * @param {string} input LaTeX text.
 * @return {Array} Tokens collected by the lexer, in input order.
 */
function tokenize_tex(input) {
  return new JSLex(spec).collect(input);
}
|
|
1627
1779
|
var LatexParserError = class extends Error {
|
|
1628
1780
|
constructor(message) {
|
|
@@ -1755,9 +1907,10 @@ var LatexParser = class {
|
|
|
1755
1907
|
case "}":
|
|
1756
1908
|
throw new LatexParserError("Unmatched '}'");
|
|
1757
1909
|
case "\\\\":
|
|
1758
|
-
return [new TexNode("control", "\\\\"), start + 1];
|
|
1759
1910
|
case "\\,":
|
|
1760
|
-
|
|
1911
|
+
case "\\:":
|
|
1912
|
+
case "\\;":
|
|
1913
|
+
return [new TexNode("control", controlChar), start + 1];
|
|
1761
1914
|
case "_":
|
|
1762
1915
|
case "^":
|
|
1763
1916
|
return [EMPTY_NODE, start];
|
|
@@ -1938,7 +2091,7 @@ function passExpandCustomTexMacros(tokens, customTexMacros) {
|
|
|
1938
2091
|
let out_tokens = [];
|
|
1939
2092
|
for (const token of tokens) {
|
|
1940
2093
|
if (token.type === 1 /* COMMAND */ && customTexMacros[token.value]) {
|
|
1941
|
-
const expanded_tokens =
|
|
2094
|
+
const expanded_tokens = tokenize_tex(customTexMacros[token.value]);
|
|
1942
2095
|
out_tokens = out_tokens.concat(expanded_tokens);
|
|
1943
2096
|
} else {
|
|
1944
2097
|
out_tokens.push(token);
|
|
@@ -1948,7 +2101,7 @@ function passExpandCustomTexMacros(tokens, customTexMacros) {
|
|
|
1948
2101
|
}
|
|
1949
2102
|
function parseTex(tex, customTexMacros) {
|
|
1950
2103
|
const parser = new LatexParser();
|
|
1951
|
-
let tokens =
|
|
2104
|
+
let tokens = tokenize_tex(tex);
|
|
1952
2105
|
tokens = passIgnoreWhitespaceBeforeScriptMark(tokens);
|
|
1953
2106
|
tokens = passExpandCustomTexMacros(tokens, customTexMacros);
|
|
1954
2107
|
return parser.parse(tokens);
|
|
@@ -2486,22 +2639,40 @@ function convert_tex_node_to_typst(node, options = {}) {
|
|
|
2486
2639
|
if (node.content === "\\mathbb" && arg0.type === "atom" && /^[A-Z]$/.test(arg0.content)) {
|
|
2487
2640
|
return new TypstNode("symbol", arg0.content + arg0.content);
|
|
2488
2641
|
}
|
|
2642
|
+
if (node.content === "\\overrightarrow") {
|
|
2643
|
+
return new TypstNode(
|
|
2644
|
+
"funcCall",
|
|
2645
|
+
"arrow",
|
|
2646
|
+
[arg0]
|
|
2647
|
+
);
|
|
2648
|
+
}
|
|
2649
|
+
if (node.content === "\\overleftarrow") {
|
|
2650
|
+
return new TypstNode(
|
|
2651
|
+
"funcCall",
|
|
2652
|
+
"accent",
|
|
2653
|
+
[arg0, new TypstNode("symbol", "arrow.l")]
|
|
2654
|
+
);
|
|
2655
|
+
}
|
|
2489
2656
|
if (node.content === "\\operatorname") {
|
|
2490
|
-
const
|
|
2491
|
-
if (body.length !== 1 || body[0].type !== "text") {
|
|
2492
|
-
throw new TypstWriterError(`Expecting body of \\operatorname to be text but got`, node);
|
|
2493
|
-
}
|
|
2494
|
-
const text = body[0].content;
|
|
2657
|
+
const text = arg0.content;
|
|
2495
2658
|
if (TYPST_INTRINSIC_SYMBOLS.includes(text)) {
|
|
2496
2659
|
return new TypstNode("symbol", text);
|
|
2497
2660
|
} else {
|
|
2498
2661
|
return new TypstNode(
|
|
2499
2662
|
"funcCall",
|
|
2500
2663
|
"op",
|
|
2501
|
-
[
|
|
2664
|
+
[arg0]
|
|
2502
2665
|
);
|
|
2503
2666
|
}
|
|
2504
2667
|
}
|
|
2668
|
+
if (node.content === "\\hspace") {
|
|
2669
|
+
const text = arg0.content;
|
|
2670
|
+
return new TypstNode(
|
|
2671
|
+
"funcCall",
|
|
2672
|
+
"#h",
|
|
2673
|
+
[new TypstNode("symbol", text)]
|
|
2674
|
+
);
|
|
2675
|
+
}
|
|
2505
2676
|
return new TypstNode(
|
|
2506
2677
|
"funcCall",
|
|
2507
2678
|
tex_token_to_typst(node.content),
|
|
@@ -2550,8 +2721,9 @@ function convert_tex_node_to_typst(node, options = {}) {
|
|
|
2550
2721
|
case "control":
|
|
2551
2722
|
if (node.content === "\\\\") {
|
|
2552
2723
|
return new TypstNode("symbol", "\\");
|
|
2553
|
-
} else if (node.content
|
|
2554
|
-
|
|
2724
|
+
} else if (symbolMap.has(node.content.substring(1))) {
|
|
2725
|
+
const typst_symbol = symbolMap.get(node.content.substring(1));
|
|
2726
|
+
return new TypstNode("symbol", typst_symbol);
|
|
2555
2727
|
} else {
|
|
2556
2728
|
throw new TypstWriterError(`Unknown control sequence: ${node.content}`, node);
|
|
2557
2729
|
}
|
|
@@ -2772,139 +2944,67 @@ function eat_primes2(tokens, start) {
|
|
|
2772
2944
|
}
|
|
2773
2945
|
return pos - start;
|
|
2774
2946
|
}
|
|
2775
|
-
function
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2947
|
+
/**
 * Builds the source of one capturing alternation that matches any entry of
 * TYPST_SHORTHANDS literally.
 *
 * Every regular-expression metacharacter is escaped, not only | . [ ], so an
 * entry such as "*" cannot produce an invalid pattern. Longer entries are
 * listed first because a regexp alternation takes the first alternative that
 * matches, not the longest one; TYPST_SHORTHANDS itself is left untouched.
 * @return {string} Pattern source of the form "(alt1|alt2|...)".
 */
function generate_regex_for_shorthands() {
  const regex_list = [...TYPST_SHORTHANDS]
    .sort((a, b) => b.length - a.length)
    .map((s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
  return `(${regex_list.join("|")})`;
}
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
pos = newPos;
|
|
2834
|
-
} else {
|
|
2835
|
-
token = new TypstToken(1 /* ELEMENT */, "/");
|
|
2836
|
-
pos++;
|
|
2837
|
-
}
|
|
2838
|
-
break;
|
|
2839
|
-
}
|
|
2840
|
-
case "\\": {
|
|
2841
|
-
if (pos + 1 >= typst.length) {
|
|
2842
|
-
throw new Error("Expecting a character after \\");
|
|
2843
|
-
}
|
|
2844
|
-
const firstTwoChars = typst.substring(pos, pos + 2);
|
|
2845
|
-
if (["\\$", "\\&", "\\#", "\\_"].includes(firstTwoChars)) {
|
|
2846
|
-
token = new TypstToken(1 /* ELEMENT */, firstTwoChars);
|
|
2847
|
-
pos += 2;
|
|
2848
|
-
} else if (["\\\n", "\\ "].includes(firstTwoChars)) {
|
|
2849
|
-
token = new TypstToken(6 /* CONTROL */, "\\");
|
|
2850
|
-
pos += 1;
|
|
2851
|
-
} else {
|
|
2852
|
-
token = new TypstToken(6 /* CONTROL */, "");
|
|
2853
|
-
pos++;
|
|
2854
|
-
}
|
|
2855
|
-
break;
|
|
2856
|
-
}
|
|
2857
|
-
case '"': {
|
|
2858
|
-
let newPos = pos + 1;
|
|
2859
|
-
while (newPos < typst.length) {
|
|
2860
|
-
if (typst[newPos] === '"' && typst[newPos - 1] !== "\\") {
|
|
2861
|
-
break;
|
|
2862
|
-
}
|
|
2863
|
-
newPos++;
|
|
2864
|
-
}
|
|
2865
|
-
let text = typst.substring(pos + 1, newPos);
|
|
2866
|
-
const chars = ['"', "\\"];
|
|
2867
|
-
for (const char of chars) {
|
|
2868
|
-
text = text.replaceAll("\\" + char, char);
|
|
2869
|
-
}
|
|
2870
|
-
token = new TypstToken(2 /* TEXT */, text);
|
|
2871
|
-
pos = newPos + 1;
|
|
2872
|
-
break;
|
|
2873
|
-
}
|
|
2874
|
-
default: {
|
|
2875
|
-
const shorthand = try_eat_shorthand(typst, pos);
|
|
2876
|
-
if (shorthand !== null) {
|
|
2877
|
-
token = new TypstToken(0 /* SYMBOL */, reverseShorthandMap.get(shorthand));
|
|
2878
|
-
pos += shorthand.length;
|
|
2879
|
-
break;
|
|
2880
|
-
}
|
|
2881
|
-
if (isdigit(firstChar)) {
|
|
2882
|
-
let newPos = pos;
|
|
2883
|
-
while (newPos < typst.length && isdigit(typst[newPos])) {
|
|
2884
|
-
newPos += 1;
|
|
2885
|
-
}
|
|
2886
|
-
if (newPos < typst.length && typst[newPos] === ".") {
|
|
2887
|
-
newPos += 1;
|
|
2888
|
-
while (newPos < typst.length && isdigit(typst[newPos])) {
|
|
2889
|
-
newPos += 1;
|
|
2890
|
-
}
|
|
2891
|
-
}
|
|
2892
|
-
token = new TypstToken(1 /* ELEMENT */, typst.slice(pos, newPos));
|
|
2893
|
-
} else if ("+-*/='<>!.,;?()[]|".includes(firstChar)) {
|
|
2894
|
-
token = new TypstToken(1 /* ELEMENT */, firstChar);
|
|
2895
|
-
} else if (isalpha(firstChar)) {
|
|
2896
|
-
const identifier = eat_identifier_name(typst, pos);
|
|
2897
|
-
const _type = identifier.length === 1 ? 1 /* ELEMENT */ : 0 /* SYMBOL */;
|
|
2898
|
-
token = new TypstToken(_type, identifier);
|
|
2899
|
-
} else {
|
|
2900
|
-
token = new TypstToken(1 /* ELEMENT */, firstChar);
|
|
2901
|
-
}
|
|
2902
|
-
pos += token.value.length;
|
|
2903
|
-
}
|
|
2904
|
-
}
|
|
2905
|
-
tokens.push(token);
|
|
2906
|
-
}
|
|
2907
|
-
return tokens;
|
|
2957
|
+
// Pattern source matching any Typst shorthand; computed once at load time and
// used as a rule key in rules_map2.
var REGEX_SHORTHANDS = generate_regex_for_shorthands();
|
|
2958
|
+
// Lexer rules for Typst math input: regexp source -> action producing
// TypstToken(s). JSLex anchors every pattern at the scan position; the longest
// match wins and ties go to the rule listed first.
var rules_map2 = /* @__PURE__ */ new Map([
  // Line comment: the leading // is dropped.
  [String.raw`//[^\n]*`, (s) => new TypstToken(3 /* COMMENT */, s.text().substring(2))],
  [String.raw`/`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  [String.raw`[_^&]`, (s) => new TypstToken(6 /* CONTROL */, s.text())],
  // CRLF and LF are both reported as "\n".
  [String.raw`\r?\n`, (_s) => new TypstToken(7 /* NEWLINE */, "\n")],
  [String.raw`\s+`, (s) => new TypstToken(4 /* SPACE */, s.text())],
  // Escaped literal characters.
  [String.raw`\\[$&#_]`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  // Backslash followed by a newline: line break, then the newline itself.
  [String.raw`\\\n`, (s) => {
    return [
      new TypstToken(6 /* CONTROL */, "\\"),
      new TypstToken(7 /* NEWLINE */, "\n")
    ];
  }],
  // Backslash followed by other whitespace: line break, then one space.
  [String.raw`\\\s`, (s) => {
    return [
      new TypstToken(6 /* CONTROL */, "\\"),
      new TypstToken(4 /* SPACE */, " ")
    ];
  }],
  // this backslash is dummy and will be ignored in later stages
  // The lookahead keeps the following character in the stream so that it is
  // tokenized on its own instead of being swallowed together with the backslash.
  [String.raw`\\(?=\S)`, (_s) => new TypstToken(6 /* CONTROL */, "")],
  [
    // Double-quoted text. A backslash always pairs with the next character, so
    // an escaped quote (\") does not terminate the string.
    String.raw`"(?:\\[\s\S]|[^"\\])*"`,
    (s) => {
      const quoted = s.text();
      // Strings are immutable: keep the result of replaceAll.
      const text = quoted.substring(1, quoted.length - 1).replaceAll('\\"', '"');
      return new TypstToken(2 /* TEXT */, text);
    }
  ],
  [
    REGEX_SHORTHANDS,
    (s) => {
      const shorthand = s.text();
      const symbol = reverseShorthandMap.get(shorthand);
      return new TypstToken(0 /* SYMBOL */, symbol);
    }
  ],
  // Integer or decimal number.
  [String.raw`[0-9]+(\.[0-9]+)?`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  [String.raw`[+\-*/=\'<>!.,;?()\[\]|]`, (s) => new TypstToken(1 /* ELEMENT */, s.text())],
  // Identifier (may contain dots): one character is an element, longer is a symbol.
  [String.raw`[a-zA-Z\.]+`, (s) => {
    return new TypstToken(s.text().length === 1 ? 1 /* ELEMENT */ : 0 /* SYMBOL */, s.text());
  }],
  // Fallback for any other single character.
  [String.raw`.`, (s) => new TypstToken(1 /* ELEMENT */, s.text())]
]);
|
|
3002
|
+
// Lexer specification for Typst input: a single state named "start" whose
// rules are rules_map2. JSLex treats the first key as the initial state.
var spec2 = {
|
|
3003
|
+
"start": rules_map2
|
|
3004
|
+
};
|
|
3005
|
+
/**
 * Tokenizes Typst math source by running a freshly built JSLex over it.
 * @param {string} input Typst text.
 * @return {Array} Tokens collected by the lexer, in input order.
 */
function tokenize_typst(input) {
  return new JSLex(spec2).collect(input);
}
|
|
2909
3009
|
function find_closing_match2(tokens, start) {
|
|
2910
3010
|
assert(tokens[start].isOneOf([LEFT_PARENTHESES, LEFT_BRACKET, LEFT_CURLY_BRACKET2]));
|