temml 0.10.3 → 0.10.4
Sign up to get free protection for your applications and to get access to all the features.
- package/dist/temml.cjs +49 -22
- package/dist/temml.js +49 -22
- package/dist/temml.min.js +1 -1
- package/dist/temml.mjs +49 -22
- package/dist/temmlPostProcess.js +1 -1
- package/package.json +8 -2
- package/src/Lexer.js +2 -8
- package/src/Parser.js +0 -11
- package/src/buildMathML.js +44 -0
- package/src/functions/symbolsOrd.js +1 -1
- package/src/postProcess.js +1 -1
package/dist/temml.mjs
CHANGED
@@ -1964,6 +1964,48 @@ const consolidateText = mrow => {
|
|
1964
1964
|
return mtext
|
1965
1965
|
};
|
1966
1966
|
|
1967
|
+
const numberRegEx$1 = /^[0-9]$/;
|
1968
|
+
const isCommaOrDot = node => {
|
1969
|
+
return (node.type === "atom" && node.text === ",") ||
|
1970
|
+
(node.type === "textord" && node.text === ".")
|
1971
|
+
};
|
1972
|
+
const consolidateNumbers = expression => {
|
1973
|
+
// Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
|
1974
|
+
// not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
|
1975
|
+
if (expression.length < 2) { return }
|
1976
|
+
const nums = [];
|
1977
|
+
let inNum = false;
|
1978
|
+
// Find adjacent numerals
|
1979
|
+
for (let i = 0; i < expression.length; i++) {
|
1980
|
+
const node = expression[i];
|
1981
|
+
if (node.type === "textord" && numberRegEx$1.test(node.text)) {
|
1982
|
+
if (!inNum) { nums.push({ start: i }); }
|
1983
|
+
inNum = true;
|
1984
|
+
} else {
|
1985
|
+
if (inNum) { nums[nums.length - 1].end = i - 1; }
|
1986
|
+
inNum = false;
|
1987
|
+
}
|
1988
|
+
}
|
1989
|
+
if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
|
1990
|
+
|
1991
|
+
// Determine if numeral groups are separated by a comma or dot.
|
1992
|
+
for (let i = nums.length - 1; i > 0; i--) {
|
1993
|
+
if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
|
1994
|
+
// Merge the two groups.
|
1995
|
+
nums[i - 1].end = nums[i].end;
|
1996
|
+
nums.splice(i, 1);
|
1997
|
+
}
|
1998
|
+
}
|
1999
|
+
|
2000
|
+
// Consolidate the number nodes
|
2001
|
+
for (let i = nums.length - 1; i >= 0; i--) {
|
2002
|
+
for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
|
2003
|
+
expression[nums[i].start].text += expression[j].text;
|
2004
|
+
}
|
2005
|
+
expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
|
2006
|
+
}
|
2007
|
+
};
|
2008
|
+
|
1967
2009
|
/**
|
1968
2010
|
* Wrap the given array of nodes in an <mrow> node if needed, i.e.,
|
1969
2011
|
* unless the array has length 1. Always returns a single node.
|
@@ -1999,6 +2041,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
|
|
1999
2041
|
return [group];
|
2000
2042
|
}
|
2001
2043
|
|
2044
|
+
consolidateNumbers(expression);
|
2045
|
+
|
2002
2046
|
const groups = [];
|
2003
2047
|
for (let i = 0; i < expression.length; i++) {
|
2004
2048
|
const group = buildGroup$1(expression[i], style);
|
@@ -7641,7 +7685,7 @@ const smallCaps = Object.freeze({
|
|
7641
7685
|
// "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
|
7642
7686
|
// src/symbols.js.
|
7643
7687
|
|
7644
|
-
const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
|
7688
|
+
const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
|
7645
7689
|
const latinRegEx = /[A-Ba-z]/;
|
7646
7690
|
|
7647
7691
|
const italicNumber = (text, variant, tag) => {
|
@@ -7695,7 +7739,7 @@ defineFunctionBuilders({
|
|
7695
7739
|
const variant = getVariant(group, style) || "normal";
|
7696
7740
|
|
7697
7741
|
let node;
|
7698
|
-
if (numberRegEx$1.test(group.text)) {
|
7742
|
+
if (numberRegEx.test(group.text)) {
|
7699
7743
|
const tag = group.mode === "text" ? "mtext" : "mn";
|
7700
7744
|
if (variant === "italic" || variant === "bold-italic") {
|
7701
7745
|
return italicNumber(text, variant, tag)
|
@@ -8008,8 +8052,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
|
|
8008
8052
|
const tokenRegexString =
|
8009
8053
|
`(${spaceRegexString}+)|` + // whitespace
|
8010
8054
|
`${controlSpaceRegexString}|` + // whitespace
|
8011
|
-
"(number" +
|
8012
|
-
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
8055
|
+
"([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
8013
8056
|
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
8014
8057
|
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
|
8015
8058
|
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
@@ -8024,12 +8067,7 @@ class Lexer {
|
|
8024
8067
|
// Separate accents from characters
|
8025
8068
|
this.input = input;
|
8026
8069
|
this.settings = settings;
|
8027
|
-
this.tokenRegex = new RegExp(
|
8028
|
-
// Strict Temml, like TeX, lexes one numeral at a time.
|
8029
|
-
// Default Temml lexes contiguous numerals into a single <mn> element.
|
8030
|
-
tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
|
8031
|
-
"g"
|
8032
|
-
);
|
8070
|
+
this.tokenRegex = new RegExp(tokenRegexString, 'g');
|
8033
8071
|
// Category codes. The lexer only supports comment characters (14) for now.
|
8034
8072
|
// MacroExpander additionally distinguishes active (13).
|
8035
8073
|
this.catcodes = {
|
@@ -11797,8 +11835,6 @@ var unicodeSymbols = {
|
|
11797
11835
|
|
11798
11836
|
/* eslint no-constant-condition:0 */
|
11799
11837
|
|
11800
|
-
const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
|
11801
|
-
|
11802
11838
|
/**
|
11803
11839
|
* This file contains the parser used to parse out a TeX expression from the
|
11804
11840
|
* input. Since TeX isn't context-free, standard parsers don't work particularly
|
@@ -12720,15 +12756,6 @@ class Parser {
|
|
12720
12756
|
};
|
12721
12757
|
}
|
12722
12758
|
symbol = s;
|
12723
|
-
} else if (!this.strict && numberRegEx.test(text)) {
|
12724
|
-
// A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
|
12725
|
-
this.consume();
|
12726
|
-
return {
|
12727
|
-
type: "textord",
|
12728
|
-
mode: this.mode,
|
12729
|
-
loc: SourceLocation.range(nucleus),
|
12730
|
-
text
|
12731
|
-
}
|
12732
12759
|
} else if (text.charCodeAt(0) >= 0x80) {
|
12733
12760
|
// no symbol for e.g. ^
|
12734
12761
|
if (this.settings.strict) {
|
@@ -12968,7 +12995,7 @@ class Style {
|
|
12968
12995
|
* https://mit-license.org/
|
12969
12996
|
*/
|
12970
12997
|
|
12971
|
-
const version = "0.10.3";
|
12998
|
+
const version = "0.10.4";
|
12972
12999
|
|
12973
13000
|
function postProcess(block) {
|
12974
13001
|
const labelMap = {};
|
package/dist/temmlPostProcess.js
CHANGED
package/package.json
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
{
|
2
2
|
"name": "temml",
|
3
|
-
"version": "0.10.3",
|
3
|
+
"version": "0.10.4",
|
4
4
|
"description": "TeX to MathML conversion in JavaScript.",
|
5
5
|
"main": "dist/temml.js",
|
6
|
+
"exports": {
|
7
|
+
".": {
|
8
|
+
"require": "./dist/temml.cjs"
|
9
|
+
},
|
10
|
+
"./*": "./*"
|
11
|
+
},
|
6
12
|
"homepage": "https://temml.org",
|
7
13
|
"repository": {
|
8
14
|
"type": "git",
|
@@ -24,7 +30,7 @@
|
|
24
30
|
},
|
25
31
|
"scripts": {
|
26
32
|
"lint": "eslint temml.js src",
|
27
|
-
"unit-test": "node
|
33
|
+
"unit-test": "node ./test/unit-test.cjs",
|
28
34
|
"visual-test": "node utils/buildTests.js",
|
29
35
|
"test": "yarn lint && node utils/buildTests.js && yarn unit-test",
|
30
36
|
"minify": "terser test/temml.js -o site/assets/temml.min.js -c -m && terser contrib/mhchem/mhchem.js -o site/assets/mhchem.min.js -c -m",
|
package/src/Lexer.js
CHANGED
@@ -49,8 +49,7 @@ export const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriti
|
|
49
49
|
const tokenRegexString =
|
50
50
|
`(${spaceRegexString}+)|` + // whitespace
|
51
51
|
`${controlSpaceRegexString}|` + // whitespace
|
52
|
-
"(number" +
|
53
|
-
"|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
52
|
+
"([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
|
54
53
|
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
55
54
|
"|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
|
56
55
|
`${combiningDiacriticalMarkString}*` + // ...plus accents
|
@@ -65,12 +64,7 @@ export default class Lexer {
|
|
65
64
|
// Separate accents from characters
|
66
65
|
this.input = input;
|
67
66
|
this.settings = settings;
|
68
|
-
this.tokenRegex = new RegExp(
|
69
|
-
// Strict Temml, like TeX, lexes one numeral at a time.
|
70
|
-
// Default Temml lexes contiguous numerals into a single <mn> element.
|
71
|
-
tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
|
72
|
-
"g"
|
73
|
-
);
|
67
|
+
this.tokenRegex = new RegExp(tokenRegexString, 'g');
|
74
68
|
// Category codes. The lexer only supports comment characters (14) for now.
|
75
69
|
// MacroExpander additionally distinguishes active (13).
|
76
70
|
this.catcodes = {
|
package/src/Parser.js
CHANGED
@@ -16,8 +16,6 @@ import { isDelimiter } from "./functions/delimsizing"
|
|
16
16
|
import unicodeAccents from /*preval*/ "./unicodeAccents";
|
17
17
|
import unicodeSymbols from /*preval*/ "./unicodeSymbols";
|
18
18
|
|
19
|
-
const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in symbolsOrd.js
|
20
|
-
|
21
19
|
/**
|
22
20
|
* This file contains the parser used to parse out a TeX expression from the
|
23
21
|
* input. Since TeX isn't context-free, standard parsers don't work particularly
|
@@ -939,15 +937,6 @@ export default class Parser {
|
|
939
937
|
};
|
940
938
|
}
|
941
939
|
symbol = s;
|
942
|
-
} else if (!this.strict && numberRegEx.test(text)) {
|
943
|
-
// A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
|
944
|
-
this.consume()
|
945
|
-
return {
|
946
|
-
type: "textord",
|
947
|
-
mode: this.mode,
|
948
|
-
loc: SourceLocation.range(nucleus),
|
949
|
-
text
|
950
|
-
}
|
951
940
|
} else if (text.charCodeAt(0) >= 0x80) {
|
952
941
|
// no symbol for e.g. ^
|
953
942
|
if (this.settings.strict) {
|
package/src/buildMathML.js
CHANGED
@@ -75,6 +75,48 @@ export const consolidateText = mrow => {
|
|
75
75
|
return mtext
|
76
76
|
}
|
77
77
|
|
78
|
+
const numberRegEx = /^[0-9]$/
|
79
|
+
const isCommaOrDot = node => {
|
80
|
+
return (node.type === "atom" && node.text === ",") ||
|
81
|
+
(node.type === "textord" && node.text === ".")
|
82
|
+
}
|
83
|
+
const consolidateNumbers = expression => {
|
84
|
+
// Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
|
85
|
+
// not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
|
86
|
+
if (expression.length < 2) { return }
|
87
|
+
const nums = [];
|
88
|
+
let inNum = false
|
89
|
+
// Find adjacent numerals
|
90
|
+
for (let i = 0; i < expression.length; i++) {
|
91
|
+
const node = expression[i];
|
92
|
+
if (node.type === "textord" && numberRegEx.test(node.text)) {
|
93
|
+
if (!inNum) { nums.push({ start: i }) }
|
94
|
+
inNum = true
|
95
|
+
} else {
|
96
|
+
if (inNum) { nums[nums.length - 1].end = i - 1 }
|
97
|
+
inNum = false
|
98
|
+
}
|
99
|
+
}
|
100
|
+
if (inNum) { nums[nums.length - 1].end = expression.length - 1 }
|
101
|
+
|
102
|
+
// Determine if numeral groups are separated by a comma or dot.
|
103
|
+
for (let i = nums.length - 1; i > 0; i--) {
|
104
|
+
if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
|
105
|
+
// Merge the two groups.
|
106
|
+
nums[i - 1].end = nums[i].end
|
107
|
+
nums.splice(i, 1)
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
// Consolidate the number nodes
|
112
|
+
for (let i = nums.length - 1; i >= 0; i--) {
|
113
|
+
for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
|
114
|
+
expression[nums[i].start].text += expression[j].text
|
115
|
+
}
|
116
|
+
expression.splice(nums[i].start + 1, nums[i].end - nums[i].start)
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
78
120
|
/**
|
79
121
|
* Wrap the given array of nodes in an <mrow> node if needed, i.e.,
|
80
122
|
* unless the array has length 1. Always returns a single node.
|
@@ -110,6 +152,8 @@ export const buildExpression = function(expression, style, isOrdgroup) {
|
|
110
152
|
return [group];
|
111
153
|
}
|
112
154
|
|
155
|
+
consolidateNumbers(expression)
|
156
|
+
|
113
157
|
const groups = [];
|
114
158
|
for (let i = 0; i < expression.length; i++) {
|
115
159
|
const group = buildGroup(expression[i], style);
|
package/src/functions/symbolsOrd.js
CHANGED
@@ -7,7 +7,7 @@ import * as mml from "../buildMathML"
|
|
7
7
|
// "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
|
8
8
|
// src/symbols.js.
|
9
9
|
|
10
|
-
const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in Parser.js
|
10
|
+
const numberRegEx = /^\d(?:[\d,.]*\d)?$/
|
11
11
|
const latinRegEx = /[A-Ba-z]/
|
12
12
|
|
13
13
|
const italicNumber = (text, variant, tag) => {
|