temml 0.10.3 → 0.10.4

Sign up to get free protection for your applications and to get access to all the features.
package/dist/temml.mjs CHANGED
@@ -1964,6 +1964,48 @@ const consolidateText = mrow => {
1964
1964
  return mtext
1965
1965
  };
1966
1966
 
1967
+ const numberRegEx$1 = /^[0-9]$/;
1968
+ const isCommaOrDot = node => {
1969
+ return (node.type === "atom" && node.text === ",") ||
1970
+ (node.type === "textord" && node.text === ".")
1971
+ };
1972
+ const consolidateNumbers = expression => {
1973
+ // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
1974
+ // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
1975
+ if (expression.length < 2) { return }
1976
+ const nums = [];
1977
+ let inNum = false;
1978
+ // Find adjacent numerals
1979
+ for (let i = 0; i < expression.length; i++) {
1980
+ const node = expression[i];
1981
+ if (node.type === "textord" && numberRegEx$1.test(node.text)) {
1982
+ if (!inNum) { nums.push({ start: i }); }
1983
+ inNum = true;
1984
+ } else {
1985
+ if (inNum) { nums[nums.length - 1].end = i - 1; }
1986
+ inNum = false;
1987
+ }
1988
+ }
1989
+ if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
1990
+
1991
+ // Determine if numeral groups are separated by a comma or dot.
1992
+ for (let i = nums.length - 1; i > 0; i--) {
1993
+ if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
1994
+ // Merge the two groups.
1995
+ nums[i - 1].end = nums[i].end;
1996
+ nums.splice(i, 1);
1997
+ }
1998
+ }
1999
+
2000
+ // Consolidate the number nodes
2001
+ for (let i = nums.length - 1; i >= 0; i--) {
2002
+ for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
2003
+ expression[nums[i].start].text += expression[j].text;
2004
+ }
2005
+ expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
2006
+ }
2007
+ };
2008
+
1967
2009
  /**
1968
2010
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
1969
2011
  * unless the array has length 1. Always returns a single node.
@@ -1999,6 +2041,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
1999
2041
  return [group];
2000
2042
  }
2001
2043
 
2044
+ consolidateNumbers(expression);
2045
+
2002
2046
  const groups = [];
2003
2047
  for (let i = 0; i < expression.length; i++) {
2004
2048
  const group = buildGroup$1(expression[i], style);
@@ -7641,7 +7685,7 @@ const smallCaps = Object.freeze({
7641
7685
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
7642
7686
  // src/symbols.js.
7643
7687
 
7644
- const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
7688
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
7645
7689
  const latinRegEx = /[A-Ba-z]/;
7646
7690
 
7647
7691
  const italicNumber = (text, variant, tag) => {
@@ -7695,7 +7739,7 @@ defineFunctionBuilders({
7695
7739
  const variant = getVariant(group, style) || "normal";
7696
7740
 
7697
7741
  let node;
7698
- if (numberRegEx$1.test(group.text)) {
7742
+ if (numberRegEx.test(group.text)) {
7699
7743
  const tag = group.mode === "text" ? "mtext" : "mn";
7700
7744
  if (variant === "italic" || variant === "bold-italic") {
7701
7745
  return italicNumber(text, variant, tag)
@@ -8008,8 +8052,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
8008
8052
  const tokenRegexString =
8009
8053
  `(${spaceRegexString}+)|` + // whitespace
8010
8054
  `${controlSpaceRegexString}|` + // whitespace
8011
- "(number" + // numbers (in non-strict mode)
8012
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8055
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8013
8056
  `${combiningDiacriticalMarkString}*` + // ...plus accents
8014
8057
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
8015
8058
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8024,12 +8067,7 @@ class Lexer {
8024
8067
  // Separate accents from characters
8025
8068
  this.input = input;
8026
8069
  this.settings = settings;
8027
- this.tokenRegex = new RegExp(
8028
- // Strict Temml, like TeX, lexes one numeral at a time.
8029
- // Default Temml lexes contiguous numerals into a single <mn> element.
8030
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
8031
- "g"
8032
- );
8070
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
8033
8071
  // Category codes. The lexer only supports comment characters (14) for now.
8034
8072
  // MacroExpander additionally distinguishes active (13).
8035
8073
  this.catcodes = {
@@ -11797,8 +11835,6 @@ var unicodeSymbols = {
11797
11835
 
11798
11836
  /* eslint no-constant-condition:0 */
11799
11837
 
11800
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
11801
-
11802
11838
  /**
11803
11839
  * This file contains the parser used to parse out a TeX expression from the
11804
11840
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -12720,15 +12756,6 @@ class Parser {
12720
12756
  };
12721
12757
  }
12722
12758
  symbol = s;
12723
- } else if (!this.strict && numberRegEx.test(text)) {
12724
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
12725
- this.consume();
12726
- return {
12727
- type: "textord",
12728
- mode: this.mode,
12729
- loc: SourceLocation.range(nucleus),
12730
- text
12731
- }
12732
12759
  } else if (text.charCodeAt(0) >= 0x80) {
12733
12760
  // no symbol for e.g. ^
12734
12761
  if (this.settings.strict) {
@@ -12968,7 +12995,7 @@ class Style {
12968
12995
  * https://mit-license.org/
12969
12996
  */
12970
12997
 
12971
- const version = "0.10.3";
12998
+ const version = "0.10.4";
12972
12999
 
12973
13000
  function postProcess(block) {
12974
13001
  const labelMap = {};
@@ -14,7 +14,7 @@
14
14
  * https://mit-license.org/
15
15
  */
16
16
 
17
- const version = "0.10.3";
17
+ const version = "0.10.4";
18
18
 
19
19
  function postProcess(block) {
20
20
  const labelMap = {};
package/package.json CHANGED
@@ -1,8 +1,14 @@
1
1
  {
2
2
  "name": "temml",
3
- "version": "0.10.3",
3
+ "version": "0.10.4",
4
4
  "description": "TeX to MathML conversion in JavaScript.",
5
5
  "main": "dist/temml.js",
6
+ "exports": {
7
+ ".": {
8
+ "require": "./dist/temml.cjs"
9
+ },
10
+ "./*": "./*"
11
+ },
6
12
  "homepage": "https://temml.org",
7
13
  "repository": {
8
14
  "type": "git",
@@ -24,7 +30,7 @@
24
30
  },
25
31
  "scripts": {
26
32
  "lint": "eslint temml.js src",
27
- "unit-test": "node -r esm ./test/unit-test.js",
33
+ "unit-test": "node ./test/unit-test.cjs",
28
34
  "visual-test": "node utils/buildTests.js",
29
35
  "test": "yarn lint && node utils/buildTests.js && yarn unit-test",
30
36
  "minify": "terser test/temml.js -o site/assets/temml.min.js -c -m && terser contrib/mhchem/mhchem.js -o site/assets/mhchem.min.js -c -m",
package/src/Lexer.js CHANGED
@@ -49,8 +49,7 @@ export const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriti
49
49
  const tokenRegexString =
50
50
  `(${spaceRegexString}+)|` + // whitespace
51
51
  `${controlSpaceRegexString}|` + // whitespace
52
- "(number" + // numbers (in non-strict mode)
53
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
52
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
54
53
  `${combiningDiacriticalMarkString}*` + // ...plus accents
55
54
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
56
55
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -65,12 +64,7 @@ export default class Lexer {
65
64
  // Separate accents from characters
66
65
  this.input = input;
67
66
  this.settings = settings;
68
- this.tokenRegex = new RegExp(
69
- // Strict Temml, like TeX, lexes one numeral at a time.
70
- // Default Temml lexes contiguous numerals into a single <mn> element.
71
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
72
- "g"
73
- );
67
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
74
68
  // Category codes. The lexer only supports comment characters (14) for now.
75
69
  // MacroExpander additionally distinguishes active (13).
76
70
  this.catcodes = {
package/src/Parser.js CHANGED
@@ -16,8 +16,6 @@ import { isDelimiter } from "./functions/delimsizing"
16
16
  import unicodeAccents from /*preval*/ "./unicodeAccents";
17
17
  import unicodeSymbols from /*preval*/ "./unicodeSymbols";
18
18
 
19
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in symbolsOrd.js
20
-
21
19
  /**
22
20
  * This file contains the parser used to parse out a TeX expression from the
23
21
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -939,15 +937,6 @@ export default class Parser {
939
937
  };
940
938
  }
941
939
  symbol = s;
942
- } else if (!this.strict && numberRegEx.test(text)) {
943
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
944
- this.consume()
945
- return {
946
- type: "textord",
947
- mode: this.mode,
948
- loc: SourceLocation.range(nucleus),
949
- text
950
- }
951
940
  } else if (text.charCodeAt(0) >= 0x80) {
952
941
  // no symbol for e.g. ^
953
942
  if (this.settings.strict) {
@@ -75,6 +75,48 @@ export const consolidateText = mrow => {
75
75
  return mtext
76
76
  }
77
77
 
78
+ const numberRegEx = /^[0-9]$/
79
+ const isCommaOrDot = node => {
80
+ return (node.type === "atom" && node.text === ",") ||
81
+ (node.type === "textord" && node.text === ".")
82
+ }
83
+ const consolidateNumbers = expression => {
84
+ // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
85
+ // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
86
+ if (expression.length < 2) { return }
87
+ const nums = [];
88
+ let inNum = false
89
+ // Find adjacent numerals
90
+ for (let i = 0; i < expression.length; i++) {
91
+ const node = expression[i];
92
+ if (node.type === "textord" && numberRegEx.test(node.text)) {
93
+ if (!inNum) { nums.push({ start: i }) }
94
+ inNum = true
95
+ } else {
96
+ if (inNum) { nums[nums.length - 1].end = i - 1 }
97
+ inNum = false
98
+ }
99
+ }
100
+ if (inNum) { nums[nums.length - 1].end = expression.length - 1 }
101
+
102
+ // Determine if numeral groups are separated by a comma or dot.
103
+ for (let i = nums.length - 1; i > 0; i--) {
104
+ if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
105
+ // Merge the two groups.
106
+ nums[i - 1].end = nums[i].end
107
+ nums.splice(i, 1)
108
+ }
109
+ }
110
+
111
+ // Consolidate the number nodes
112
+ for (let i = nums.length - 1; i >= 0; i--) {
113
+ for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
114
+ expression[nums[i].start].text += expression[j].text
115
+ }
116
+ expression.splice(nums[i].start + 1, nums[i].end - nums[i].start)
117
+ }
118
+ }
119
+
78
120
  /**
79
121
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
80
122
  * unless the array has length 1. Always returns a single node.
@@ -110,6 +152,8 @@ export const buildExpression = function(expression, style, isOrdgroup) {
110
152
  return [group];
111
153
  }
112
154
 
155
+ consolidateNumbers(expression)
156
+
113
157
  const groups = [];
114
158
  for (let i = 0; i < expression.length; i++) {
115
159
  const group = buildGroup(expression[i], style);
@@ -7,7 +7,7 @@ import * as mml from "../buildMathML"
7
7
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
8
8
  // src/symbols.js.
9
9
 
10
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in Parser.js
10
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/
11
11
  const latinRegEx = /[A-Ba-z]/
12
12
 
13
13
  const italicNumber = (text, variant, tag) => {
@@ -8,7 +8,7 @@
8
8
  * https://mit-license.org/
9
9
  */
10
10
 
11
- export const version = "0.10.3";
11
+ export const version = "0.10.4";
12
12
 
13
13
  export function postProcess(block) {
14
14
  const labelMap = {}