temml 0.10.3 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/temml.mjs CHANGED
@@ -1964,6 +1964,48 @@ const consolidateText = mrow => {
1964
1964
  return mtext
1965
1965
  };
1966
1966
 
// Matches node text that is exactly one ASCII digit.
const numberRegEx$1 = /^[0-9]$/;

/**
 * Report whether a parse node is a separator that may sit inside a number:
 * a "," of type "atom" or a "." of type "textord".
 * @param {{type: string, text: string}} node - Parse node to inspect.
 * @returns {boolean} True when the node is such a comma or dot.
 */
const isCommaOrDot = node => {
  return (node.type === "atom" && node.text === ",") ||
    (node.type === "textord" && node.text === ".");
};

/**
 * Consolidate adjacent numerals, in place. We want to return
 * <mn>1,506.3</mn>, not
 * <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
 *
 * Runs of single-digit "textord" nodes are merged into the first node of the
 * run. Two runs separated by exactly one comma or dot are merged as well,
 * unless source offsets show that the digit after the separator does not
 * follow it immediately (e.g. the list "(1, 2)"), in which case the runs are
 * left as separate numbers.
 *
 * @param {Array<{type: string, text: string, loc?: object}>} expression -
 *   Parse nodes of one expression. The array and the first node of each
 *   merged run are mutated.
 * @returns {undefined}
 */
const consolidateNumbers = expression => {
  if (expression.length < 2) { return; }

  // NOTE(review): assumes that `loc`, when present, exposes numeric `start`
  // and `end` source offsets - confirm against SourceLocation. When either
  // offset is unavailable the nodes are treated as contiguous, which keeps
  // the previous merge-always behaviour.
  const isContiguous = (node, next) => {
    const nodeEnd = node.loc ? node.loc.end : undefined;
    const nextStart = next.loc ? next.loc.start : undefined;
    if (typeof nodeEnd !== "number" || typeof nextStart !== "number") { return true; }
    return nodeEnd === nextStart;
  };

  // Find runs of adjacent numerals; each run records inclusive indices.
  const nums = [];
  let inNum = false;
  for (let i = 0; i < expression.length; i++) {
    const node = expression[i];
    if (node.type === "textord" && numberRegEx$1.test(node.text)) {
      if (!inNum) { nums.push({ start: i, end: i }); }
      nums[nums.length - 1].end = i;
      inNum = true;
    } else {
      inNum = false;
    }
  }

  // Determine if numeral runs are separated by a single comma or dot.
  // Walk backward so that splicing `nums` never disturbs pending indices.
  for (let i = nums.length - 1; i > 0; i--) {
    const separator = expression[nums[i].start - 1];
    if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(separator) &&
        isContiguous(separator, expression[nums[i].start])) {
      // Merge the two runs.
      nums[i - 1].end = nums[i].end;
      nums.splice(i, 1);
    }
  }

  // Consolidate the number nodes, last run first, so that earlier indices
  // stay valid while nodes are removed from `expression`.
  for (let i = nums.length - 1; i >= 0; i--) {
    const { start, end } = nums[i];
    if (end === start) { continue; }
    expression[start].text = expression.slice(start, end + 1).map(n => n.text).join("");
    expression.splice(start + 1, end - start);
  }
};
2008
+
1967
2009
  /**
1968
2010
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
1969
2011
  * unless the array has length 1. Always returns a single node.
@@ -1999,6 +2041,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
1999
2041
  return [group];
2000
2042
  }
2001
2043
 
2044
+ consolidateNumbers(expression);
2045
+
2002
2046
  const groups = [];
2003
2047
  for (let i = 0; i < expression.length; i++) {
2004
2048
  const group = buildGroup$1(expression[i], style);
@@ -7641,7 +7685,7 @@ const smallCaps = Object.freeze({
7641
7685
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
7642
7686
  // src/symbols.js.
7643
7687
 
7644
- const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
7688
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
7645
7689
  const latinRegEx = /[A-Ba-z]/;
7646
7690
 
7647
7691
  const italicNumber = (text, variant, tag) => {
@@ -7695,7 +7739,7 @@ defineFunctionBuilders({
7695
7739
  const variant = getVariant(group, style) || "normal";
7696
7740
 
7697
7741
  let node;
7698
- if (numberRegEx$1.test(group.text)) {
7742
+ if (numberRegEx.test(group.text)) {
7699
7743
  const tag = group.mode === "text" ? "mtext" : "mn";
7700
7744
  if (variant === "italic" || variant === "bold-italic") {
7701
7745
  return italicNumber(text, variant, tag)
@@ -8008,8 +8052,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
8008
8052
  const tokenRegexString =
8009
8053
  `(${spaceRegexString}+)|` + // whitespace
8010
8054
  `${controlSpaceRegexString}|` + // whitespace
8011
- "(number" + // numbers (in non-strict mode)
8012
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8055
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8013
8056
  `${combiningDiacriticalMarkString}*` + // ...plus accents
8014
8057
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
8015
8058
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8024,12 +8067,7 @@ class Lexer {
8024
8067
  // Separate accents from characters
8025
8068
  this.input = input;
8026
8069
  this.settings = settings;
8027
- this.tokenRegex = new RegExp(
8028
- // Strict Temml, like TeX, lexes one numeral at a time.
8029
- // Default Temml lexes contiguous numerals into a single <mn> element.
8030
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
8031
- "g"
8032
- );
8070
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
8033
8071
  // Category codes. The lexer only supports comment characters (14) for now.
8034
8072
  // MacroExpander additionally distinguishes active (13).
8035
8073
  this.catcodes = {
@@ -11797,8 +11835,6 @@ var unicodeSymbols = {
11797
11835
 
11798
11836
  /* eslint no-constant-condition:0 */
11799
11837
 
11800
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
11801
-
11802
11838
  /**
11803
11839
  * This file contains the parser used to parse out a TeX expression from the
11804
11840
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -12720,15 +12756,6 @@ class Parser {
12720
12756
  };
12721
12757
  }
12722
12758
  symbol = s;
12723
- } else if (!this.strict && numberRegEx.test(text)) {
12724
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
12725
- this.consume();
12726
- return {
12727
- type: "textord",
12728
- mode: this.mode,
12729
- loc: SourceLocation.range(nucleus),
12730
- text
12731
- }
12732
12759
  } else if (text.charCodeAt(0) >= 0x80) {
12733
12760
  // no symbol for e.g. ^
12734
12761
  if (this.settings.strict) {
@@ -12968,7 +12995,7 @@ class Style {
12968
12995
  * https://mit-license.org/
12969
12996
  */
12970
12997
 
12971
- const version = "0.10.3";
12998
+ const version = "0.10.4";
12972
12999
 
12973
13000
  function postProcess(block) {
12974
13001
  const labelMap = {};
@@ -14,7 +14,7 @@
14
14
  * https://mit-license.org/
15
15
  */
16
16
 
17
- const version = "0.10.3";
17
+ const version = "0.10.4";
18
18
 
19
19
  function postProcess(block) {
20
20
  const labelMap = {};
package/package.json CHANGED
@@ -1,8 +1,14 @@
1
1
  {
2
2
  "name": "temml",
3
- "version": "0.10.3",
3
+ "version": "0.10.4",
4
4
  "description": "TeX to MathML conversion in JavaScript.",
5
5
  "main": "dist/temml.js",
6
+ "exports": {
7
+ ".": {
8
+ "require": "./dist/temml.cjs"
9
+ },
10
+ "./*": "./*"
11
+ },
6
12
  "homepage": "https://temml.org",
7
13
  "repository": {
8
14
  "type": "git",
@@ -24,7 +30,7 @@
24
30
  },
25
31
  "scripts": {
26
32
  "lint": "eslint temml.js src",
27
- "unit-test": "node -r esm ./test/unit-test.js",
33
+ "unit-test": "node ./test/unit-test.cjs",
28
34
  "visual-test": "node utils/buildTests.js",
29
35
  "test": "yarn lint && node utils/buildTests.js && yarn unit-test",
30
36
  "minify": "terser test/temml.js -o site/assets/temml.min.js -c -m && terser contrib/mhchem/mhchem.js -o site/assets/mhchem.min.js -c -m",
package/src/Lexer.js CHANGED
@@ -49,8 +49,7 @@ export const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriti
49
49
  const tokenRegexString =
50
50
  `(${spaceRegexString}+)|` + // whitespace
51
51
  `${controlSpaceRegexString}|` + // whitespace
52
- "(number" + // numbers (in non-strict mode)
53
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
52
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
54
53
  `${combiningDiacriticalMarkString}*` + // ...plus accents
55
54
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
56
55
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -65,12 +64,7 @@ export default class Lexer {
65
64
  // Separate accents from characters
66
65
  this.input = input;
67
66
  this.settings = settings;
68
- this.tokenRegex = new RegExp(
69
- // Strict Temml, like TeX, lexes one numeral at a time.
70
- // Default Temml lexes contiguous numerals into a single <mn> element.
71
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
72
- "g"
73
- );
67
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
74
68
  // Category codes. The lexer only supports comment characters (14) for now.
75
69
  // MacroExpander additionally distinguishes active (13).
76
70
  this.catcodes = {
package/src/Parser.js CHANGED
@@ -16,8 +16,6 @@ import { isDelimiter } from "./functions/delimsizing"
16
16
  import unicodeAccents from /*preval*/ "./unicodeAccents";
17
17
  import unicodeSymbols from /*preval*/ "./unicodeSymbols";
18
18
 
19
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in symbolsOrd.js
20
-
21
19
  /**
22
20
  * This file contains the parser used to parse out a TeX expression from the
23
21
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -939,15 +937,6 @@ export default class Parser {
939
937
  };
940
938
  }
941
939
  symbol = s;
942
- } else if (!this.strict && numberRegEx.test(text)) {
943
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
944
- this.consume()
945
- return {
946
- type: "textord",
947
- mode: this.mode,
948
- loc: SourceLocation.range(nucleus),
949
- text
950
- }
951
940
  } else if (text.charCodeAt(0) >= 0x80) {
952
941
  // no symbol for e.g. ^
953
942
  if (this.settings.strict) {
@@ -75,6 +75,48 @@ export const consolidateText = mrow => {
75
75
  return mtext
76
76
  }
77
77
 
// Matches node text that is exactly one ASCII digit.
const numberRegEx = /^[0-9]$/

/**
 * Report whether a parse node is a separator that may sit inside a number:
 * a "," of type "atom" or a "." of type "textord".
 * @param {{type: string, text: string}} node - Parse node to inspect.
 * @returns {boolean} True when the node is such a comma or dot.
 */
const isCommaOrDot = node => {
  return (node.type === "atom" && node.text === ",") ||
    (node.type === "textord" && node.text === ".")
}

/**
 * Consolidate adjacent numerals, in place. We want to return
 * <mn>1,506.3</mn>, not
 * <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
 *
 * Runs of single-digit "textord" nodes are merged into the first node of the
 * run. Two runs separated by exactly one comma or dot are merged as well,
 * unless source offsets show that the digit after the separator does not
 * follow it immediately (e.g. the list "(1, 2)"), in which case the runs are
 * left as separate numbers.
 *
 * @param {Array<{type: string, text: string, loc?: object}>} expression -
 *   Parse nodes of one expression. The array and the first node of each
 *   merged run are mutated.
 * @returns {undefined}
 */
const consolidateNumbers = expression => {
  if (expression.length < 2) { return }

  // NOTE(review): assumes that `loc`, when present, exposes numeric `start`
  // and `end` source offsets - confirm against SourceLocation. When either
  // offset is unavailable the nodes are treated as contiguous, which keeps
  // the previous merge-always behaviour.
  const isContiguous = (node, next) => {
    const nodeEnd = node.loc ? node.loc.end : undefined
    const nextStart = next.loc ? next.loc.start : undefined
    if (typeof nodeEnd !== "number" || typeof nextStart !== "number") { return true }
    return nodeEnd === nextStart
  }

  // Find runs of adjacent numerals; each run records inclusive indices.
  const nums = []
  let inNum = false
  for (let i = 0; i < expression.length; i++) {
    const node = expression[i]
    if (node.type === "textord" && numberRegEx.test(node.text)) {
      if (!inNum) { nums.push({ start: i, end: i }) }
      nums[nums.length - 1].end = i
      inNum = true
    } else {
      inNum = false
    }
  }

  // Determine if numeral runs are separated by a single comma or dot.
  // Walk backward so that splicing `nums` never disturbs pending indices.
  for (let i = nums.length - 1; i > 0; i--) {
    const separator = expression[nums[i].start - 1]
    if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(separator) &&
        isContiguous(separator, expression[nums[i].start])) {
      // Merge the two runs.
      nums[i - 1].end = nums[i].end
      nums.splice(i, 1)
    }
  }

  // Consolidate the number nodes, last run first, so that earlier indices
  // stay valid while nodes are removed from `expression`.
  for (let i = nums.length - 1; i >= 0; i--) {
    const { start, end } = nums[i]
    if (end === start) { continue }
    expression[start].text = expression.slice(start, end + 1).map(n => n.text).join("")
    expression.splice(start + 1, end - start)
  }
}
119
+
78
120
  /**
79
121
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
80
122
  * unless the array has length 1. Always returns a single node.
@@ -110,6 +152,8 @@ export const buildExpression = function(expression, style, isOrdgroup) {
110
152
  return [group];
111
153
  }
112
154
 
155
+ consolidateNumbers(expression)
156
+
113
157
  const groups = [];
114
158
  for (let i = 0; i < expression.length; i++) {
115
159
  const group = buildGroup(expression[i], style);
@@ -7,7 +7,7 @@ import * as mml from "../buildMathML"
7
7
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
8
8
  // src/symbols.js.
9
9
 
10
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/ // Keep in sync with numberRegEx in Parser.js
10
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/
11
11
  const latinRegEx = /[A-Ba-z]/
12
12
 
13
13
  const italicNumber = (text, variant, tag) => {
@@ -8,7 +8,7 @@
8
8
  * https://mit-license.org/
9
9
  */
10
10
 
11
- export const version = "0.10.3";
11
+ export const version = "0.10.4";
12
12
 
13
13
  export function postProcess(block) {
14
14
  const labelMap = {}