temml 0.10.3 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/temml.cjs CHANGED
@@ -1966,6 +1966,48 @@ const consolidateText = mrow => {
1966
1966
  return mtext
1967
1967
  };
1968
1968
 
1969
+ const numberRegEx$1 = /^[0-9]$/;
1970
+ const isCommaOrDot = node => {
1971
+ return (node.type === "atom" && node.text === ",") ||
1972
+ (node.type === "textord" && node.text === ".")
1973
+ };
1974
+ const consolidateNumbers = expression => {
1975
+ // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
1976
+ // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
1977
+ if (expression.length < 2) { return }
1978
+ const nums = [];
1979
+ let inNum = false;
1980
+ // Find adjacent numerals
1981
+ for (let i = 0; i < expression.length; i++) {
1982
+ const node = expression[i];
1983
+ if (node.type === "textord" && numberRegEx$1.test(node.text)) {
1984
+ if (!inNum) { nums.push({ start: i }); }
1985
+ inNum = true;
1986
+ } else {
1987
+ if (inNum) { nums[nums.length - 1].end = i - 1; }
1988
+ inNum = false;
1989
+ }
1990
+ }
1991
+ if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
1992
+
1993
+ // Determine if numeral groups are separated by a comma or dot.
1994
+ for (let i = nums.length - 1; i > 0; i--) {
1995
+ if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
1996
+ // Merge the two groups.
1997
+ nums[i - 1].end = nums[i].end;
1998
+ nums.splice(i, 1);
1999
+ }
2000
+ }
2001
+
2002
+ // Consolidate the number nodes
2003
+ for (let i = nums.length - 1; i >= 0; i--) {
2004
+ for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
2005
+ expression[nums[i].start].text += expression[j].text;
2006
+ }
2007
+ expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
2008
+ }
2009
+ };
2010
+
1969
2011
  /**
1970
2012
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
1971
2013
  * unless the array has length 1. Always returns a single node.
@@ -2001,6 +2043,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
2001
2043
  return [group];
2002
2044
  }
2003
2045
 
2046
+ consolidateNumbers(expression);
2047
+
2004
2048
  const groups = [];
2005
2049
  for (let i = 0; i < expression.length; i++) {
2006
2050
  const group = buildGroup$1(expression[i], style);
@@ -7643,7 +7687,7 @@ const smallCaps = Object.freeze({
7643
7687
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
7644
7688
  // src/symbols.js.
7645
7689
 
7646
- const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
7690
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
7647
7691
  const latinRegEx = /[A-Ba-z]/;
7648
7692
 
7649
7693
  const italicNumber = (text, variant, tag) => {
@@ -7697,7 +7741,7 @@ defineFunctionBuilders({
7697
7741
  const variant = getVariant(group, style) || "normal";
7698
7742
 
7699
7743
  let node;
7700
- if (numberRegEx$1.test(group.text)) {
7744
+ if (numberRegEx.test(group.text)) {
7701
7745
  const tag = group.mode === "text" ? "mtext" : "mn";
7702
7746
  if (variant === "italic" || variant === "bold-italic") {
7703
7747
  return italicNumber(text, variant, tag)
@@ -8010,8 +8054,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
8010
8054
  const tokenRegexString =
8011
8055
  `(${spaceRegexString}+)|` + // whitespace
8012
8056
  `${controlSpaceRegexString}|` + // whitespace
8013
- "(number" + // numbers (in non-strict mode)
8014
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8057
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8015
8058
  `${combiningDiacriticalMarkString}*` + // ...plus accents
8016
8059
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
8017
8060
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8026,12 +8069,7 @@ class Lexer {
8026
8069
  // Separate accents from characters
8027
8070
  this.input = input;
8028
8071
  this.settings = settings;
8029
- this.tokenRegex = new RegExp(
8030
- // Strict Temml, like TeX, lexes one numeral at a time.
8031
- // Default Temml lexes contiguous numerals into a single <mn> element.
8032
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
8033
- "g"
8034
- );
8072
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
8035
8073
  // Category codes. The lexer only supports comment characters (14) for now.
8036
8074
  // MacroExpander additionally distinguishes active (13).
8037
8075
  this.catcodes = {
@@ -11799,8 +11837,6 @@ var unicodeSymbols = {
11799
11837
 
11800
11838
  /* eslint no-constant-condition:0 */
11801
11839
 
11802
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
11803
-
11804
11840
  /**
11805
11841
  * This file contains the parser used to parse out a TeX expression from the
11806
11842
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -12722,15 +12758,6 @@ class Parser {
12722
12758
  };
12723
12759
  }
12724
12760
  symbol = s;
12725
- } else if (!this.strict && numberRegEx.test(text)) {
12726
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
12727
- this.consume();
12728
- return {
12729
- type: "textord",
12730
- mode: this.mode,
12731
- loc: SourceLocation.range(nucleus),
12732
- text
12733
- }
12734
12761
  } else if (text.charCodeAt(0) >= 0x80) {
12735
12762
  // no symbol for e.g. ^
12736
12763
  if (this.settings.strict) {
@@ -12970,7 +12997,7 @@ class Style {
12970
12997
  * https://mit-license.org/
12971
12998
  */
12972
12999
 
12973
- const version = "0.10.3";
13000
+ const version = "0.10.4";
12974
13001
 
12975
13002
  function postProcess(block) {
12976
13003
  const labelMap = {};
package/dist/temml.js CHANGED
@@ -1967,6 +1967,48 @@ var temml = (function () {
1967
1967
  return mtext
1968
1968
  };
1969
1969
 
1970
+ const numberRegEx$1 = /^[0-9]$/;
1971
+ const isCommaOrDot = node => {
1972
+ return (node.type === "atom" && node.text === ",") ||
1973
+ (node.type === "textord" && node.text === ".")
1974
+ };
1975
+ const consolidateNumbers = expression => {
1976
+ // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
1977
+ // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
1978
+ if (expression.length < 2) { return }
1979
+ const nums = [];
1980
+ let inNum = false;
1981
+ // Find adjacent numerals
1982
+ for (let i = 0; i < expression.length; i++) {
1983
+ const node = expression[i];
1984
+ if (node.type === "textord" && numberRegEx$1.test(node.text)) {
1985
+ if (!inNum) { nums.push({ start: i }); }
1986
+ inNum = true;
1987
+ } else {
1988
+ if (inNum) { nums[nums.length - 1].end = i - 1; }
1989
+ inNum = false;
1990
+ }
1991
+ }
1992
+ if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
1993
+
1994
+ // Determine if numeral groups are separated by a comma or dot.
1995
+ for (let i = nums.length - 1; i > 0; i--) {
1996
+ if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
1997
+ // Merge the two groups.
1998
+ nums[i - 1].end = nums[i].end;
1999
+ nums.splice(i, 1);
2000
+ }
2001
+ }
2002
+
2003
+ // Consolidate the number nodes
2004
+ for (let i = nums.length - 1; i >= 0; i--) {
2005
+ for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
2006
+ expression[nums[i].start].text += expression[j].text;
2007
+ }
2008
+ expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
2009
+ }
2010
+ };
2011
+
1970
2012
  /**
1971
2013
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
1972
2014
  * unless the array has length 1. Always returns a single node.
@@ -2002,6 +2044,8 @@ var temml = (function () {
2002
2044
  return [group];
2003
2045
  }
2004
2046
 
2047
+ consolidateNumbers(expression);
2048
+
2005
2049
  const groups = [];
2006
2050
  for (let i = 0; i < expression.length; i++) {
2007
2051
  const group = buildGroup$1(expression[i], style);
@@ -7644,7 +7688,7 @@ var temml = (function () {
7644
7688
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
7645
7689
  // src/symbols.js.
7646
7690
 
7647
- const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
7691
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
7648
7692
  const latinRegEx = /[A-Ba-z]/;
7649
7693
 
7650
7694
  const italicNumber = (text, variant, tag) => {
@@ -7698,7 +7742,7 @@ var temml = (function () {
7698
7742
  const variant = getVariant(group, style) || "normal";
7699
7743
 
7700
7744
  let node;
7701
- if (numberRegEx$1.test(group.text)) {
7745
+ if (numberRegEx.test(group.text)) {
7702
7746
  const tag = group.mode === "text" ? "mtext" : "mn";
7703
7747
  if (variant === "italic" || variant === "bold-italic") {
7704
7748
  return italicNumber(text, variant, tag)
@@ -8011,8 +8055,7 @@ var temml = (function () {
8011
8055
  const tokenRegexString =
8012
8056
  `(${spaceRegexString}+)|` + // whitespace
8013
8057
  `${controlSpaceRegexString}|` + // whitespace
8014
- "(number" + // numbers (in non-strict mode)
8015
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8058
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8016
8059
  `${combiningDiacriticalMarkString}*` + // ...plus accents
8017
8060
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
8018
8061
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8027,12 +8070,7 @@ var temml = (function () {
8027
8070
  // Separate accents from characters
8028
8071
  this.input = input;
8029
8072
  this.settings = settings;
8030
- this.tokenRegex = new RegExp(
8031
- // Strict Temml, like TeX, lexes one numeral at a time.
8032
- // Default Temml lexes contiguous numerals into a single <mn> element.
8033
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
8034
- "g"
8035
- );
8073
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
8036
8074
  // Category codes. The lexer only supports comment characters (14) for now.
8037
8075
  // MacroExpander additionally distinguishes active (13).
8038
8076
  this.catcodes = {
@@ -9900,8 +9938,6 @@ var temml = (function () {
9900
9938
 
9901
9939
  /* eslint no-constant-condition:0 */
9902
9940
 
9903
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
9904
-
9905
9941
  /**
9906
9942
  * This file contains the parser used to parse out a TeX expression from the
9907
9943
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -10823,15 +10859,6 @@ var temml = (function () {
10823
10859
  };
10824
10860
  }
10825
10861
  symbol = s;
10826
- } else if (!this.strict && numberRegEx.test(text)) {
10827
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
10828
- this.consume();
10829
- return {
10830
- type: "textord",
10831
- mode: this.mode,
10832
- loc: SourceLocation.range(nucleus),
10833
- text
10834
- }
10835
10862
  } else if (text.charCodeAt(0) >= 0x80) {
10836
10863
  // no symbol for e.g. ^
10837
10864
  if (this.settings.strict) {
@@ -11071,7 +11098,7 @@ var temml = (function () {
11071
11098
  * https://mit-license.org/
11072
11099
  */
11073
11100
 
11074
- const version = "0.10.3";
11101
+ const version = "0.10.4";
11075
11102
 
11076
11103
  function postProcess(block) {
11077
11104
  const labelMap = {};