temml 0.10.3 → 0.10.4

Sign up to get free protection for your applications and to get access to all the features.
package/dist/temml.cjs CHANGED
@@ -1966,6 +1966,48 @@ const consolidateText = mrow => {
1966
1966
  return mtext
1967
1967
  };
1968
1968
 
1969
+ const numberRegEx$1 = /^[0-9]$/;
1970
+ const isCommaOrDot = node => {
1971
+ return (node.type === "atom" && node.text === ",") ||
1972
+ (node.type === "textord" && node.text === ".")
1973
+ };
1974
+ const consolidateNumbers = expression => {
1975
+ // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
1976
+ // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
1977
+ if (expression.length < 2) { return }
1978
+ const nums = [];
1979
+ let inNum = false;
1980
+ // Find adjacent numerals
1981
+ for (let i = 0; i < expression.length; i++) {
1982
+ const node = expression[i];
1983
+ if (node.type === "textord" && numberRegEx$1.test(node.text)) {
1984
+ if (!inNum) { nums.push({ start: i }); }
1985
+ inNum = true;
1986
+ } else {
1987
+ if (inNum) { nums[nums.length - 1].end = i - 1; }
1988
+ inNum = false;
1989
+ }
1990
+ }
1991
+ if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
1992
+
1993
+ // Determine if numeral groups are separated by a comma or dot.
1994
+ for (let i = nums.length - 1; i > 0; i--) {
1995
+ if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
1996
+ // Merge the two groups.
1997
+ nums[i - 1].end = nums[i].end;
1998
+ nums.splice(i, 1);
1999
+ }
2000
+ }
2001
+
2002
+ // Consolidate the number nodes
2003
+ for (let i = nums.length - 1; i >= 0; i--) {
2004
+ for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
2005
+ expression[nums[i].start].text += expression[j].text;
2006
+ }
2007
+ expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
2008
+ }
2009
+ };
2010
+
1969
2011
  /**
1970
2012
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
1971
2013
  * unless the array has length 1. Always returns a single node.
@@ -2001,6 +2043,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
2001
2043
  return [group];
2002
2044
  }
2003
2045
 
2046
+ consolidateNumbers(expression);
2047
+
2004
2048
  const groups = [];
2005
2049
  for (let i = 0; i < expression.length; i++) {
2006
2050
  const group = buildGroup$1(expression[i], style);
@@ -7643,7 +7687,7 @@ const smallCaps = Object.freeze({
7643
7687
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
7644
7688
  // src/symbols.js.
7645
7689
 
7646
- const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
7690
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
7647
7691
  const latinRegEx = /[A-Ba-z]/;
7648
7692
 
7649
7693
  const italicNumber = (text, variant, tag) => {
@@ -7697,7 +7741,7 @@ defineFunctionBuilders({
7697
7741
  const variant = getVariant(group, style) || "normal";
7698
7742
 
7699
7743
  let node;
7700
- if (numberRegEx$1.test(group.text)) {
7744
+ if (numberRegEx.test(group.text)) {
7701
7745
  const tag = group.mode === "text" ? "mtext" : "mn";
7702
7746
  if (variant === "italic" || variant === "bold-italic") {
7703
7747
  return italicNumber(text, variant, tag)
@@ -8010,8 +8054,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
8010
8054
  const tokenRegexString =
8011
8055
  `(${spaceRegexString}+)|` + // whitespace
8012
8056
  `${controlSpaceRegexString}|` + // whitespace
8013
- "(number" + // numbers (in non-strict mode)
8014
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8057
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8015
8058
  `${combiningDiacriticalMarkString}*` + // ...plus accents
8016
8059
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
8017
8060
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8026,12 +8069,7 @@ class Lexer {
8026
8069
  // Separate accents from characters
8027
8070
  this.input = input;
8028
8071
  this.settings = settings;
8029
- this.tokenRegex = new RegExp(
8030
- // Strict Temml, like TeX, lexes one numeral at a time.
8031
- // Default Temml lexes contiguous numerals into a single <mn> element.
8032
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
8033
- "g"
8034
- );
8072
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
8035
8073
  // Category codes. The lexer only supports comment characters (14) for now.
8036
8074
  // MacroExpander additionally distinguishes active (13).
8037
8075
  this.catcodes = {
@@ -11799,8 +11837,6 @@ var unicodeSymbols = {
11799
11837
 
11800
11838
  /* eslint no-constant-condition:0 */
11801
11839
 
11802
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
11803
-
11804
11840
  /**
11805
11841
  * This file contains the parser used to parse out a TeX expression from the
11806
11842
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -12722,15 +12758,6 @@ class Parser {
12722
12758
  };
12723
12759
  }
12724
12760
  symbol = s;
12725
- } else if (!this.strict && numberRegEx.test(text)) {
12726
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
12727
- this.consume();
12728
- return {
12729
- type: "textord",
12730
- mode: this.mode,
12731
- loc: SourceLocation.range(nucleus),
12732
- text
12733
- }
12734
12761
  } else if (text.charCodeAt(0) >= 0x80) {
12735
12762
  // no symbol for e.g. ^
12736
12763
  if (this.settings.strict) {
@@ -12970,7 +12997,7 @@ class Style {
12970
12997
  * https://mit-license.org/
12971
12998
  */
12972
12999
 
12973
- const version = "0.10.3";
13000
+ const version = "0.10.4";
12974
13001
 
12975
13002
  function postProcess(block) {
12976
13003
  const labelMap = {};
package/dist/temml.js CHANGED
@@ -1967,6 +1967,48 @@ var temml = (function () {
1967
1967
  return mtext
1968
1968
  };
1969
1969
 
1970
+ const numberRegEx$1 = /^[0-9]$/;
1971
+ const isCommaOrDot = node => {
1972
+ return (node.type === "atom" && node.text === ",") ||
1973
+ (node.type === "textord" && node.text === ".")
1974
+ };
1975
+ const consolidateNumbers = expression => {
1976
+ // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
1977
+ // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
1978
+ if (expression.length < 2) { return }
1979
+ const nums = [];
1980
+ let inNum = false;
1981
+ // Find adjacent numerals
1982
+ for (let i = 0; i < expression.length; i++) {
1983
+ const node = expression[i];
1984
+ if (node.type === "textord" && numberRegEx$1.test(node.text)) {
1985
+ if (!inNum) { nums.push({ start: i }); }
1986
+ inNum = true;
1987
+ } else {
1988
+ if (inNum) { nums[nums.length - 1].end = i - 1; }
1989
+ inNum = false;
1990
+ }
1991
+ }
1992
+ if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
1993
+
1994
+ // Determine if numeral groups are separated by a comma or dot.
1995
+ for (let i = nums.length - 1; i > 0; i--) {
1996
+ if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
1997
+ // Merge the two groups.
1998
+ nums[i - 1].end = nums[i].end;
1999
+ nums.splice(i, 1);
2000
+ }
2001
+ }
2002
+
2003
+ // Consolidate the number nodes
2004
+ for (let i = nums.length - 1; i >= 0; i--) {
2005
+ for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
2006
+ expression[nums[i].start].text += expression[j].text;
2007
+ }
2008
+ expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
2009
+ }
2010
+ };
2011
+
1970
2012
  /**
1971
2013
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
1972
2014
  * unless the array has length 1. Always returns a single node.
@@ -2002,6 +2044,8 @@ var temml = (function () {
2002
2044
  return [group];
2003
2045
  }
2004
2046
 
2047
+ consolidateNumbers(expression);
2048
+
2005
2049
  const groups = [];
2006
2050
  for (let i = 0; i < expression.length; i++) {
2007
2051
  const group = buildGroup$1(expression[i], style);
@@ -7644,7 +7688,7 @@ var temml = (function () {
7644
7688
  // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
7645
7689
  // src/symbols.js.
7646
7690
 
7647
- const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in Parser.js
7691
+ const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
7648
7692
  const latinRegEx = /[A-Ba-z]/;
7649
7693
 
7650
7694
  const italicNumber = (text, variant, tag) => {
@@ -7698,7 +7742,7 @@ var temml = (function () {
7698
7742
  const variant = getVariant(group, style) || "normal";
7699
7743
 
7700
7744
  let node;
7701
- if (numberRegEx$1.test(group.text)) {
7745
+ if (numberRegEx.test(group.text)) {
7702
7746
  const tag = group.mode === "text" ? "mtext" : "mn";
7703
7747
  if (variant === "italic" || variant === "bold-italic") {
7704
7748
  return italicNumber(text, variant, tag)
@@ -8011,8 +8055,7 @@ var temml = (function () {
8011
8055
  const tokenRegexString =
8012
8056
  `(${spaceRegexString}+)|` + // whitespace
8013
8057
  `${controlSpaceRegexString}|` + // whitespace
8014
- "(number" + // numbers (in non-strict mode)
8015
- "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8058
+ "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
8016
8059
  `${combiningDiacriticalMarkString}*` + // ...plus accents
8017
8060
  "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
8018
8061
  `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8027,12 +8070,7 @@ var temml = (function () {
8027
8070
  // Separate accents from characters
8028
8071
  this.input = input;
8029
8072
  this.settings = settings;
8030
- this.tokenRegex = new RegExp(
8031
- // Strict Temml, like TeX, lexes one numeral at a time.
8032
- // Default Temml lexes contiguous numerals into a single <mn> element.
8033
- tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
8034
- "g"
8035
- );
8073
+ this.tokenRegex = new RegExp(tokenRegexString, 'g');
8036
8074
  // Category codes. The lexer only supports comment characters (14) for now.
8037
8075
  // MacroExpander additionally distinguishes active (13).
8038
8076
  this.catcodes = {
@@ -9900,8 +9938,6 @@ var temml = (function () {
9900
9938
 
9901
9939
  /* eslint no-constant-condition:0 */
9902
9940
 
9903
- const numberRegEx = /^\d(?:[\d,.]*\d)?$/; // Keep in sync with numberRegEx in symbolsOrd.js
9904
-
9905
9941
  /**
9906
9942
  * This file contains the parser used to parse out a TeX expression from the
9907
9943
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -10823,15 +10859,6 @@ var temml = (function () {
10823
10859
  };
10824
10860
  }
10825
10861
  symbol = s;
10826
- } else if (!this.strict && numberRegEx.test(text)) {
10827
- // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
10828
- this.consume();
10829
- return {
10830
- type: "textord",
10831
- mode: this.mode,
10832
- loc: SourceLocation.range(nucleus),
10833
- text
10834
- }
10835
10862
  } else if (text.charCodeAt(0) >= 0x80) {
10836
10863
  // no symbol for e.g. ^
10837
10864
  if (this.settings.strict) {
@@ -11071,7 +11098,7 @@ var temml = (function () {
11071
11098
  * https://mit-license.org/
11072
11099
  */
11073
11100
 
11074
- const version = "0.10.3";
11101
+ const version = "0.10.4";
11075
11102
 
11076
11103
  function postProcess(block) {
11077
11104
  const labelMap = {};