npm - temml - Versions diffs - 0.10.3 → 0.10.4 - Mend

temml 0.10.3 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/temml.cjs +49 -22
package/dist/temml.js +49 -22
package/dist/temml.min.js +1 -1
package/dist/temml.mjs +49 -22
package/dist/temmlPostProcess.js +1 -1
package/package.json +8 -2
package/src/Lexer.js +2 -8
package/src/Parser.js +0 -11
package/src/buildMathML.js +44 -0
package/src/functions/symbolsOrd.js +1 -1
package/src/postProcess.js +1 -1

package/dist/temml.cjs CHANGED

@@ -1966,6 +1966,48 @@ const consolidateText = mrow => {
   return mtext
 };
+const numberRegEx$1 = /^[0-9]$/;
+const isCommaOrDot = node => {
+  return (node.type === "atom" && node.text === ",") ||
+         (node.type === "textord" && node.text === ".")
+};
+const consolidateNumbers = expression => {
+  // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
+  // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
+  if (expression.length < 2) { return }
+  const nums = [];
+  let inNum = false;
+  // Find adjacent numerals
+  for (let i = 0; i < expression.length; i++) {
+    const node = expression[i];
+    if (node.type === "textord" && numberRegEx$1.test(node.text)) {
+      if (!inNum) { nums.push({ start: i }); }
+      inNum = true;
+    } else {
+      if (inNum) { nums[nums.length - 1].end = i - 1; }
+      inNum = false;
+    }
+  }
+  if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
+  // Determine if numeral groups are separated by a comma or dot.
+  for (let i = nums.length - 1; i > 0; i--) {
+    if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
+      // Merge the two groups.
+      nums[i - 1].end = nums[i].end;
+      nums.splice(i, 1);
+    }
+  }
+  // Consolidate the number nodes
+  for (let i = nums.length - 1; i >= 0; i--) {
+    for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
+      expression[nums[i].start].text += expression[j].text;
+    }
+    expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
+  }
+};
 /**
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
  * unless the array has length 1.  Always returns a single node.
@@ -2001,6 +2043,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
     return [group];
   }
+  consolidateNumbers(expression);
   const groups = [];
   for (let i = 0; i < expression.length; i++) {
     const group = buildGroup$1(expression[i], style);
@@ -7643,7 +7687,7 @@ const smallCaps = Object.freeze({
 // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
 // src/symbols.js.
-const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/;  // Keep in sync with numberRegEx in Parser.js
+const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
 const latinRegEx = /[A-Ba-z]/;
 const italicNumber = (text, variant, tag) => {
@@ -7697,7 +7741,7 @@ defineFunctionBuilders({
     const variant = getVariant(group, style) || "normal";
     let node;
-    if (numberRegEx$1.test(group.text)) {
+    if (numberRegEx.test(group.text)) {
       const tag = group.mode === "text" ? "mtext" : "mn";
       if (variant === "italic" || variant === "bold-italic") {
         return italicNumber(text, variant, tag)
@@ -8010,8 +8054,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
 const tokenRegexString =
   `(${spaceRegexString}+)|` + // whitespace
   `${controlSpaceRegexString}|` +  // whitespace
-  "(number" +         // numbers (in non-strict mode)
-  "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
+  "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
   `${combiningDiacriticalMarkString}*` + // ...plus accents
   "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
   `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8026,12 +8069,7 @@ class Lexer {
     // Separate accents from characters
     this.input = input;
     this.settings = settings;
-    this.tokenRegex = new RegExp(
-      // Strict Temml, like TeX, lexes one numeral at a time.
-      // Default Temml lexes contiguous numerals into a single <mn> element.
-      tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
-      "g"
-    );
+    this.tokenRegex = new RegExp(tokenRegexString, 'g');
     // Category codes. The lexer only supports comment characters (14) for now.
     // MacroExpander additionally distinguishes active (13).
     this.catcodes = {
@@ -11799,8 +11837,6 @@ var unicodeSymbols = {
 /* eslint no-constant-condition:0 */
-const numberRegEx = /^\d(?:[\d,.]*\d)?$/;  // Keep in sync with numberRegEx in symbolsOrd.js
 /**
  * This file contains the parser used to parse out a TeX expression from the
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -12722,15 +12758,6 @@ class Parser {
         };
       }
       symbol = s;
-    } else if (!this.strict && numberRegEx.test(text)) {
-      // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
-      this.consume();
-      return {
-        type: "textord",
-        mode: this.mode,
-        loc: SourceLocation.range(nucleus),
-        text
-      }
     } else if (text.charCodeAt(0) >= 0x80) {
       // no symbol for e.g. ^
       if (this.settings.strict) {
@@ -12970,7 +12997,7 @@ class Style {
  * https://mit-license.org/
  */
-const version = "0.10.3";
+const version = "0.10.4";
 function postProcess(block) {
   const labelMap = {};

package/dist/temml.js CHANGED

@@ -1967,6 +1967,48 @@ var temml = (function () {
     return mtext
   };
+  const numberRegEx$1 = /^[0-9]$/;
+  const isCommaOrDot = node => {
+    return (node.type === "atom" && node.text === ",") ||
+           (node.type === "textord" && node.text === ".")
+  };
+  const consolidateNumbers = expression => {
+    // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
+    // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
+    if (expression.length < 2) { return }
+    const nums = [];
+    let inNum = false;
+    // Find adjacent numerals
+    for (let i = 0; i < expression.length; i++) {
+      const node = expression[i];
+      if (node.type === "textord" && numberRegEx$1.test(node.text)) {
+        if (!inNum) { nums.push({ start: i }); }
+        inNum = true;
+      } else {
+        if (inNum) { nums[nums.length - 1].end = i - 1; }
+        inNum = false;
+      }
+    }
+    if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
+    // Determine if numeral groups are separated by a comma or dot.
+    for (let i = nums.length - 1; i > 0; i--) {
+      if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
+        // Merge the two groups.
+        nums[i - 1].end = nums[i].end;
+        nums.splice(i, 1);
+      }
+    }
+    // Consolidate the number nodes
+    for (let i = nums.length - 1; i >= 0; i--) {
+      for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
+        expression[nums[i].start].text += expression[j].text;
+      }
+      expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
+    }
+  };
   /**
    * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
    * unless the array has length 1.  Always returns a single node.
@@ -2002,6 +2044,8 @@ var temml = (function () {
       return [group];
     }
+    consolidateNumbers(expression);
     const groups = [];
     for (let i = 0; i < expression.length; i++) {
       const group = buildGroup$1(expression[i], style);
@@ -7644,7 +7688,7 @@ var temml = (function () {
   // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
   // src/symbols.js.
-  const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/;  // Keep in sync with numberRegEx in Parser.js
+  const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
   const latinRegEx = /[A-Ba-z]/;
   const italicNumber = (text, variant, tag) => {
@@ -7698,7 +7742,7 @@ var temml = (function () {
       const variant = getVariant(group, style) || "normal";
       let node;
-      if (numberRegEx$1.test(group.text)) {
+      if (numberRegEx.test(group.text)) {
         const tag = group.mode === "text" ? "mtext" : "mn";
         if (variant === "italic" || variant === "bold-italic") {
           return italicNumber(text, variant, tag)
@@ -8011,8 +8055,7 @@ var temml = (function () {
   const tokenRegexString =
     `(${spaceRegexString}+)|` + // whitespace
     `${controlSpaceRegexString}|` +  // whitespace
-    "(number" +         // numbers (in non-strict mode)
-    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
+    "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
     `${combiningDiacriticalMarkString}*` + // ...plus accents
     "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
     `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8027,12 +8070,7 @@ var temml = (function () {
       // Separate accents from characters
       this.input = input;
       this.settings = settings;
-      this.tokenRegex = new RegExp(
-        // Strict Temml, like TeX, lexes one numeral at a time.
-        // Default Temml lexes contiguous numerals into a single <mn> element.
-        tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
-        "g"
-      );
+      this.tokenRegex = new RegExp(tokenRegexString, 'g');
       // Category codes. The lexer only supports comment characters (14) for now.
       // MacroExpander additionally distinguishes active (13).
       this.catcodes = {
@@ -9900,8 +9938,6 @@ var temml = (function () {
   /* eslint no-constant-condition:0 */
-  const numberRegEx = /^\d(?:[\d,.]*\d)?$/;  // Keep in sync with numberRegEx in symbolsOrd.js
   /**
    * This file contains the parser used to parse out a TeX expression from the
    * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -10823,15 +10859,6 @@ var temml = (function () {
           };
         }
         symbol = s;
-      } else if (!this.strict && numberRegEx.test(text)) {
-        // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
-        this.consume();
-        return {
-          type: "textord",
-          mode: this.mode,
-          loc: SourceLocation.range(nucleus),
-          text
-        }
       } else if (text.charCodeAt(0) >= 0x80) {
         // no symbol for e.g. ^
         if (this.settings.strict) {
@@ -11071,7 +11098,7 @@ var temml = (function () {
    * https://mit-license.org/
    */
-  const version = "0.10.3";
+  const version = "0.10.4";
   function postProcess(block) {
     const labelMap = {};