npm - temml - Versions diffs - 0.10.3 → 0.10.4 - Mend

temml 0.10.3 → 0.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/temml.cjs +49 -22
package/dist/temml.js +49 -22
package/dist/temml.min.js +1 -1
package/dist/temml.mjs +49 -22
package/dist/temmlPostProcess.js +1 -1
package/package.json +8 -2
package/src/Lexer.js +2 -8
package/src/Parser.js +0 -11
package/src/buildMathML.js +44 -0
package/src/functions/symbolsOrd.js +1 -1
package/src/postProcess.js +1 -1

package/dist/temml.mjs CHANGED Viewed

@@ -1964,6 +1964,48 @@ const consolidateText = mrow => {
   return mtext
 };
+const numberRegEx$1 = /^[0-9]$/;
+const isCommaOrDot = node => {
+  return (node.type === "atom" && node.text === ",") ||
+         (node.type === "textord" && node.text === ".")
+};
+const consolidateNumbers = expression => {
+  // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
+  // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
+  if (expression.length < 2) { return }
+  const nums = [];
+  let inNum = false;
+  // Find adjacent numerals
+  for (let i = 0; i < expression.length; i++) {
+    const node = expression[i];
+    if (node.type === "textord" && numberRegEx$1.test(node.text)) {
+      if (!inNum) { nums.push({ start: i }); }
+      inNum = true;
+    } else {
+      if (inNum) { nums[nums.length - 1].end = i - 1; }
+      inNum = false;
+    }
+  }
+  if (inNum) { nums[nums.length - 1].end = expression.length - 1; }
+  // Determine if numeral groups are separated by a comma or dot.
+  for (let i = nums.length - 1; i > 0; i--) {
+    if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
+      // Merge the two groups.
+      nums[i - 1].end = nums[i].end;
+      nums.splice(i, 1);
+    }
+  }
+  // Consolidate the number nodes
+  for (let i = nums.length - 1; i >= 0; i--) {
+    for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
+      expression[nums[i].start].text += expression[j].text;
+    }
+    expression.splice(nums[i].start + 1, nums[i].end - nums[i].start);
+  }
+};
 /**
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
  * unless the array has length 1.  Always returns a single node.
@@ -1999,6 +2041,8 @@ const buildExpression = function(expression, style, isOrdgroup) {
     return [group];
   }
+  consolidateNumbers(expression);
   const groups = [];
   for (let i = 0; i < expression.length; i++) {
     const group = buildGroup$1(expression[i], style);
@@ -7641,7 +7685,7 @@ const smallCaps = Object.freeze({
 // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
 // src/symbols.js.
-const numberRegEx$1 = /^\d(?:[\d,.]*\d)?$/;  // Keep in sync with numberRegEx in Parser.js
+const numberRegEx = /^\d(?:[\d,.]*\d)?$/;
 const latinRegEx = /[A-Ba-z]/;
 const italicNumber = (text, variant, tag) => {
@@ -7695,7 +7739,7 @@ defineFunctionBuilders({
     const variant = getVariant(group, style) || "normal";
     let node;
-    if (numberRegEx$1.test(group.text)) {
+    if (numberRegEx.test(group.text)) {
       const tag = group.mode === "text" ? "mtext" : "mn";
       if (variant === "italic" || variant === "bold-italic") {
         return italicNumber(text, variant, tag)
@@ -8008,8 +8052,7 @@ const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriticalMark
 const tokenRegexString =
   `(${spaceRegexString}+)|` + // whitespace
   `${controlSpaceRegexString}|` +  // whitespace
-  "(number" +         // numbers (in non-strict mode)
-  "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
+  "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
   `${combiningDiacriticalMarkString}*` + // ...plus accents
   "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
   `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -8024,12 +8067,7 @@ class Lexer {
     // Separate accents from characters
     this.input = input;
     this.settings = settings;
-    this.tokenRegex = new RegExp(
-      // Strict Temml, like TeX, lexes one numeral at a time.
-      // Default Temml lexes contiguous numerals into a single <mn> element.
-      tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
-      "g"
-    );
+    this.tokenRegex = new RegExp(tokenRegexString, 'g');
     // Category codes. The lexer only supports comment characters (14) for now.
     // MacroExpander additionally distinguishes active (13).
     this.catcodes = {
@@ -11797,8 +11835,6 @@ var unicodeSymbols = {
 /* eslint no-constant-condition:0 */
-const numberRegEx = /^\d(?:[\d,.]*\d)?$/;  // Keep in sync with numberRegEx in symbolsOrd.js
 /**
  * This file contains the parser used to parse out a TeX expression from the
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -12720,15 +12756,6 @@ class Parser {
         };
       }
       symbol = s;
-    } else if (!this.strict && numberRegEx.test(text)) {
-      // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
-      this.consume();
-      return {
-        type: "textord",
-        mode: this.mode,
-        loc: SourceLocation.range(nucleus),
-        text
-      }
     } else if (text.charCodeAt(0) >= 0x80) {
       // no symbol for e.g. ^
       if (this.settings.strict) {
@@ -12968,7 +12995,7 @@ class Style {
  * https://mit-license.org/
  */
-const version = "0.10.3";
+const version = "0.10.4";
 function postProcess(block) {
   const labelMap = {};

package/dist/temmlPostProcess.js CHANGED Viewed

@@ -14,7 +14,7 @@
    * https://mit-license.org/
    */
-  const version = "0.10.3";
+  const version = "0.10.4";
   function postProcess(block) {
     const labelMap = {};

package/package.json CHANGED Viewed

@@ -1,8 +1,14 @@
 {
   "name": "temml",
-  "version": "0.10.3",
+  "version": "0.10.4",
   "description": "TeX to MathML conversion in JavaScript.",
   "main": "dist/temml.js",
+  "exports": {
+    ".": {
+      "require": "./dist/temml.cjs"
+    },
+    "./*": "./*"
+  },
   "homepage": "https://temml.org",
   "repository": {
     "type": "git",
@@ -24,7 +30,7 @@
   },
   "scripts": {
     "lint": "eslint temml.js src",
-    "unit-test": "node -r esm ./test/unit-test.js",
+    "unit-test": "node ./test/unit-test.cjs",
     "visual-test": "node utils/buildTests.js",
     "test": "yarn lint && node utils/buildTests.js && yarn unit-test",
     "minify": "terser test/temml.js -o site/assets/temml.min.js -c -m && terser contrib/mhchem/mhchem.js -o site/assets/mhchem.min.js -c -m",

package/src/Lexer.js CHANGED Viewed

@@ -49,8 +49,7 @@ export const combiningDiacriticalMarksEndRegex = new RegExp(`${combiningDiacriti
 const tokenRegexString =
   `(${spaceRegexString}+)|` + // whitespace
   `${controlSpaceRegexString}|` +  // whitespace
-  "(number" +         // numbers (in non-strict mode)
-  "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
+  "([!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
   `${combiningDiacriticalMarkString}*` + // ...plus accents
   "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
   `${combiningDiacriticalMarkString}*` + // ...plus accents
@@ -65,12 +64,7 @@ export default class Lexer {
     // Separate accents from characters
     this.input = input;
     this.settings = settings;
-    this.tokenRegex = new RegExp(
-      // Strict Temml, like TeX, lexes one numeral at a time.
-      // Default Temml lexes contiguous numerals into a single <mn> element.
-      tokenRegexString.replace("number|", settings.strict ? "" : "\\d(?:[\\d,.]*\\d)?|"),
-      "g"
-    );
+    this.tokenRegex = new RegExp(tokenRegexString, 'g');
     // Category codes. The lexer only supports comment characters (14) for now.
     // MacroExpander additionally distinguishes active (13).
     this.catcodes = {

package/src/Parser.js CHANGED Viewed

@@ -16,8 +16,6 @@ import { isDelimiter } from "./functions/delimsizing"
 import unicodeAccents from /*preval*/ "./unicodeAccents";
 import unicodeSymbols from /*preval*/ "./unicodeSymbols";
-const numberRegEx = /^\d(?:[\d,.]*\d)?$/  // Keep in sync with numberRegEx in symbolsOrd.js
 /**
  * This file contains the parser used to parse out a TeX expression from the
  * input. Since TeX isn't context-free, standard parsers don't work particularly
@@ -939,15 +937,6 @@ export default class Parser {
         };
       }
       symbol = s;
-    } else if (!this.strict && numberRegEx.test(text)) {
-      // A number. Wrap in a <mn> if in math mode; <mtext> otherwise.
-      this.consume()
-      return {
-        type: "textord",
-        mode: this.mode,
-        loc: SourceLocation.range(nucleus),
-        text
-      }
     } else if (text.charCodeAt(0) >= 0x80) {
       // no symbol for e.g. ^
       if (this.settings.strict) {

package/src/buildMathML.js CHANGED Viewed

@@ -75,6 +75,48 @@ export const consolidateText = mrow => {
   return mtext
 }
+const numberRegEx = /^[0-9]$/
+const isCommaOrDot = node => {
+  return (node.type === "atom" && node.text === ",") ||
+         (node.type === "textord" && node.text === ".")
+}
+const consolidateNumbers = expression => {
+  // Consolidate adjacent numbers. We want to return <mn>1,506.3</mn>,
+  // not <mn>1</mn><mo>,</mo><mn>5</mn><mn>0</mn><mn>6</mn><mi>.</mi><mn>3</mn>
+  if (expression.length < 2) { return }
+  const nums = [];
+  let inNum = false
+  // Find adjacent numerals
+  for (let i = 0; i < expression.length; i++) {
+    const node = expression[i];
+    if (node.type === "textord" && numberRegEx.test(node.text)) {
+      if (!inNum) { nums.push({ start: i }) }
+      inNum = true
+    } else {
+      if (inNum) { nums[nums.length - 1].end = i - 1 }
+      inNum = false
+    }
+  }
+  if (inNum) { nums[nums.length - 1].end = expression.length - 1 }
+  // Determine if numeral groups are separated by a comma or dot.
+  for (let i = nums.length - 1; i > 0; i--) {
+    if (nums[i - 1].end === nums[i].start - 2 && isCommaOrDot(expression[nums[i].start - 1])) {
+      // Merge the two groups.
+      nums[i - 1].end = nums[i].end
+      nums.splice(i, 1)
+    }
+  }
+  // Consolidate the number nodes
+  for (let i = nums.length - 1; i >= 0; i--) {
+    for (let j = nums[i].start + 1; j <= nums[i].end; j++) {
+      expression[nums[i].start].text += expression[j].text
+    }
+    expression.splice(nums[i].start + 1, nums[i].end - nums[i].start)
+  }
+}
 /**
  * Wrap the given array of nodes in an <mrow> node if needed, i.e.,
  * unless the array has length 1.  Always returns a single node.
@@ -110,6 +152,8 @@ export const buildExpression = function(expression, style, isOrdgroup) {
     return [group];
   }
+  consolidateNumbers(expression)
   const groups = [];
   for (let i = 0; i < expression.length; i++) {
     const group = buildGroup(expression[i], style);

package/src/functions/symbolsOrd.js CHANGED Viewed

@@ -7,7 +7,7 @@ import * as mml from "../buildMathML"
 // "mathord" and "textord" ParseNodes created in Parser.js from symbol Groups in
 // src/symbols.js.
-const numberRegEx = /^\d(?:[\d,.]*\d)?$/  // Keep in sync with numberRegEx in Parser.js
+const numberRegEx = /^\d(?:[\d,.]*\d)?$/
 const latinRegEx = /[A-Ba-z]/
 const italicNumber = (text, variant, tag) => {

package/src/postProcess.js CHANGED Viewed

@@ -8,7 +8,7 @@
  * https://mit-license.org/
  */
-export const version = "0.10.3";
+export const version = "0.10.4";
 export function postProcess(block) {
   const labelMap = {}