npm - ohm-js - Versions diffs - 16.3.0-dev.unicode-code-point-escape → 16.3.0 - Mend

ohm-js 16.3.0-dev.unicode-code-point-escape → 16.3.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ohm-js",
-  "version": "16.3.0-dev.unicode-code-point-escape",
+  "version": "16.3.0",
   "description": "An object-oriented language for parsing and pattern matching",
   "repository": "https://github.com/harc/ohm",
   "keywords": [
@@ -59,33 +59,35 @@
     "Jason Merrill <jwmerrill@gmail.com>",
     "Ray Toal <rtoal@lmu.edu>",
     "Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
+    "megabuz <3299889+megabuz@users.noreply.github.com>",
     "stagas <gstagas@gmail.com>",
     "Jonathan Edwards <JonathanMEdwards@gmail.com>",
     "Milan Lajtoš <milan.lajtos@me.com>",
     "Neil Jewers <njjewers@uwaterloo.ca>",
-    "megabuz <3299889+megabuz@users.noreply.github.com>",
     "Mike Niebling <(none)>",
-    "sfinnie <scott.finnie@gmail.com>",
-    "Justin Chase <justin.m.chase@gmail.com>",
+    "AngryPowman <angrypowman@qq.com>",
+    "Patrick Dubroy <patrick@sourcegraph.com>",
+    "Leslie Ying <acetophore@users.noreply.github.com>",
     "Pierre Donias <pierre.donias@gmail.com>",
+    "Justin Chase <justin.m.chase@gmail.com>",
     "Ian Harris <ian@fofgof.xyz>",
-    "Daniel Tomlinson <DanielTomlinson@me.com>",
     "Stan Rozenraukh <stan@stanistan.com>",
     "Stephan Seidt <stephan.seidt@gmail.com>",
     "Steve Phillips <steve@tryingtobeawesome.com>",
     "Szymon Kaliski <kaliskiszymon@gmail.com>",
     "Thomas Nyberg <tomnyberg@gmail.com>",
-    "Casey Olson <casey.m.olson@gmail.com>",
+    "Daniel Tomlinson <DanielTomlinson@me.com>",
     "Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
     "Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
-    "Arthur Carabott <arthurc@gmail.com>",
+    "Casey Olson <casey.m.olson@gmail.com>",
     "abego <ub@abego-software.de>",
     "acslk <d_vd415@hotmail.com>",
     "codeZeilen <codeZeilen@users.noreply.github.com>",
-    "AngryPowman <angrypowman@qq.com>",
+    "kassadin <kassadin@foxmail.com>",
+    "Arthur Carabott <arthurc@gmail.com>",
+    "owch <bowenrainyday@gmail.com>",
     "Luca Guzzon <luca.guzzon@gmail.com>",
-    "Leslie Ying <acetophore@users.noreply.github.com>",
-    "owch <bowenrainyday@gmail.com>"
+    "sfinnie <scott.finnie@gmail.com>"
   ],
   "dependencies": {},
   "devDependencies": {

package/src/common.js CHANGED Viewed

@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
 exports.assert = function(cond, message) {
   if (!cond) {
-    throw new Error(message);
+    throw new Error(message || 'Assertion failed');
   }
 };
@@ -134,24 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
   return this.strings.join('');
 };
-// Character escaping and unescaping
-exports.escapeChar = function(c, optDelim) {
-  const charCode = c.charCodeAt(0);
-  if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
-    return c;
-  } else if (charCode < 128) {
-    return escapeStringFor[charCode];
-  } else if (128 <= charCode && charCode < 256) {
-    return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
-  } else {
-    return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
-  }
-};
 const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
-exports.unescapeChar = function(s) {
+exports.unescapeCodePoint = function(s) {
   if (s.charAt(0) === '\\') {
     switch (s.charAt(1)) {
       case 'b':

package/src/errors.js CHANGED Viewed

@@ -4,9 +4,9 @@
 // Imports
 // --------------------------------------------------------------------
-const pexprs = require('./pexprs-main');
+const {assert} = require('./common');
 const Namespace = require('./Namespace');
+const pexprs = require('./pexprs-main');
 // --------------------------------------------------------------------
 // Private stuff
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
       ', got ' +
       actual +
       ')',
-      expr.source
+      expr
   );
 }
@@ -209,6 +209,18 @@ function multipleSuperSplices(expr) {
   return createError("'...' can appear at most once in a rule body", expr.source);
 }
+// Unicode code point escapes
+function invalidCodePoint(applyWrapper) {
+  const node = applyWrapper._node;
+  assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
+  // Get an interval that covers all of the hex digits.
+  const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
+  const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
+  return createError(`U+${fullInterval.contents} is not a valid Unicode code point`, fullInterval);
+}
 // ----------------- Kleene operators -----------------
 function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
@@ -314,6 +326,7 @@ module.exports = {
   inconsistentArity,
   incorrectArgumentType,
   intervalSourcesDontMatch,
+  invalidCodePoint,
   invalidConstructorCall,
   invalidParameter,
   grammarSyntaxError,

package/src/main.js CHANGED Viewed

@@ -226,8 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
       return c.visit();
     },
-    terminalChar(_) {
-      return common.unescapeChar(this.sourceString);
+    escapeChar(c) {
+      try {
+        return common.unescapeCodePoint(this.sourceString);
+      } catch (err) {
+        if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
+          throw errors.invalidCodePoint(c);
+        }
+        throw err; // Rethrow
+      }
     },
     NonemptyListOf(x, _, xs) {

package/src/ohm-grammar.ohm CHANGED Viewed

@@ -102,7 +102,8 @@ Ohm {
     | "\\n"                                      -- lineFeed
     | "\\r"                                      -- carriageReturn
     | "\\t"                                      -- tab
-    | "\\u{" hexDigit+ "}"                       -- unicodeCodePoint
+    | "\\u{" hexDigit hexDigit? hexDigit?
+             hexDigit? hexDigit? hexDigit? "}"   -- unicodeCodePoint
     | "\\u" hexDigit hexDigit hexDigit hexDigit  -- unicodeEscape
     | "\\x" hexDigit hexDigit                    -- hexEscape

package/src/pexprs-eval.js CHANGED Viewed

@@ -77,11 +77,12 @@ pexprs.Range.prototype.eval = function(state) {
   const {inputStream} = state;
   const origPos = inputStream.pos;
-  const cp =
-    this.from.length > 1 || this.to.length > 1 ?
-      inputStream.nextCodePoint() :
-      inputStream.nextCharCode();
+  // A range can operate in one of two modes: matching a single, 16-bit _code unit_,
+  // or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
+  const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
+  // Always compare by code point value to get the correct result in all scenarios.
+  // Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
   if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
     state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
     return true;

package/src/pexprs-main.js CHANGED Viewed

@@ -53,6 +53,9 @@ class Range extends PExpr {
     super();
     this.from = from;
     this.to = to;
+    // If either `from` or `to` is made up of multiple code units, then
+    // the range should consume a full code point, not a single code unit.
+    this.matchCodePoint = from.length > 1 || to.length > 1;
   }
 }