ohm-js 16.3.0-dev.unicode-code-point-escape → 16.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ohm-js",
3
- "version": "16.3.0-dev.unicode-code-point-escape",
3
+ "version": "16.3.0",
4
4
  "description": "An object-oriented language for parsing and pattern matching",
5
5
  "repository": "https://github.com/harc/ohm",
6
6
  "keywords": [
@@ -59,33 +59,35 @@
59
59
  "Jason Merrill <jwmerrill@gmail.com>",
60
60
  "Ray Toal <rtoal@lmu.edu>",
61
61
  "Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
62
+ "megabuz <3299889+megabuz@users.noreply.github.com>",
62
63
  "stagas <gstagas@gmail.com>",
63
64
  "Jonathan Edwards <JonathanMEdwards@gmail.com>",
64
65
  "Milan Lajtoš <milan.lajtos@me.com>",
65
66
  "Neil Jewers <njjewers@uwaterloo.ca>",
66
- "megabuz <3299889+megabuz@users.noreply.github.com>",
67
67
  "Mike Niebling <(none)>",
68
- "sfinnie <scott.finnie@gmail.com>",
69
- "Justin Chase <justin.m.chase@gmail.com>",
68
+ "AngryPowman <angrypowman@qq.com>",
69
+ "Patrick Dubroy <patrick@sourcegraph.com>",
70
+ "Leslie Ying <acetophore@users.noreply.github.com>",
70
71
  "Pierre Donias <pierre.donias@gmail.com>",
72
+ "Justin Chase <justin.m.chase@gmail.com>",
71
73
  "Ian Harris <ian@fofgof.xyz>",
72
- "Daniel Tomlinson <DanielTomlinson@me.com>",
73
74
  "Stan Rozenraukh <stan@stanistan.com>",
74
75
  "Stephan Seidt <stephan.seidt@gmail.com>",
75
76
  "Steve Phillips <steve@tryingtobeawesome.com>",
76
77
  "Szymon Kaliski <kaliskiszymon@gmail.com>",
77
78
  "Thomas Nyberg <tomnyberg@gmail.com>",
78
- "Casey Olson <casey.m.olson@gmail.com>",
79
+ "Daniel Tomlinson <DanielTomlinson@me.com>",
79
80
  "Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
80
81
  "Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
81
- "Arthur Carabott <arthurc@gmail.com>",
82
+ "Casey Olson <casey.m.olson@gmail.com>",
82
83
  "abego <ub@abego-software.de>",
83
84
  "acslk <d_vd415@hotmail.com>",
84
85
  "codeZeilen <codeZeilen@users.noreply.github.com>",
85
- "AngryPowman <angrypowman@qq.com>",
86
+ "kassadin <kassadin@foxmail.com>",
87
+ "Arthur Carabott <arthurc@gmail.com>",
88
+ "owch <bowenrainyday@gmail.com>",
86
89
  "Luca Guzzon <luca.guzzon@gmail.com>",
87
- "Leslie Ying <acetophore@users.noreply.github.com>",
88
- "owch <bowenrainyday@gmail.com>"
90
+ "sfinnie <scott.finnie@gmail.com>"
89
91
  ],
90
92
  "dependencies": {},
91
93
  "devDependencies": {
package/src/common.js CHANGED
@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
40
40
 
41
41
  exports.assert = function(cond, message) {
42
42
  if (!cond) {
43
- throw new Error(message);
43
+ throw new Error(message || 'Assertion failed');
44
44
  }
45
45
  };
46
46
 
@@ -134,24 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
134
134
  return this.strings.join('');
135
135
  };
136
136
 
137
- // Character escaping and unescaping
138
-
139
- exports.escapeChar = function(c, optDelim) {
140
- const charCode = c.charCodeAt(0);
141
- if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
142
- return c;
143
- } else if (charCode < 128) {
144
- return escapeStringFor[charCode];
145
- } else if (128 <= charCode && charCode < 256) {
146
- return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
147
- } else {
148
- return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
149
- }
150
- };
151
-
152
137
  const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
153
138
 
154
- exports.unescapeChar = function(s) {
139
+ exports.unescapeCodePoint = function(s) {
155
140
  if (s.charAt(0) === '\\') {
156
141
  switch (s.charAt(1)) {
157
142
  case 'b':
package/src/errors.js CHANGED
@@ -4,9 +4,9 @@
4
4
  // Imports
5
5
  // --------------------------------------------------------------------
6
6
 
7
- const pexprs = require('./pexprs-main');
8
-
7
+ const {assert} = require('./common');
9
8
  const Namespace = require('./Namespace');
9
+ const pexprs = require('./pexprs-main');
10
10
 
11
11
  // --------------------------------------------------------------------
12
12
  // Private stuff
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
136
136
  ', got ' +
137
137
  actual +
138
138
  ')',
139
- expr.source
139
+ expr
140
140
  );
141
141
  }
142
142
 
@@ -209,6 +209,18 @@ function multipleSuperSplices(expr) {
209
209
  return createError("'...' can appear at most once in a rule body", expr.source);
210
210
  }
211
211
 
212
+ // Unicode code point escapes
213
+
214
+ function invalidCodePoint(applyWrapper) {
215
+ const node = applyWrapper._node;
216
+ assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
217
+
218
+ // Get an interval that covers all of the hex digits.
219
+ const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
220
+ const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
221
+ return createError(`U+${fullInterval.contents} is not a valid Unicode code point`, fullInterval);
222
+ }
223
+
212
224
  // ----------------- Kleene operators -----------------
213
225
 
214
226
  function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
@@ -314,6 +326,7 @@ module.exports = {
314
326
  inconsistentArity,
315
327
  incorrectArgumentType,
316
328
  intervalSourcesDontMatch,
329
+ invalidCodePoint,
317
330
  invalidConstructorCall,
318
331
  invalidParameter,
319
332
  grammarSyntaxError,
package/src/main.js CHANGED
@@ -226,8 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
226
226
  return c.visit();
227
227
  },
228
228
 
229
- terminalChar(_) {
230
- return common.unescapeChar(this.sourceString);
229
+ escapeChar(c) {
230
+ try {
231
+ return common.unescapeCodePoint(this.sourceString);
232
+ } catch (err) {
233
+ if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
234
+ throw errors.invalidCodePoint(c);
235
+ }
236
+ throw err; // Rethrow
237
+ }
231
238
  },
232
239
 
233
240
  NonemptyListOf(x, _, xs) {
@@ -102,7 +102,8 @@ Ohm {
102
102
  | "\\n" -- lineFeed
103
103
  | "\\r" -- carriageReturn
104
104
  | "\\t" -- tab
105
- | "\\u{" hexDigit+ "}" -- unicodeCodePoint
105
+ | "\\u{" hexDigit hexDigit? hexDigit?
106
+ hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
106
107
  | "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
107
108
  | "\\x" hexDigit hexDigit -- hexEscape
108
109
 
@@ -77,11 +77,12 @@ pexprs.Range.prototype.eval = function(state) {
77
77
  const {inputStream} = state;
78
78
  const origPos = inputStream.pos;
79
79
 
80
- const cp =
81
- this.from.length > 1 || this.to.length > 1 ?
82
- inputStream.nextCodePoint() :
83
- inputStream.nextCharCode();
80
+ // A range can operate in one of two modes: matching a single, 16-bit _code unit_,
81
+ // or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
82
+ const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
84
83
 
84
+ // Always compare by code point value to get the correct result in all scenarios.
85
+ // Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
85
86
  if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
86
87
  state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
87
88
  return true;
@@ -53,6 +53,9 @@ class Range extends PExpr {
53
53
  super();
54
54
  this.from = from;
55
55
  this.to = to;
56
+ // If either `from` or `to` is made up of multiple code units, then
57
+ // the range should consume a full code point, not a single code unit.
58
+ this.matchCodePoint = from.length > 1 || to.length > 1;
56
59
  }
57
60
  }
58
61