ohm-js 16.2.0 → 16.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,5 @@
1
1
  'use strict';
2
2
 
3
- // --------------------------------------------------------------------
4
- // Imports
5
- // --------------------------------------------------------------------
6
-
7
- const pexprs = require('../src/pexprs');
8
- const MatchResult = require('../src/MatchResult');
9
- const Grammar = require('../src/Grammar');
10
-
11
3
  // --------------------------------------------------------------------
12
4
  // Operations
13
5
  // --------------------------------------------------------------------
@@ -23,11 +15,6 @@ const defaultOperation = {
23
15
 
24
16
  // without customization
25
17
  if (!Object.prototype.hasOwnProperty.call(mapping, ctorName)) {
26
- // intermediate node
27
- if (this._node instanceof pexprs.Alt || this._node instanceof pexprs.Apply) {
28
- return children[0].toAST(mapping);
29
- }
30
-
31
18
  // lexical rule
32
19
  if (this.isLexical()) {
33
20
  return this.sourceString;
@@ -111,8 +98,8 @@ const defaultOperation = {
111
98
  // The optional `mapping` parameter can be used to customize how the nodes of the CST
112
99
  // are mapped to the AST (see /doc/extras.md#toastmatchresult-mapping).
113
100
  function toAST(res, mapping) {
114
- if (!(res instanceof MatchResult) || res.failed()) {
115
- throw new Error('toAST() expects a succesfull MatchResult as first parameter');
101
+ if (typeof res.failed !== 'function' || res.failed()) {
102
+ throw new Error('toAST() expects a succesful MatchResult as first parameter');
116
103
  }
117
104
 
118
105
  mapping = Object.assign({}, mapping);
@@ -130,7 +117,7 @@ function toAST(res, mapping) {
130
117
 
131
118
  // Returns a semantics containing the toAST(mapping) operation for the given grammar g.
132
119
  function semanticsForToAST(g) {
133
- if (!(g instanceof Grammar)) {
120
+ if (typeof g.createSemantics !== 'function') {
134
121
  throw new Error('semanticsToAST() expects a Grammar as parameter');
135
122
  }
136
123
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ohm-js",
3
- "version": "16.2.0",
3
+ "version": "16.3.1",
4
4
  "description": "An object-oriented language for parsing and pattern matching",
5
5
  "repository": "https://github.com/harc/ohm",
6
6
  "keywords": [
@@ -15,7 +15,7 @@
15
15
  "rapid",
16
16
  "prototyping"
17
17
  ],
18
- "homepage": "https://ohmlang.github.io/",
18
+ "homepage": "https://ohmjs.org",
19
19
  "bugs": "https://github.com/harc/ohm/issues",
20
20
  "main": "index.js",
21
21
  "module": "dist/ohm.esm.js",
@@ -59,33 +59,35 @@
59
59
  "Jason Merrill <jwmerrill@gmail.com>",
60
60
  "Ray Toal <rtoal@lmu.edu>",
61
61
  "Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
62
+ "megabuz <3299889+megabuz@users.noreply.github.com>",
62
63
  "stagas <gstagas@gmail.com>",
63
64
  "Jonathan Edwards <JonathanMEdwards@gmail.com>",
64
65
  "Milan Lajtoš <milan.lajtos@me.com>",
65
66
  "Neil Jewers <njjewers@uwaterloo.ca>",
66
- "megabuz <3299889+megabuz@users.noreply.github.com>",
67
67
  "Mike Niebling <(none)>",
68
- "sfinnie <scott.finnie@gmail.com>",
69
- "Justin Chase <justin.m.chase@gmail.com>",
68
+ "AngryPowman <angrypowman@qq.com>",
69
+ "Patrick Dubroy <patrick@sourcegraph.com>",
70
+ "Leslie Ying <acetophore@users.noreply.github.com>",
70
71
  "Pierre Donias <pierre.donias@gmail.com>",
72
+ "Justin Chase <justin.m.chase@gmail.com>",
71
73
  "Ian Harris <ian@fofgof.xyz>",
72
- "Daniel Tomlinson <DanielTomlinson@me.com>",
73
74
  "Stan Rozenraukh <stan@stanistan.com>",
74
75
  "Stephan Seidt <stephan.seidt@gmail.com>",
75
76
  "Steve Phillips <steve@tryingtobeawesome.com>",
76
77
  "Szymon Kaliski <kaliskiszymon@gmail.com>",
77
78
  "Thomas Nyberg <tomnyberg@gmail.com>",
78
- "Casey Olson <casey.m.olson@gmail.com>",
79
+ "Daniel Tomlinson <DanielTomlinson@me.com>",
79
80
  "Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
80
81
  "Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
81
- "Arthur Carabott <arthurc@gmail.com>",
82
+ "Casey Olson <casey.m.olson@gmail.com>",
82
83
  "abego <ub@abego-software.de>",
83
84
  "acslk <d_vd415@hotmail.com>",
84
85
  "codeZeilen <codeZeilen@users.noreply.github.com>",
85
- "AngryPowman <angrypowman@qq.com>",
86
+ "kassadin <kassadin@foxmail.com>",
87
+ "Arthur Carabott <arthurc@gmail.com>",
88
+ "owch <bowenrainyday@gmail.com>",
86
89
  "Luca Guzzon <luca.guzzon@gmail.com>",
87
- "Leslie Ying <acetophore@users.noreply.github.com>",
88
- "owch <bowenrainyday@gmail.com>"
90
+ "sfinnie <scott.finnie@gmail.com>"
89
91
  ],
90
92
  "dependencies": {},
91
93
  "devDependencies": {
@@ -29,6 +29,21 @@ InputStream.prototype = {
29
29
  return ans;
30
30
  },
31
31
 
32
+ nextCharCode() {
33
+ const nextChar = this.next();
34
+ return nextChar && nextChar.charCodeAt(0);
35
+ },
36
+
37
+ nextCodePoint() {
38
+ const cp = this.source.slice(this.pos++).codePointAt(0);
39
+ // If the code point is beyond plane 0 (i.e. above 0xFFFF), it takes up two UTF-16 code units.
40
+ if (cp > 0xffff) {
41
+ this.pos += 1;
42
+ }
43
+ this.examinedLength = Math.max(this.examinedLength, this.pos);
44
+ return cp;
45
+ },
46
+
32
47
  matchString(s, optIgnoreCase) {
33
48
  let idx;
34
49
  if (optIgnoreCase) {
package/src/common.js CHANGED
@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
40
40
 
41
41
  exports.assert = function(cond, message) {
42
42
  if (!cond) {
43
- throw new Error(message);
43
+ throw new Error(message || 'Assertion failed');
44
44
  }
45
45
  };
46
46
 
@@ -134,22 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
134
134
  return this.strings.join('');
135
135
  };
136
136
 
137
- // Character escaping and unescaping
137
+ const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
138
138
 
139
- exports.escapeChar = function(c, optDelim) {
140
- const charCode = c.charCodeAt(0);
141
- if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
142
- return c;
143
- } else if (charCode < 128) {
144
- return escapeStringFor[charCode];
145
- } else if (128 <= charCode && charCode < 256) {
146
- return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
147
- } else {
148
- return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
149
- }
150
- };
151
-
152
- exports.unescapeChar = function(s) {
139
+ exports.unescapeCodePoint = function(s) {
153
140
  if (s.charAt(0) === '\\') {
154
141
  switch (s.charAt(1)) {
155
142
  case 'b':
@@ -165,9 +152,11 @@ exports.unescapeChar = function(s) {
165
152
  case 'v':
166
153
  return '\v';
167
154
  case 'x':
168
- return String.fromCharCode(parseInt(s.substring(2, 4), 16));
155
+ return escapeUnicode(s.slice(2, 4));
169
156
  case 'u':
170
- return String.fromCharCode(parseInt(s.substring(2, 6), 16));
157
+ return s.charAt(2) === '{' ?
158
+ escapeUnicode(s.slice(3, -1)) :
159
+ escapeUnicode(s.slice(2, 6));
171
160
  default:
172
161
  return s.charAt(1);
173
162
  }
package/src/errors.js CHANGED
@@ -4,9 +4,9 @@
4
4
  // Imports
5
5
  // --------------------------------------------------------------------
6
6
 
7
- const pexprs = require('./pexprs-main');
8
-
7
+ const {assert} = require('./common');
9
8
  const Namespace = require('./Namespace');
9
+ const pexprs = require('./pexprs-main');
10
10
 
11
11
  // --------------------------------------------------------------------
12
12
  // Private stuff
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
136
136
  ', got ' +
137
137
  actual +
138
138
  ')',
139
- expr.source
139
+ expr
140
140
  );
141
141
  }
142
142
 
@@ -209,6 +209,21 @@ function multipleSuperSplices(expr) {
209
209
  return createError("'...' can appear at most once in a rule body", expr.source);
210
210
  }
211
211
 
212
+ // Unicode code point escapes
213
+
214
+ function invalidCodePoint(applyWrapper) {
215
+ const node = applyWrapper._node;
216
+ assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
217
+
218
+ // Get an interval that covers all of the hex digits.
219
+ const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
220
+ const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
221
+ return createError(
222
+ `U+${fullInterval.contents} is not a valid Unicode code point`,
223
+ fullInterval
224
+ );
225
+ }
226
+
212
227
  // ----------------- Kleene operators -----------------
213
228
 
214
229
  function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
@@ -314,6 +329,7 @@ module.exports = {
314
329
  inconsistentArity,
315
330
  incorrectArgumentType,
316
331
  intervalSourcesDontMatch,
332
+ invalidCodePoint,
317
333
  invalidConstructorCall,
318
334
  invalidParameter,
319
335
  grammarSyntaxError,
package/src/main.js CHANGED
@@ -226,12 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
226
226
  return c.visit();
227
227
  },
228
228
 
229
- terminalChar(_) {
230
- return common.unescapeChar(this.sourceString);
231
- },
232
-
233
- escapeChar(_) {
234
- return this.sourceString;
229
+ escapeChar(c) {
230
+ try {
231
+ return common.unescapeCodePoint(this.sourceString);
232
+ } catch (err) {
233
+ if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
234
+ throw errors.invalidCodePoint(c);
235
+ }
236
+ throw err; // Rethrow
237
+ }
235
238
  },
236
239
 
237
240
  NonemptyListOf(x, _, xs) {
@@ -92,7 +92,7 @@ Ohm {
92
92
 
93
93
  terminalChar
94
94
  = escapeChar
95
- | ~"\\" ~"\"" ~"\n" any
95
+ | ~"\\" ~"\"" ~"\n" "\u{0}".."\u{10FFFF}"
96
96
 
97
97
  escapeChar (an escape sequence)
98
98
  = "\\\\" -- backslash
@@ -102,6 +102,8 @@ Ohm {
102
102
  | "\\n" -- lineFeed
103
103
  | "\\r" -- carriageReturn
104
104
  | "\\t" -- tab
105
+ | "\\u{" hexDigit hexDigit? hexDigit?
106
+ hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
105
107
  | "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
106
108
  | "\\x" hexDigit hexDigit -- hexEscape
107
109
 
@@ -76,9 +76,15 @@ pexprs.Terminal.prototype.eval = function(state) {
76
76
  pexprs.Range.prototype.eval = function(state) {
77
77
  const {inputStream} = state;
78
78
  const origPos = inputStream.pos;
79
- const ch = inputStream.next();
80
- if (ch && this.from <= ch && ch <= this.to) {
81
- state.pushBinding(new TerminalNode(state.grammar, ch), origPos);
79
+
80
+ // A range can operate in one of two modes: matching a single, 16-bit _code unit_,
81
+ // or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
82
+ const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
83
+
84
+ // Always compare by code point value to get the correct result in all scenarios.
85
+ // Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
86
+ if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
87
+ state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
82
88
  return true;
83
89
  } else {
84
90
  state.processFailure(origPos, this);
@@ -53,6 +53,9 @@ class Range extends PExpr {
53
53
  super();
54
54
  this.from = from;
55
55
  this.to = to;
56
+ // If either `from` or `to` is made up of multiple code units, then
57
+ // the range should consume a full code point, not a single code unit.
58
+ this.matchCodePoint = from.length > 1 || to.length > 1;
56
59
  }
57
60
  }
58
61