ohm-js 16.3.0-dev.unicode-code-point-escape → 16.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,5 @@
1
1
  'use strict';
2
2
 
3
- // --------------------------------------------------------------------
4
- // Imports
5
- // --------------------------------------------------------------------
6
-
7
- const pexprs = require('../src/pexprs');
8
- const MatchResult = require('../src/MatchResult');
9
- const Grammar = require('../src/Grammar');
10
-
11
3
  // --------------------------------------------------------------------
12
4
  // Operations
13
5
  // --------------------------------------------------------------------
@@ -23,11 +15,6 @@ const defaultOperation = {
23
15
 
24
16
  // without customization
25
17
  if (!Object.prototype.hasOwnProperty.call(mapping, ctorName)) {
26
- // intermediate node
27
- if (this._node instanceof pexprs.Alt || this._node instanceof pexprs.Apply) {
28
- return children[0].toAST(mapping);
29
- }
30
-
31
18
  // lexical rule
32
19
  if (this.isLexical()) {
33
20
  return this.sourceString;
@@ -111,8 +98,8 @@ const defaultOperation = {
111
98
  // The optional `mapping` parameter can be used to customize how the nodes of the CST
112
99
  // are mapped to the AST (see /doc/extras.md#toastmatchresult-mapping).
113
100
  function toAST(res, mapping) {
114
- if (!(res instanceof MatchResult) || res.failed()) {
115
- throw new Error('toAST() expects a succesfull MatchResult as first parameter');
101
+ if (typeof res.failed !== 'function' || res.failed()) {
102
+ throw new Error('toAST() expects a succesful MatchResult as first parameter');
116
103
  }
117
104
 
118
105
  mapping = Object.assign({}, mapping);
@@ -130,7 +117,7 @@ function toAST(res, mapping) {
130
117
 
131
118
  // Returns a semantics containing the toAST(mapping) operation for the given grammar g.
132
119
  function semanticsForToAST(g) {
133
- if (!(g instanceof Grammar)) {
120
+ if (typeof g.createSemantics !== 'function') {
134
121
  throw new Error('semanticsToAST() expects a Grammar as parameter');
135
122
  }
136
123
 
package/index.d.ts CHANGED
@@ -309,13 +309,6 @@ declare namespace ohm {
309
309
  */
310
310
  isOptional: boolean;
311
311
 
312
- /**
313
- * For a terminal node, the raw value that was consumed from the
314
- * input stream.
315
- * @deprecated Use `sourceString` instead.
316
- */
317
- primitiveValue: string;
318
-
319
312
  /**
320
313
  * In addition to the properties defined above, within a given
321
314
  * semantics, every node also has a method/property corresponding to
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ohm-js",
3
- "version": "16.3.0-dev.unicode-code-point-escape",
3
+ "version": "16.3.2",
4
4
  "description": "An object-oriented language for parsing and pattern matching",
5
5
  "repository": "https://github.com/harc/ohm",
6
6
  "keywords": [
@@ -15,7 +15,7 @@
15
15
  "rapid",
16
16
  "prototyping"
17
17
  ],
18
- "homepage": "https://ohmlang.github.io/",
18
+ "homepage": "https://ohmjs.org",
19
19
  "bugs": "https://github.com/harc/ohm/issues",
20
20
  "main": "index.js",
21
21
  "module": "dist/ohm.esm.js",
@@ -59,33 +59,35 @@
59
59
  "Jason Merrill <jwmerrill@gmail.com>",
60
60
  "Ray Toal <rtoal@lmu.edu>",
61
61
  "Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
62
+ "megabuz <3299889+megabuz@users.noreply.github.com>",
62
63
  "stagas <gstagas@gmail.com>",
63
64
  "Jonathan Edwards <JonathanMEdwards@gmail.com>",
64
65
  "Milan Lajtoš <milan.lajtos@me.com>",
65
66
  "Neil Jewers <njjewers@uwaterloo.ca>",
66
- "megabuz <3299889+megabuz@users.noreply.github.com>",
67
67
  "Mike Niebling <(none)>",
68
- "sfinnie <scott.finnie@gmail.com>",
69
- "Justin Chase <justin.m.chase@gmail.com>",
68
+ "AngryPowman <angrypowman@qq.com>",
69
+ "Patrick Dubroy <patrick@sourcegraph.com>",
70
+ "Leslie Ying <acetophore@users.noreply.github.com>",
70
71
  "Pierre Donias <pierre.donias@gmail.com>",
72
+ "Justin Chase <justin.m.chase@gmail.com>",
71
73
  "Ian Harris <ian@fofgof.xyz>",
72
- "Daniel Tomlinson <DanielTomlinson@me.com>",
73
74
  "Stan Rozenraukh <stan@stanistan.com>",
74
75
  "Stephan Seidt <stephan.seidt@gmail.com>",
75
76
  "Steve Phillips <steve@tryingtobeawesome.com>",
76
77
  "Szymon Kaliski <kaliskiszymon@gmail.com>",
77
78
  "Thomas Nyberg <tomnyberg@gmail.com>",
78
- "Casey Olson <casey.m.olson@gmail.com>",
79
+ "Daniel Tomlinson <DanielTomlinson@me.com>",
79
80
  "Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
80
81
  "Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
81
- "Arthur Carabott <arthurc@gmail.com>",
82
+ "Casey Olson <casey.m.olson@gmail.com>",
82
83
  "abego <ub@abego-software.de>",
83
84
  "acslk <d_vd415@hotmail.com>",
84
85
  "codeZeilen <codeZeilen@users.noreply.github.com>",
85
- "AngryPowman <angrypowman@qq.com>",
86
+ "kassadin <kassadin@foxmail.com>",
87
+ "Arthur Carabott <arthurc@gmail.com>",
88
+ "owch <bowenrainyday@gmail.com>",
86
89
  "Luca Guzzon <luca.guzzon@gmail.com>",
87
- "Leslie Ying <acetophore@users.noreply.github.com>",
88
- "owch <bowenrainyday@gmail.com>"
90
+ "sfinnie <scott.finnie@gmail.com>"
89
91
  ],
90
92
  "dependencies": {},
91
93
  "devDependencies": {
@@ -35,7 +35,7 @@ class CaseInsensitiveTerminal extends PExpr {
35
35
  state.processFailure(origPos, this);
36
36
  return false;
37
37
  } else {
38
- state.pushBinding(new TerminalNode(state.grammar, matchStr), origPos);
38
+ state.pushBinding(new TerminalNode(matchStr.length), origPos);
39
39
  return true;
40
40
  }
41
41
  }
package/src/MatchState.js CHANGED
@@ -219,7 +219,7 @@ MatchState.prototype = {
219
219
  // Returns the memoized trace entry for `expr` at `pos`, if one exists, `null` otherwise.
220
220
  getMemoizedTraceEntry(pos, expr) {
221
221
  const posInfo = this.memoTable[pos];
222
- if (posInfo && expr.ruleName) {
222
+ if (posInfo && expr instanceof pexprs.Apply) {
223
223
  const memoRec = posInfo.memo[expr.toMemoKey()];
224
224
  if (memoRec && memoRec.traceEntry) {
225
225
  const entry = memoRec.traceEntry.cloneWithExpr(expr);
@@ -361,11 +361,15 @@ MatchState.prototype = {
361
361
  key => this.recordedFailures[key]
362
362
  );
363
363
  }
364
+ const cst = this._bindings[0];
365
+ if (cst) {
366
+ cst.grammar = this.grammar;
367
+ }
364
368
  return new MatchResult(
365
369
  this.matcher,
366
370
  this.input,
367
371
  this.startExpr,
368
- this._bindings[0],
372
+ cst,
369
373
  this._bindingOffsets[0],
370
374
  this.rightmostFailurePosition,
371
375
  rightmostFailures
package/src/Semantics.js CHANGED
@@ -47,11 +47,6 @@ class Wrapper {
47
47
  return '[semantics wrapper for ' + this._node.grammar.name + ']';
48
48
  }
49
49
 
50
- // This is used by ohm editor to display a node wrapper appropriately.
51
- toJSON() {
52
- return this.toString();
53
- }
54
-
55
50
  _forgetMemoizedResultFor(attributeName) {
56
51
  // Remove the memoized attribute from the cstNode and all its children.
57
52
  delete this._node[this._semantics.attributeKeys[attributeName]];
@@ -131,7 +126,7 @@ class Wrapper {
131
126
  const childWrappers = optChildWrappers || [];
132
127
 
133
128
  const childNodes = childWrappers.map(c => c._node);
134
- const iter = new IterationNode(this._node.grammar, childNodes, [], -1, false);
129
+ const iter = new IterationNode(childNodes, [], -1, false);
135
130
 
136
131
  const wrapper = this._semantics.wrap(iter, null, null);
137
132
  wrapper._childWrappers = childWrappers;
@@ -158,18 +153,6 @@ class Wrapper {
158
153
  return this._node.numChildren();
159
154
  }
160
155
 
161
- // Returns the primitive value of this CST node, if it's a terminal node. Otherwise,
162
- // throws an exception.
163
- // DEPRECATED: Use `sourceString` instead.
164
- get primitiveValue() {
165
- if (this.isTerminal()) {
166
- return this._node.primitiveValue;
167
- }
168
- throw new TypeError(
169
- "tried to access the 'primitiveValue' attribute of a non-terminal CST node"
170
- );
171
- }
172
-
173
156
  // Returns the contents of the input stream consumed by this CST node.
174
157
  get sourceString() {
175
158
  return this.source.contents;
package/src/common.js CHANGED
@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
40
40
 
41
41
  exports.assert = function(cond, message) {
42
42
  if (!cond) {
43
- throw new Error(message);
43
+ throw new Error(message || 'Assertion failed');
44
44
  }
45
45
  };
46
46
 
@@ -134,24 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
134
134
  return this.strings.join('');
135
135
  };
136
136
 
137
- // Character escaping and unescaping
138
-
139
- exports.escapeChar = function(c, optDelim) {
140
- const charCode = c.charCodeAt(0);
141
- if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
142
- return c;
143
- } else if (charCode < 128) {
144
- return escapeStringFor[charCode];
145
- } else if (128 <= charCode && charCode < 256) {
146
- return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
147
- } else {
148
- return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
149
- }
150
- };
151
-
152
137
  const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
153
138
 
154
- exports.unescapeChar = function(s) {
139
+ exports.unescapeCodePoint = function(s) {
155
140
  if (s.charAt(0) === '\\') {
156
141
  switch (s.charAt(1)) {
157
142
  case 'b':
package/src/errors.js CHANGED
@@ -4,9 +4,9 @@
4
4
  // Imports
5
5
  // --------------------------------------------------------------------
6
6
 
7
- const pexprs = require('./pexprs-main');
8
-
7
+ const {assert} = require('./common');
9
8
  const Namespace = require('./Namespace');
9
+ const pexprs = require('./pexprs-main');
10
10
 
11
11
  // --------------------------------------------------------------------
12
12
  // Private stuff
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
136
136
  ', got ' +
137
137
  actual +
138
138
  ')',
139
- expr.source
139
+ expr
140
140
  );
141
141
  }
142
142
 
@@ -209,6 +209,21 @@ function multipleSuperSplices(expr) {
209
209
  return createError("'...' can appear at most once in a rule body", expr.source);
210
210
  }
211
211
 
212
+ // Unicode code point escapes
213
+
214
+ function invalidCodePoint(applyWrapper) {
215
+ const node = applyWrapper._node;
216
+ assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
217
+
218
+ // Get an interval that covers all of the hex digits.
219
+ const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
220
+ const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
221
+ return createError(
222
+ `U+${fullInterval.contents} is not a valid Unicode code point`,
223
+ fullInterval
224
+ );
225
+ }
226
+
212
227
  // ----------------- Kleene operators -----------------
213
228
 
214
229
  function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
@@ -314,6 +329,7 @@ module.exports = {
314
329
  inconsistentArity,
315
330
  incorrectArgumentType,
316
331
  intervalSourcesDontMatch,
332
+ invalidCodePoint,
317
333
  invalidConstructorCall,
318
334
  invalidParameter,
319
335
  grammarSyntaxError,
package/src/main.js CHANGED
@@ -226,8 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
226
226
  return c.visit();
227
227
  },
228
228
 
229
- terminalChar(_) {
230
- return common.unescapeChar(this.sourceString);
229
+ escapeChar(c) {
230
+ try {
231
+ return common.unescapeCodePoint(this.sourceString);
232
+ } catch (err) {
233
+ if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
234
+ throw errors.invalidCodePoint(c);
235
+ }
236
+ throw err; // Rethrow
237
+ }
231
238
  },
232
239
 
233
240
  NonemptyListOf(x, _, xs) {
package/src/nodes.js CHANGED
@@ -2,20 +2,19 @@
2
2
 
3
3
  const common = require('./common');
4
4
 
5
- // Ensures that the deprecation warning for `primitiveValue` only appears once.
6
- let didWarnForPrimitiveValue = false;
7
-
8
5
  // --------------------------------------------------------------------
9
6
  // Private stuff
10
7
  // --------------------------------------------------------------------
11
8
 
12
9
  class Node {
13
- constructor(grammar, ctorName, matchLength) {
14
- this.grammar = grammar;
15
- this.ctorName = ctorName;
10
+ constructor(matchLength) {
16
11
  this.matchLength = matchLength;
17
12
  }
18
13
 
14
+ get ctorName() {
15
+ throw new Error('subclass responsibility');
16
+ }
17
+
19
18
  numChildren() {
20
19
  return this.children ? this.children.length : 0;
21
20
  }
@@ -109,52 +108,38 @@ class Node {
109
108
  isOptional() {
110
109
  return false;
111
110
  }
112
-
113
- toJSON() {
114
- return {[this.ctorName]: this.children};
115
- }
116
111
  }
117
112
 
118
113
  // Terminals
119
114
 
120
115
  class TerminalNode extends Node {
121
- constructor(grammar, value) {
122
- const matchLength = value ? value.length : 0;
123
- super(grammar, '_terminal', matchLength);
124
- this._value = value;
116
+ get ctorName() {
117
+ return '_terminal';
125
118
  }
126
119
 
127
120
  isTerminal() {
128
121
  return true;
129
122
  }
130
123
 
131
- toJSON() {
132
- return {[this.ctorName]: this._value};
133
- }
134
-
135
124
  get primitiveValue() {
136
- if (!didWarnForPrimitiveValue) {
137
- // eslint-disable-next-line no-console
138
- console.warn(
139
- 'Warning: primitiveValue is deprecated and will be removed in a future version of Ohm. ' +
140
- 'Use sourceString instead.'
141
- );
142
- didWarnForPrimitiveValue = true;
143
- }
144
-
145
- return this._value;
125
+ throw new Error('The `primitiveValue` property was removed in Ohm v17.');
146
126
  }
147
127
  }
148
128
 
149
129
  // Nonterminals
150
130
 
151
131
  class NonterminalNode extends Node {
152
- constructor(grammar, ruleName, children, childOffsets, matchLength) {
153
- super(grammar, ruleName, matchLength);
132
+ constructor(ruleName, children, childOffsets, matchLength) {
133
+ super(matchLength);
134
+ this.ruleName = ruleName;
154
135
  this.children = children;
155
136
  this.childOffsets = childOffsets;
156
137
  }
157
138
 
139
+ get ctorName() {
140
+ return this.ruleName;
141
+ }
142
+
158
143
  isNonterminal() {
159
144
  return true;
160
145
  }
@@ -171,13 +156,17 @@ class NonterminalNode extends Node {
171
156
  // Iterations
172
157
 
173
158
  class IterationNode extends Node {
174
- constructor(grammar, children, childOffsets, matchLength, isOptional) {
175
- super(grammar, '_iter', matchLength);
159
+ constructor(children, childOffsets, matchLength, isOptional) {
160
+ super(matchLength);
176
161
  this.children = children;
177
162
  this.childOffsets = childOffsets;
178
163
  this.optional = isOptional;
179
164
  }
180
165
 
166
+ get ctorName() {
167
+ return '_iter';
168
+ }
169
+
181
170
  isIteration() {
182
171
  return true;
183
172
  }
@@ -102,7 +102,8 @@ Ohm {
102
102
  | "\\n" -- lineFeed
103
103
  | "\\r" -- carriageReturn
104
104
  | "\\t" -- tab
105
- | "\\u{" hexDigit+ "}" -- unicodeCodePoint
105
+ | "\\u{" hexDigit hexDigit? hexDigit?
106
+ hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
106
107
  | "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
107
108
  | "\\x" hexDigit hexDigit -- hexEscape
108
109
 
@@ -41,7 +41,7 @@ pexprs.any.eval = function(state) {
41
41
  const origPos = inputStream.pos;
42
42
  const ch = inputStream.next();
43
43
  if (ch) {
44
- state.pushBinding(new TerminalNode(state.grammar, ch), origPos);
44
+ state.pushBinding(new TerminalNode(ch.length), origPos);
45
45
  return true;
46
46
  } else {
47
47
  state.processFailure(origPos, this);
@@ -53,7 +53,7 @@ pexprs.end.eval = function(state) {
53
53
  const {inputStream} = state;
54
54
  const origPos = inputStream.pos;
55
55
  if (inputStream.atEnd()) {
56
- state.pushBinding(new TerminalNode(state.grammar, undefined), origPos);
56
+ state.pushBinding(new TerminalNode(0), origPos);
57
57
  return true;
58
58
  } else {
59
59
  state.processFailure(origPos, this);
@@ -68,7 +68,7 @@ pexprs.Terminal.prototype.eval = function(state) {
68
68
  state.processFailure(origPos, this);
69
69
  return false;
70
70
  } else {
71
- state.pushBinding(new TerminalNode(state.grammar, this.obj), origPos);
71
+ state.pushBinding(new TerminalNode(this.obj.length), origPos);
72
72
  return true;
73
73
  }
74
74
  };
@@ -77,13 +77,14 @@ pexprs.Range.prototype.eval = function(state) {
77
77
  const {inputStream} = state;
78
78
  const origPos = inputStream.pos;
79
79
 
80
- const cp =
81
- this.from.length > 1 || this.to.length > 1 ?
82
- inputStream.nextCodePoint() :
83
- inputStream.nextCharCode();
80
+ // A range can operate in one of two modes: matching a single, 16-bit _code unit_,
81
+ // or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
82
+ const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
84
83
 
84
+ // Always compare by code point value to get the correct result in all scenarios.
85
+ // Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
85
86
  if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
86
- state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
87
+ state.pushBinding(new TerminalNode(String.fromCodePoint(cp).length), origPos);
87
88
  return true;
88
89
  } else {
89
90
  state.processFailure(origPos, this);
@@ -168,7 +169,7 @@ pexprs.Iter.prototype.eval = function(state) {
168
169
  const isOptional = this instanceof pexprs.Opt;
169
170
  for (idx = 0; idx < cols.length; idx++) {
170
171
  state._bindings.push(
171
- new IterationNode(state.grammar, cols[idx], colOffsets[idx], matchLength, isOptional)
172
+ new IterationNode(cols[idx], colOffsets[idx], matchLength, isOptional)
172
173
  );
173
174
  state._bindingOffsets.push(offset);
174
175
  }
@@ -343,13 +344,8 @@ pexprs.Apply.prototype.evalOnce = function(expr, state) {
343
344
  const arity = expr.getArity();
344
345
  const bindings = state._bindings.splice(state._bindings.length - arity, arity);
345
346
  const offsets = state._bindingOffsets.splice(state._bindingOffsets.length - arity, arity);
346
- return new NonterminalNode(
347
- state.grammar,
348
- this.ruleName,
349
- bindings,
350
- offsets,
351
- inputStream.pos - origPos
352
- );
347
+ const matchLength = inputStream.pos - origPos;
348
+ return new NonterminalNode(this.ruleName, bindings, offsets, matchLength);
353
349
  } else {
354
350
  return false;
355
351
  }
@@ -404,7 +400,7 @@ pexprs.UnicodeChar.prototype.eval = function(state) {
404
400
  const origPos = inputStream.pos;
405
401
  const ch = inputStream.next();
406
402
  if (ch && this.pattern.test(ch)) {
407
- state.pushBinding(new TerminalNode(state.grammar, ch), origPos);
403
+ state.pushBinding(new TerminalNode(ch.length), origPos);
408
404
  return true;
409
405
  } else {
410
406
  state.processFailure(origPos, this);
@@ -53,6 +53,9 @@ class Range extends PExpr {
53
53
  super();
54
54
  this.from = from;
55
55
  this.to = to;
56
+ // If either `from` or `to` is made up of multiple code units, then
57
+ // the range should consume a full code point, not a single code unit.
58
+ this.matchCodePoint = from.length > 1 || to.length > 1;
56
59
  }
57
60
  }
58
61