ohm-js 16.3.0-dev.unicode-code-point-escape → 16.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -0
- package/dist/ohm-grammar.js +1 -1
- package/dist/ohm.esm.js +407 -432
- package/dist/ohm.js +70 -78
- package/dist/ohm.min.js +1 -1
- package/extras/semantics-toAST.js +3 -16
- package/index.d.ts +0 -7
- package/package.json +13 -11
- package/src/CaseInsensitiveTerminal.js +1 -1
- package/src/MatchState.js +6 -2
- package/src/Semantics.js +1 -18
- package/src/common.js +2 -17
- package/src/errors.js +19 -3
- package/src/main.js +9 -2
- package/src/nodes.js +21 -32
- package/src/ohm-grammar.ohm +2 -1
- package/src/pexprs-eval.js +13 -17
- package/src/pexprs-main.js +3 -0
|
@@ -1,13 +1,5 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
// --------------------------------------------------------------------
|
|
4
|
-
// Imports
|
|
5
|
-
// --------------------------------------------------------------------
|
|
6
|
-
|
|
7
|
-
const pexprs = require('../src/pexprs');
|
|
8
|
-
const MatchResult = require('../src/MatchResult');
|
|
9
|
-
const Grammar = require('../src/Grammar');
|
|
10
|
-
|
|
11
3
|
// --------------------------------------------------------------------
|
|
12
4
|
// Operations
|
|
13
5
|
// --------------------------------------------------------------------
|
|
@@ -23,11 +15,6 @@ const defaultOperation = {
|
|
|
23
15
|
|
|
24
16
|
// without customization
|
|
25
17
|
if (!Object.prototype.hasOwnProperty.call(mapping, ctorName)) {
|
|
26
|
-
// intermediate node
|
|
27
|
-
if (this._node instanceof pexprs.Alt || this._node instanceof pexprs.Apply) {
|
|
28
|
-
return children[0].toAST(mapping);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
18
|
// lexical rule
|
|
32
19
|
if (this.isLexical()) {
|
|
33
20
|
return this.sourceString;
|
|
@@ -111,8 +98,8 @@ const defaultOperation = {
|
|
|
111
98
|
// The optional `mapping` parameter can be used to customize how the nodes of the CST
|
|
112
99
|
// are mapped to the AST (see /doc/extras.md#toastmatchresult-mapping).
|
|
113
100
|
function toAST(res, mapping) {
|
|
114
|
-
if (
|
|
115
|
-
throw new Error('toAST() expects a
|
|
101
|
+
if (typeof res.failed !== 'function' || res.failed()) {
|
|
102
|
+
throw new Error('toAST() expects a succesful MatchResult as first parameter');
|
|
116
103
|
}
|
|
117
104
|
|
|
118
105
|
mapping = Object.assign({}, mapping);
|
|
@@ -130,7 +117,7 @@ function toAST(res, mapping) {
|
|
|
130
117
|
|
|
131
118
|
// Returns a semantics containing the toAST(mapping) operation for the given grammar g.
|
|
132
119
|
function semanticsForToAST(g) {
|
|
133
|
-
if (
|
|
120
|
+
if (typeof g.createSemantics !== 'function') {
|
|
134
121
|
throw new Error('semanticsToAST() expects a Grammar as parameter');
|
|
135
122
|
}
|
|
136
123
|
|
package/index.d.ts
CHANGED
|
@@ -309,13 +309,6 @@ declare namespace ohm {
|
|
|
309
309
|
*/
|
|
310
310
|
isOptional: boolean;
|
|
311
311
|
|
|
312
|
-
/**
|
|
313
|
-
* For a terminal node, the raw value that was consumed from the
|
|
314
|
-
* input stream.
|
|
315
|
-
* @deprecated Use `sourceString` instead.
|
|
316
|
-
*/
|
|
317
|
-
primitiveValue: string;
|
|
318
|
-
|
|
319
312
|
/**
|
|
320
313
|
* In addition to the properties defined above, within a given
|
|
321
314
|
* semantics, every node also has a method/property corresponding to
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ohm-js",
|
|
3
|
-
"version": "16.3.0-dev.unicode-code-point-escape",
|
|
3
|
+
"version": "16.3.2",
|
|
4
4
|
"description": "An object-oriented language for parsing and pattern matching",
|
|
5
5
|
"repository": "https://github.com/harc/ohm",
|
|
6
6
|
"keywords": [
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"rapid",
|
|
16
16
|
"prototyping"
|
|
17
17
|
],
|
|
18
|
-
"homepage": "https://
|
|
18
|
+
"homepage": "https://ohmjs.org",
|
|
19
19
|
"bugs": "https://github.com/harc/ohm/issues",
|
|
20
20
|
"main": "index.js",
|
|
21
21
|
"module": "dist/ohm.esm.js",
|
|
@@ -59,33 +59,35 @@
|
|
|
59
59
|
"Jason Merrill <jwmerrill@gmail.com>",
|
|
60
60
|
"Ray Toal <rtoal@lmu.edu>",
|
|
61
61
|
"Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
|
|
62
|
+
"megabuz <3299889+megabuz@users.noreply.github.com>",
|
|
62
63
|
"stagas <gstagas@gmail.com>",
|
|
63
64
|
"Jonathan Edwards <JonathanMEdwards@gmail.com>",
|
|
64
65
|
"Milan Lajtoš <milan.lajtos@me.com>",
|
|
65
66
|
"Neil Jewers <njjewers@uwaterloo.ca>",
|
|
66
|
-
"megabuz <3299889+megabuz@users.noreply.github.com>",
|
|
67
67
|
"Mike Niebling <(none)>",
|
|
68
|
-
"
|
|
69
|
-
"
|
|
68
|
+
"AngryPowman <angrypowman@qq.com>",
|
|
69
|
+
"Patrick Dubroy <patrick@sourcegraph.com>",
|
|
70
|
+
"Leslie Ying <acetophore@users.noreply.github.com>",
|
|
70
71
|
"Pierre Donias <pierre.donias@gmail.com>",
|
|
72
|
+
"Justin Chase <justin.m.chase@gmail.com>",
|
|
71
73
|
"Ian Harris <ian@fofgof.xyz>",
|
|
72
|
-
"Daniel Tomlinson <DanielTomlinson@me.com>",
|
|
73
74
|
"Stan Rozenraukh <stan@stanistan.com>",
|
|
74
75
|
"Stephan Seidt <stephan.seidt@gmail.com>",
|
|
75
76
|
"Steve Phillips <steve@tryingtobeawesome.com>",
|
|
76
77
|
"Szymon Kaliski <kaliskiszymon@gmail.com>",
|
|
77
78
|
"Thomas Nyberg <tomnyberg@gmail.com>",
|
|
78
|
-
"
|
|
79
|
+
"Daniel Tomlinson <DanielTomlinson@me.com>",
|
|
79
80
|
"Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
|
|
80
81
|
"Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
|
|
81
|
-
"
|
|
82
|
+
"Casey Olson <casey.m.olson@gmail.com>",
|
|
82
83
|
"abego <ub@abego-software.de>",
|
|
83
84
|
"acslk <d_vd415@hotmail.com>",
|
|
84
85
|
"codeZeilen <codeZeilen@users.noreply.github.com>",
|
|
85
|
-
"
|
|
86
|
+
"kassadin <kassadin@foxmail.com>",
|
|
87
|
+
"Arthur Carabott <arthurc@gmail.com>",
|
|
88
|
+
"owch <bowenrainyday@gmail.com>",
|
|
86
89
|
"Luca Guzzon <luca.guzzon@gmail.com>",
|
|
87
|
-
"
|
|
88
|
-
"owch <bowenrainyday@gmail.com>"
|
|
90
|
+
"sfinnie <scott.finnie@gmail.com>"
|
|
89
91
|
],
|
|
90
92
|
"dependencies": {},
|
|
91
93
|
"devDependencies": {
|
|
@@ -35,7 +35,7 @@ class CaseInsensitiveTerminal extends PExpr {
|
|
|
35
35
|
state.processFailure(origPos, this);
|
|
36
36
|
return false;
|
|
37
37
|
} else {
|
|
38
|
-
state.pushBinding(new TerminalNode(
|
|
38
|
+
state.pushBinding(new TerminalNode(matchStr.length), origPos);
|
|
39
39
|
return true;
|
|
40
40
|
}
|
|
41
41
|
}
|
package/src/MatchState.js
CHANGED
|
@@ -219,7 +219,7 @@ MatchState.prototype = {
|
|
|
219
219
|
// Returns the memoized trace entry for `expr` at `pos`, if one exists, `null` otherwise.
|
|
220
220
|
getMemoizedTraceEntry(pos, expr) {
|
|
221
221
|
const posInfo = this.memoTable[pos];
|
|
222
|
-
if (posInfo && expr.
|
|
222
|
+
if (posInfo && expr instanceof pexprs.Apply) {
|
|
223
223
|
const memoRec = posInfo.memo[expr.toMemoKey()];
|
|
224
224
|
if (memoRec && memoRec.traceEntry) {
|
|
225
225
|
const entry = memoRec.traceEntry.cloneWithExpr(expr);
|
|
@@ -361,11 +361,15 @@ MatchState.prototype = {
|
|
|
361
361
|
key => this.recordedFailures[key]
|
|
362
362
|
);
|
|
363
363
|
}
|
|
364
|
+
const cst = this._bindings[0];
|
|
365
|
+
if (cst) {
|
|
366
|
+
cst.grammar = this.grammar;
|
|
367
|
+
}
|
|
364
368
|
return new MatchResult(
|
|
365
369
|
this.matcher,
|
|
366
370
|
this.input,
|
|
367
371
|
this.startExpr,
|
|
368
|
-
|
|
372
|
+
cst,
|
|
369
373
|
this._bindingOffsets[0],
|
|
370
374
|
this.rightmostFailurePosition,
|
|
371
375
|
rightmostFailures
|
package/src/Semantics.js
CHANGED
|
@@ -47,11 +47,6 @@ class Wrapper {
|
|
|
47
47
|
return '[semantics wrapper for ' + this._node.grammar.name + ']';
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
-
// This is used by ohm editor to display a node wrapper appropriately.
|
|
51
|
-
toJSON() {
|
|
52
|
-
return this.toString();
|
|
53
|
-
}
|
|
54
|
-
|
|
55
50
|
_forgetMemoizedResultFor(attributeName) {
|
|
56
51
|
// Remove the memoized attribute from the cstNode and all its children.
|
|
57
52
|
delete this._node[this._semantics.attributeKeys[attributeName]];
|
|
@@ -131,7 +126,7 @@ class Wrapper {
|
|
|
131
126
|
const childWrappers = optChildWrappers || [];
|
|
132
127
|
|
|
133
128
|
const childNodes = childWrappers.map(c => c._node);
|
|
134
|
-
const iter = new IterationNode(
|
|
129
|
+
const iter = new IterationNode(childNodes, [], -1, false);
|
|
135
130
|
|
|
136
131
|
const wrapper = this._semantics.wrap(iter, null, null);
|
|
137
132
|
wrapper._childWrappers = childWrappers;
|
|
@@ -158,18 +153,6 @@ class Wrapper {
|
|
|
158
153
|
return this._node.numChildren();
|
|
159
154
|
}
|
|
160
155
|
|
|
161
|
-
// Returns the primitive value of this CST node, if it's a terminal node. Otherwise,
|
|
162
|
-
// throws an exception.
|
|
163
|
-
// DEPRECATED: Use `sourceString` instead.
|
|
164
|
-
get primitiveValue() {
|
|
165
|
-
if (this.isTerminal()) {
|
|
166
|
-
return this._node.primitiveValue;
|
|
167
|
-
}
|
|
168
|
-
throw new TypeError(
|
|
169
|
-
"tried to access the 'primitiveValue' attribute of a non-terminal CST node"
|
|
170
|
-
);
|
|
171
|
-
}
|
|
172
|
-
|
|
173
156
|
// Returns the contents of the input stream consumed by this CST node.
|
|
174
157
|
get sourceString() {
|
|
175
158
|
return this.source.contents;
|
package/src/common.js
CHANGED
|
@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
|
|
|
40
40
|
|
|
41
41
|
exports.assert = function(cond, message) {
|
|
42
42
|
if (!cond) {
|
|
43
|
-
throw new Error(message);
|
|
43
|
+
throw new Error(message || 'Assertion failed');
|
|
44
44
|
}
|
|
45
45
|
};
|
|
46
46
|
|
|
@@ -134,24 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
|
|
|
134
134
|
return this.strings.join('');
|
|
135
135
|
};
|
|
136
136
|
|
|
137
|
-
// Character escaping and unescaping
|
|
138
|
-
|
|
139
|
-
exports.escapeChar = function(c, optDelim) {
|
|
140
|
-
const charCode = c.charCodeAt(0);
|
|
141
|
-
if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
|
|
142
|
-
return c;
|
|
143
|
-
} else if (charCode < 128) {
|
|
144
|
-
return escapeStringFor[charCode];
|
|
145
|
-
} else if (128 <= charCode && charCode < 256) {
|
|
146
|
-
return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
|
|
147
|
-
} else {
|
|
148
|
-
return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
|
|
149
|
-
}
|
|
150
|
-
};
|
|
151
|
-
|
|
152
137
|
const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
|
|
153
138
|
|
|
154
|
-
exports.
|
|
139
|
+
exports.unescapeCodePoint = function(s) {
|
|
155
140
|
if (s.charAt(0) === '\\') {
|
|
156
141
|
switch (s.charAt(1)) {
|
|
157
142
|
case 'b':
|
package/src/errors.js
CHANGED
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
// Imports
|
|
5
5
|
// --------------------------------------------------------------------
|
|
6
6
|
|
|
7
|
-
const
|
|
8
|
-
|
|
7
|
+
const {assert} = require('./common');
|
|
9
8
|
const Namespace = require('./Namespace');
|
|
9
|
+
const pexprs = require('./pexprs-main');
|
|
10
10
|
|
|
11
11
|
// --------------------------------------------------------------------
|
|
12
12
|
// Private stuff
|
|
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
|
|
|
136
136
|
', got ' +
|
|
137
137
|
actual +
|
|
138
138
|
')',
|
|
139
|
-
expr
|
|
139
|
+
expr
|
|
140
140
|
);
|
|
141
141
|
}
|
|
142
142
|
|
|
@@ -209,6 +209,21 @@ function multipleSuperSplices(expr) {
|
|
|
209
209
|
return createError("'...' can appear at most once in a rule body", expr.source);
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
+
// Unicode code point escapes
|
|
213
|
+
|
|
214
|
+
function invalidCodePoint(applyWrapper) {
|
|
215
|
+
const node = applyWrapper._node;
|
|
216
|
+
assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
|
|
217
|
+
|
|
218
|
+
// Get an interval that covers all of the hex digits.
|
|
219
|
+
const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
|
|
220
|
+
const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
|
|
221
|
+
return createError(
|
|
222
|
+
`U+${fullInterval.contents} is not a valid Unicode code point`,
|
|
223
|
+
fullInterval
|
|
224
|
+
);
|
|
225
|
+
}
|
|
226
|
+
|
|
212
227
|
// ----------------- Kleene operators -----------------
|
|
213
228
|
|
|
214
229
|
function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
|
|
@@ -314,6 +329,7 @@ module.exports = {
|
|
|
314
329
|
inconsistentArity,
|
|
315
330
|
incorrectArgumentType,
|
|
316
331
|
intervalSourcesDontMatch,
|
|
332
|
+
invalidCodePoint,
|
|
317
333
|
invalidConstructorCall,
|
|
318
334
|
invalidParameter,
|
|
319
335
|
grammarSyntaxError,
|
package/src/main.js
CHANGED
|
@@ -226,8 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
|
|
|
226
226
|
return c.visit();
|
|
227
227
|
},
|
|
228
228
|
|
|
229
|
-
|
|
230
|
-
|
|
229
|
+
escapeChar(c) {
|
|
230
|
+
try {
|
|
231
|
+
return common.unescapeCodePoint(this.sourceString);
|
|
232
|
+
} catch (err) {
|
|
233
|
+
if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
|
|
234
|
+
throw errors.invalidCodePoint(c);
|
|
235
|
+
}
|
|
236
|
+
throw err; // Rethrow
|
|
237
|
+
}
|
|
231
238
|
},
|
|
232
239
|
|
|
233
240
|
NonemptyListOf(x, _, xs) {
|
package/src/nodes.js
CHANGED
|
@@ -2,20 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
const common = require('./common');
|
|
4
4
|
|
|
5
|
-
// Ensures that the deprecation warning for `primitiveValue` only appears once.
|
|
6
|
-
let didWarnForPrimitiveValue = false;
|
|
7
|
-
|
|
8
5
|
// --------------------------------------------------------------------
|
|
9
6
|
// Private stuff
|
|
10
7
|
// --------------------------------------------------------------------
|
|
11
8
|
|
|
12
9
|
class Node {
|
|
13
|
-
constructor(
|
|
14
|
-
this.grammar = grammar;
|
|
15
|
-
this.ctorName = ctorName;
|
|
10
|
+
constructor(matchLength) {
|
|
16
11
|
this.matchLength = matchLength;
|
|
17
12
|
}
|
|
18
13
|
|
|
14
|
+
get ctorName() {
|
|
15
|
+
throw new Error('subclass responsibility');
|
|
16
|
+
}
|
|
17
|
+
|
|
19
18
|
numChildren() {
|
|
20
19
|
return this.children ? this.children.length : 0;
|
|
21
20
|
}
|
|
@@ -109,52 +108,38 @@ class Node {
|
|
|
109
108
|
isOptional() {
|
|
110
109
|
return false;
|
|
111
110
|
}
|
|
112
|
-
|
|
113
|
-
toJSON() {
|
|
114
|
-
return {[this.ctorName]: this.children};
|
|
115
|
-
}
|
|
116
111
|
}
|
|
117
112
|
|
|
118
113
|
// Terminals
|
|
119
114
|
|
|
120
115
|
class TerminalNode extends Node {
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
super(grammar, '_terminal', matchLength);
|
|
124
|
-
this._value = value;
|
|
116
|
+
get ctorName() {
|
|
117
|
+
return '_terminal';
|
|
125
118
|
}
|
|
126
119
|
|
|
127
120
|
isTerminal() {
|
|
128
121
|
return true;
|
|
129
122
|
}
|
|
130
123
|
|
|
131
|
-
toJSON() {
|
|
132
|
-
return {[this.ctorName]: this._value};
|
|
133
|
-
}
|
|
134
|
-
|
|
135
124
|
get primitiveValue() {
|
|
136
|
-
|
|
137
|
-
// eslint-disable-next-line no-console
|
|
138
|
-
console.warn(
|
|
139
|
-
'Warning: primitiveValue is deprecated and will be removed in a future version of Ohm. ' +
|
|
140
|
-
'Use sourceString instead.'
|
|
141
|
-
);
|
|
142
|
-
didWarnForPrimitiveValue = true;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
return this._value;
|
|
125
|
+
throw new Error('The `primitiveValue` property was removed in Ohm v17.');
|
|
146
126
|
}
|
|
147
127
|
}
|
|
148
128
|
|
|
149
129
|
// Nonterminals
|
|
150
130
|
|
|
151
131
|
class NonterminalNode extends Node {
|
|
152
|
-
constructor(
|
|
153
|
-
super(
|
|
132
|
+
constructor(ruleName, children, childOffsets, matchLength) {
|
|
133
|
+
super(matchLength);
|
|
134
|
+
this.ruleName = ruleName;
|
|
154
135
|
this.children = children;
|
|
155
136
|
this.childOffsets = childOffsets;
|
|
156
137
|
}
|
|
157
138
|
|
|
139
|
+
get ctorName() {
|
|
140
|
+
return this.ruleName;
|
|
141
|
+
}
|
|
142
|
+
|
|
158
143
|
isNonterminal() {
|
|
159
144
|
return true;
|
|
160
145
|
}
|
|
@@ -171,13 +156,17 @@ class NonterminalNode extends Node {
|
|
|
171
156
|
// Iterations
|
|
172
157
|
|
|
173
158
|
class IterationNode extends Node {
|
|
174
|
-
constructor(
|
|
175
|
-
super(
|
|
159
|
+
constructor(children, childOffsets, matchLength, isOptional) {
|
|
160
|
+
super(matchLength);
|
|
176
161
|
this.children = children;
|
|
177
162
|
this.childOffsets = childOffsets;
|
|
178
163
|
this.optional = isOptional;
|
|
179
164
|
}
|
|
180
165
|
|
|
166
|
+
get ctorName() {
|
|
167
|
+
return '_iter';
|
|
168
|
+
}
|
|
169
|
+
|
|
181
170
|
isIteration() {
|
|
182
171
|
return true;
|
|
183
172
|
}
|
package/src/ohm-grammar.ohm
CHANGED
|
@@ -102,7 +102,8 @@ Ohm {
|
|
|
102
102
|
| "\\n" -- lineFeed
|
|
103
103
|
| "\\r" -- carriageReturn
|
|
104
104
|
| "\\t" -- tab
|
|
105
|
-
| "\\u{" hexDigit
|
|
105
|
+
| "\\u{" hexDigit hexDigit? hexDigit?
|
|
106
|
+
hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
|
|
106
107
|
| "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
|
|
107
108
|
| "\\x" hexDigit hexDigit -- hexEscape
|
|
108
109
|
|
package/src/pexprs-eval.js
CHANGED
|
@@ -41,7 +41,7 @@ pexprs.any.eval = function(state) {
|
|
|
41
41
|
const origPos = inputStream.pos;
|
|
42
42
|
const ch = inputStream.next();
|
|
43
43
|
if (ch) {
|
|
44
|
-
state.pushBinding(new TerminalNode(
|
|
44
|
+
state.pushBinding(new TerminalNode(ch.length), origPos);
|
|
45
45
|
return true;
|
|
46
46
|
} else {
|
|
47
47
|
state.processFailure(origPos, this);
|
|
@@ -53,7 +53,7 @@ pexprs.end.eval = function(state) {
|
|
|
53
53
|
const {inputStream} = state;
|
|
54
54
|
const origPos = inputStream.pos;
|
|
55
55
|
if (inputStream.atEnd()) {
|
|
56
|
-
state.pushBinding(new TerminalNode(
|
|
56
|
+
state.pushBinding(new TerminalNode(0), origPos);
|
|
57
57
|
return true;
|
|
58
58
|
} else {
|
|
59
59
|
state.processFailure(origPos, this);
|
|
@@ -68,7 +68,7 @@ pexprs.Terminal.prototype.eval = function(state) {
|
|
|
68
68
|
state.processFailure(origPos, this);
|
|
69
69
|
return false;
|
|
70
70
|
} else {
|
|
71
|
-
state.pushBinding(new TerminalNode(
|
|
71
|
+
state.pushBinding(new TerminalNode(this.obj.length), origPos);
|
|
72
72
|
return true;
|
|
73
73
|
}
|
|
74
74
|
};
|
|
@@ -77,13 +77,14 @@ pexprs.Range.prototype.eval = function(state) {
|
|
|
77
77
|
const {inputStream} = state;
|
|
78
78
|
const origPos = inputStream.pos;
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
inputStream.nextCharCode();
|
|
80
|
+
// A range can operate in one of two modes: matching a single, 16-bit _code unit_,
|
|
81
|
+
// or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
|
|
82
|
+
const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
|
|
84
83
|
|
|
84
|
+
// Always compare by code point value to get the correct result in all scenarios.
|
|
85
|
+
// Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
|
|
85
86
|
if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
|
|
86
|
-
state.pushBinding(new TerminalNode(
|
|
87
|
+
state.pushBinding(new TerminalNode(String.fromCodePoint(cp).length), origPos);
|
|
87
88
|
return true;
|
|
88
89
|
} else {
|
|
89
90
|
state.processFailure(origPos, this);
|
|
@@ -168,7 +169,7 @@ pexprs.Iter.prototype.eval = function(state) {
|
|
|
168
169
|
const isOptional = this instanceof pexprs.Opt;
|
|
169
170
|
for (idx = 0; idx < cols.length; idx++) {
|
|
170
171
|
state._bindings.push(
|
|
171
|
-
new IterationNode(
|
|
172
|
+
new IterationNode(cols[idx], colOffsets[idx], matchLength, isOptional)
|
|
172
173
|
);
|
|
173
174
|
state._bindingOffsets.push(offset);
|
|
174
175
|
}
|
|
@@ -343,13 +344,8 @@ pexprs.Apply.prototype.evalOnce = function(expr, state) {
|
|
|
343
344
|
const arity = expr.getArity();
|
|
344
345
|
const bindings = state._bindings.splice(state._bindings.length - arity, arity);
|
|
345
346
|
const offsets = state._bindingOffsets.splice(state._bindingOffsets.length - arity, arity);
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
this.ruleName,
|
|
349
|
-
bindings,
|
|
350
|
-
offsets,
|
|
351
|
-
inputStream.pos - origPos
|
|
352
|
-
);
|
|
347
|
+
const matchLength = inputStream.pos - origPos;
|
|
348
|
+
return new NonterminalNode(this.ruleName, bindings, offsets, matchLength);
|
|
353
349
|
} else {
|
|
354
350
|
return false;
|
|
355
351
|
}
|
|
@@ -404,7 +400,7 @@ pexprs.UnicodeChar.prototype.eval = function(state) {
|
|
|
404
400
|
const origPos = inputStream.pos;
|
|
405
401
|
const ch = inputStream.next();
|
|
406
402
|
if (ch && this.pattern.test(ch)) {
|
|
407
|
-
state.pushBinding(new TerminalNode(
|
|
403
|
+
state.pushBinding(new TerminalNode(ch.length), origPos);
|
|
408
404
|
return true;
|
|
409
405
|
} else {
|
|
410
406
|
state.processFailure(origPos, this);
|
package/src/pexprs-main.js
CHANGED
|
@@ -53,6 +53,9 @@ class Range extends PExpr {
|
|
|
53
53
|
super();
|
|
54
54
|
this.from = from;
|
|
55
55
|
this.to = to;
|
|
56
|
+
// If either `from` or `to` is made up of multiple code units, then
|
|
57
|
+
// the range should consume a full code point, not a single code unit.
|
|
58
|
+
this.matchCodePoint = from.length > 1 || to.length > 1;
|
|
56
59
|
}
|
|
57
60
|
}
|
|
58
61
|
|