ohm-js 16.2.0 → 16.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -15
- package/dist/built-in-rules.js.old +2 -0
- package/dist/ohm-grammar.js +1 -1
- package/dist/ohm-grammar.js.old +2 -0
- package/dist/ohm.esm.js +388 -367
- package/dist/ohm.js +56 -33
- package/dist/ohm.min.js +1 -1
- package/extras/semantics-toAST.js +3 -16
- package/package.json +13 -11
- package/src/InputStream.js +15 -0
- package/src/common.js +7 -18
- package/src/errors.js +19 -3
- package/src/main.js +9 -6
- package/src/ohm-grammar.ohm +3 -1
- package/src/pexprs-eval.js +9 -3
- package/src/pexprs-main.js +3 -0
|
@@ -1,13 +1,5 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
// --------------------------------------------------------------------
|
|
4
|
-
// Imports
|
|
5
|
-
// --------------------------------------------------------------------
|
|
6
|
-
|
|
7
|
-
const pexprs = require('../src/pexprs');
|
|
8
|
-
const MatchResult = require('../src/MatchResult');
|
|
9
|
-
const Grammar = require('../src/Grammar');
|
|
10
|
-
|
|
11
3
|
// --------------------------------------------------------------------
|
|
12
4
|
// Operations
|
|
13
5
|
// --------------------------------------------------------------------
|
|
@@ -23,11 +15,6 @@ const defaultOperation = {
|
|
|
23
15
|
|
|
24
16
|
// without customization
|
|
25
17
|
if (!Object.prototype.hasOwnProperty.call(mapping, ctorName)) {
|
|
26
|
-
// intermediate node
|
|
27
|
-
if (this._node instanceof pexprs.Alt || this._node instanceof pexprs.Apply) {
|
|
28
|
-
return children[0].toAST(mapping);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
18
|
// lexical rule
|
|
32
19
|
if (this.isLexical()) {
|
|
33
20
|
return this.sourceString;
|
|
@@ -111,8 +98,8 @@ const defaultOperation = {
|
|
|
111
98
|
// The optional `mapping` parameter can be used to customize how the nodes of the CST
|
|
112
99
|
// are mapped to the AST (see /doc/extras.md#toastmatchresult-mapping).
|
|
113
100
|
function toAST(res, mapping) {
|
|
114
|
-
if (
|
|
115
|
-
throw new Error('toAST() expects a
|
|
101
|
+
if (typeof res.failed !== 'function' || res.failed()) {
|
|
102
|
+
throw new Error('toAST() expects a succesful MatchResult as first parameter');
|
|
116
103
|
}
|
|
117
104
|
|
|
118
105
|
mapping = Object.assign({}, mapping);
|
|
@@ -130,7 +117,7 @@ function toAST(res, mapping) {
|
|
|
130
117
|
|
|
131
118
|
// Returns a semantics containing the toAST(mapping) operation for the given grammar g.
|
|
132
119
|
function semanticsForToAST(g) {
|
|
133
|
-
if (
|
|
120
|
+
if (typeof g.createSemantics !== 'function') {
|
|
134
121
|
throw new Error('semanticsToAST() expects a Grammar as parameter');
|
|
135
122
|
}
|
|
136
123
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ohm-js",
|
|
3
|
-
"version": "16.
|
|
3
|
+
"version": "16.3.1",
|
|
4
4
|
"description": "An object-oriented language for parsing and pattern matching",
|
|
5
5
|
"repository": "https://github.com/harc/ohm",
|
|
6
6
|
"keywords": [
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"rapid",
|
|
16
16
|
"prototyping"
|
|
17
17
|
],
|
|
18
|
-
"homepage": "https://
|
|
18
|
+
"homepage": "https://ohmjs.org",
|
|
19
19
|
"bugs": "https://github.com/harc/ohm/issues",
|
|
20
20
|
"main": "index.js",
|
|
21
21
|
"module": "dist/ohm.esm.js",
|
|
@@ -59,33 +59,35 @@
|
|
|
59
59
|
"Jason Merrill <jwmerrill@gmail.com>",
|
|
60
60
|
"Ray Toal <rtoal@lmu.edu>",
|
|
61
61
|
"Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
|
|
62
|
+
"megabuz <3299889+megabuz@users.noreply.github.com>",
|
|
62
63
|
"stagas <gstagas@gmail.com>",
|
|
63
64
|
"Jonathan Edwards <JonathanMEdwards@gmail.com>",
|
|
64
65
|
"Milan Lajtoš <milan.lajtos@me.com>",
|
|
65
66
|
"Neil Jewers <njjewers@uwaterloo.ca>",
|
|
66
|
-
"megabuz <3299889+megabuz@users.noreply.github.com>",
|
|
67
67
|
"Mike Niebling <(none)>",
|
|
68
|
-
"
|
|
69
|
-
"
|
|
68
|
+
"AngryPowman <angrypowman@qq.com>",
|
|
69
|
+
"Patrick Dubroy <patrick@sourcegraph.com>",
|
|
70
|
+
"Leslie Ying <acetophore@users.noreply.github.com>",
|
|
70
71
|
"Pierre Donias <pierre.donias@gmail.com>",
|
|
72
|
+
"Justin Chase <justin.m.chase@gmail.com>",
|
|
71
73
|
"Ian Harris <ian@fofgof.xyz>",
|
|
72
|
-
"Daniel Tomlinson <DanielTomlinson@me.com>",
|
|
73
74
|
"Stan Rozenraukh <stan@stanistan.com>",
|
|
74
75
|
"Stephan Seidt <stephan.seidt@gmail.com>",
|
|
75
76
|
"Steve Phillips <steve@tryingtobeawesome.com>",
|
|
76
77
|
"Szymon Kaliski <kaliskiszymon@gmail.com>",
|
|
77
78
|
"Thomas Nyberg <tomnyberg@gmail.com>",
|
|
78
|
-
"
|
|
79
|
+
"Daniel Tomlinson <DanielTomlinson@me.com>",
|
|
79
80
|
"Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
|
|
80
81
|
"Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
|
|
81
|
-
"
|
|
82
|
+
"Casey Olson <casey.m.olson@gmail.com>",
|
|
82
83
|
"abego <ub@abego-software.de>",
|
|
83
84
|
"acslk <d_vd415@hotmail.com>",
|
|
84
85
|
"codeZeilen <codeZeilen@users.noreply.github.com>",
|
|
85
|
-
"
|
|
86
|
+
"kassadin <kassadin@foxmail.com>",
|
|
87
|
+
"Arthur Carabott <arthurc@gmail.com>",
|
|
88
|
+
"owch <bowenrainyday@gmail.com>",
|
|
86
89
|
"Luca Guzzon <luca.guzzon@gmail.com>",
|
|
87
|
-
"
|
|
88
|
-
"owch <bowenrainyday@gmail.com>"
|
|
90
|
+
"sfinnie <scott.finnie@gmail.com>"
|
|
89
91
|
],
|
|
90
92
|
"dependencies": {},
|
|
91
93
|
"devDependencies": {
|
package/src/InputStream.js
CHANGED
|
@@ -29,6 +29,21 @@ InputStream.prototype = {
|
|
|
29
29
|
return ans;
|
|
30
30
|
},
|
|
31
31
|
|
|
32
|
+
nextCharCode() {
|
|
33
|
+
const nextChar = this.next();
|
|
34
|
+
return nextChar && nextChar.charCodeAt(0);
|
|
35
|
+
},
|
|
36
|
+
|
|
37
|
+
nextCodePoint() {
|
|
38
|
+
const cp = this.source.slice(this.pos++).codePointAt(0);
|
|
39
|
+
// If the code point is beyond plane 0, it takes up two characters.
|
|
40
|
+
if (cp > 0xffff) {
|
|
41
|
+
this.pos += 1;
|
|
42
|
+
}
|
|
43
|
+
this.examinedLength = Math.max(this.examinedLength, this.pos);
|
|
44
|
+
return cp;
|
|
45
|
+
},
|
|
46
|
+
|
|
32
47
|
matchString(s, optIgnoreCase) {
|
|
33
48
|
let idx;
|
|
34
49
|
if (optIgnoreCase) {
|
package/src/common.js
CHANGED
|
@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
|
|
|
40
40
|
|
|
41
41
|
exports.assert = function(cond, message) {
|
|
42
42
|
if (!cond) {
|
|
43
|
-
throw new Error(message);
|
|
43
|
+
throw new Error(message || 'Assertion failed');
|
|
44
44
|
}
|
|
45
45
|
};
|
|
46
46
|
|
|
@@ -134,22 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
|
|
|
134
134
|
return this.strings.join('');
|
|
135
135
|
};
|
|
136
136
|
|
|
137
|
-
|
|
137
|
+
const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
|
|
138
138
|
|
|
139
|
-
exports.
|
|
140
|
-
const charCode = c.charCodeAt(0);
|
|
141
|
-
if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
|
|
142
|
-
return c;
|
|
143
|
-
} else if (charCode < 128) {
|
|
144
|
-
return escapeStringFor[charCode];
|
|
145
|
-
} else if (128 <= charCode && charCode < 256) {
|
|
146
|
-
return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
|
|
147
|
-
} else {
|
|
148
|
-
return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
|
|
149
|
-
}
|
|
150
|
-
};
|
|
151
|
-
|
|
152
|
-
exports.unescapeChar = function(s) {
|
|
139
|
+
exports.unescapeCodePoint = function(s) {
|
|
153
140
|
if (s.charAt(0) === '\\') {
|
|
154
141
|
switch (s.charAt(1)) {
|
|
155
142
|
case 'b':
|
|
@@ -165,9 +152,11 @@ exports.unescapeChar = function(s) {
|
|
|
165
152
|
case 'v':
|
|
166
153
|
return '\v';
|
|
167
154
|
case 'x':
|
|
168
|
-
return
|
|
155
|
+
return escapeUnicode(s.slice(2, 4));
|
|
169
156
|
case 'u':
|
|
170
|
-
return
|
|
157
|
+
return s.charAt(2) === '{' ?
|
|
158
|
+
escapeUnicode(s.slice(3, -1)) :
|
|
159
|
+
escapeUnicode(s.slice(2, 6));
|
|
171
160
|
default:
|
|
172
161
|
return s.charAt(1);
|
|
173
162
|
}
|
package/src/errors.js
CHANGED
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
// Imports
|
|
5
5
|
// --------------------------------------------------------------------
|
|
6
6
|
|
|
7
|
-
const
|
|
8
|
-
|
|
7
|
+
const {assert} = require('./common');
|
|
9
8
|
const Namespace = require('./Namespace');
|
|
9
|
+
const pexprs = require('./pexprs-main');
|
|
10
10
|
|
|
11
11
|
// --------------------------------------------------------------------
|
|
12
12
|
// Private stuff
|
|
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
|
|
|
136
136
|
', got ' +
|
|
137
137
|
actual +
|
|
138
138
|
')',
|
|
139
|
-
expr
|
|
139
|
+
expr
|
|
140
140
|
);
|
|
141
141
|
}
|
|
142
142
|
|
|
@@ -209,6 +209,21 @@ function multipleSuperSplices(expr) {
|
|
|
209
209
|
return createError("'...' can appear at most once in a rule body", expr.source);
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
+
// Unicode code point escapes
|
|
213
|
+
|
|
214
|
+
function invalidCodePoint(applyWrapper) {
|
|
215
|
+
const node = applyWrapper._node;
|
|
216
|
+
assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
|
|
217
|
+
|
|
218
|
+
// Get an interval that covers all of the hex digits.
|
|
219
|
+
const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
|
|
220
|
+
const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
|
|
221
|
+
return createError(
|
|
222
|
+
`U+${fullInterval.contents} is not a valid Unicode code point`,
|
|
223
|
+
fullInterval
|
|
224
|
+
);
|
|
225
|
+
}
|
|
226
|
+
|
|
212
227
|
// ----------------- Kleene operators -----------------
|
|
213
228
|
|
|
214
229
|
function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
|
|
@@ -314,6 +329,7 @@ module.exports = {
|
|
|
314
329
|
inconsistentArity,
|
|
315
330
|
incorrectArgumentType,
|
|
316
331
|
intervalSourcesDontMatch,
|
|
332
|
+
invalidCodePoint,
|
|
317
333
|
invalidConstructorCall,
|
|
318
334
|
invalidParameter,
|
|
319
335
|
grammarSyntaxError,
|
package/src/main.js
CHANGED
|
@@ -226,12 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
|
|
|
226
226
|
return c.visit();
|
|
227
227
|
},
|
|
228
228
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
229
|
+
escapeChar(c) {
|
|
230
|
+
try {
|
|
231
|
+
return common.unescapeCodePoint(this.sourceString);
|
|
232
|
+
} catch (err) {
|
|
233
|
+
if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
|
|
234
|
+
throw errors.invalidCodePoint(c);
|
|
235
|
+
}
|
|
236
|
+
throw err; // Rethrow
|
|
237
|
+
}
|
|
235
238
|
},
|
|
236
239
|
|
|
237
240
|
NonemptyListOf(x, _, xs) {
|
package/src/ohm-grammar.ohm
CHANGED
|
@@ -92,7 +92,7 @@ Ohm {
|
|
|
92
92
|
|
|
93
93
|
terminalChar
|
|
94
94
|
= escapeChar
|
|
95
|
-
|
|
95
|
+
| ~"\\" ~"\"" ~"\n" "\u{0}".."\u{10FFFF}"
|
|
96
96
|
|
|
97
97
|
escapeChar (an escape sequence)
|
|
98
98
|
= "\\\\" -- backslash
|
|
@@ -102,6 +102,8 @@ Ohm {
|
|
|
102
102
|
| "\\n" -- lineFeed
|
|
103
103
|
| "\\r" -- carriageReturn
|
|
104
104
|
| "\\t" -- tab
|
|
105
|
+
| "\\u{" hexDigit hexDigit? hexDigit?
|
|
106
|
+
hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
|
|
105
107
|
| "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
|
|
106
108
|
| "\\x" hexDigit hexDigit -- hexEscape
|
|
107
109
|
|
package/src/pexprs-eval.js
CHANGED
|
@@ -76,9 +76,15 @@ pexprs.Terminal.prototype.eval = function(state) {
|
|
|
76
76
|
pexprs.Range.prototype.eval = function(state) {
|
|
77
77
|
const {inputStream} = state;
|
|
78
78
|
const origPos = inputStream.pos;
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
79
|
+
|
|
80
|
+
// A range can operate in one of two modes: matching a single, 16-bit _code unit_,
|
|
81
|
+
// or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
|
|
82
|
+
const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
|
|
83
|
+
|
|
84
|
+
// Always compare by code point value to get the correct result in all scenarios.
|
|
85
|
+
// Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
|
|
86
|
+
if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
|
|
87
|
+
state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
|
|
82
88
|
return true;
|
|
83
89
|
} else {
|
|
84
90
|
state.processFailure(origPos, this);
|
package/src/pexprs-main.js
CHANGED
|
@@ -53,6 +53,9 @@ class Range extends PExpr {
|
|
|
53
53
|
super();
|
|
54
54
|
this.from = from;
|
|
55
55
|
this.to = to;
|
|
56
|
+
// If either `from` or `to` is made up of multiple code units, then
|
|
57
|
+
// the range should consume a full code point, not a single code unit.
|
|
58
|
+
this.matchCodePoint = from.length > 1 || to.length > 1;
|
|
56
59
|
}
|
|
57
60
|
}
|
|
58
61
|
|