ohm-js 16.3.0-dev.unicode-code-point-escape → 16.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -0
- package/dist/ohm-grammar.js +1 -1
- package/dist/ohm.esm.js +54 -40
- package/dist/ohm.js +36 -28
- package/dist/ohm.min.js +1 -1
- package/package.json +12 -10
- package/src/common.js +2 -17
- package/src/errors.js +16 -3
- package/src/main.js +9 -2
- package/src/ohm-grammar.ohm +2 -1
- package/src/pexprs-eval.js +5 -4
- package/src/pexprs-main.js +3 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ohm-js",
|
|
3
|
-
"version": "16.3.0-dev.unicode-code-point-escape",
|
|
3
|
+
"version": "16.3.0",
|
|
4
4
|
"description": "An object-oriented language for parsing and pattern matching",
|
|
5
5
|
"repository": "https://github.com/harc/ohm",
|
|
6
6
|
"keywords": [
|
|
@@ -59,33 +59,35 @@
|
|
|
59
59
|
"Jason Merrill <jwmerrill@gmail.com>",
|
|
60
60
|
"Ray Toal <rtoal@lmu.edu>",
|
|
61
61
|
"Yoshiki Ohshima <Yoshiki.Ohshima@acm.org>",
|
|
62
|
+
"megabuz <3299889+megabuz@users.noreply.github.com>",
|
|
62
63
|
"stagas <gstagas@gmail.com>",
|
|
63
64
|
"Jonathan Edwards <JonathanMEdwards@gmail.com>",
|
|
64
65
|
"Milan Lajtoš <milan.lajtos@me.com>",
|
|
65
66
|
"Neil Jewers <njjewers@uwaterloo.ca>",
|
|
66
|
-
"megabuz <3299889+megabuz@users.noreply.github.com>",
|
|
67
67
|
"Mike Niebling <(none)>",
|
|
68
|
-
"
|
|
69
|
-
"
|
|
68
|
+
"AngryPowman <angrypowman@qq.com>",
|
|
69
|
+
"Patrick Dubroy <patrick@sourcegraph.com>",
|
|
70
|
+
"Leslie Ying <acetophore@users.noreply.github.com>",
|
|
70
71
|
"Pierre Donias <pierre.donias@gmail.com>",
|
|
72
|
+
"Justin Chase <justin.m.chase@gmail.com>",
|
|
71
73
|
"Ian Harris <ian@fofgof.xyz>",
|
|
72
|
-
"Daniel Tomlinson <DanielTomlinson@me.com>",
|
|
73
74
|
"Stan Rozenraukh <stan@stanistan.com>",
|
|
74
75
|
"Stephan Seidt <stephan.seidt@gmail.com>",
|
|
75
76
|
"Steve Phillips <steve@tryingtobeawesome.com>",
|
|
76
77
|
"Szymon Kaliski <kaliskiszymon@gmail.com>",
|
|
77
78
|
"Thomas Nyberg <tomnyberg@gmail.com>",
|
|
78
|
-
"
|
|
79
|
+
"Daniel Tomlinson <DanielTomlinson@me.com>",
|
|
79
80
|
"Vse Mozhet Byt <vsemozhetbyt@gmail.com>",
|
|
80
81
|
"Wil Chung <10446+iamwilhelm@users.noreply.github.com>",
|
|
81
|
-
"
|
|
82
|
+
"Casey Olson <casey.m.olson@gmail.com>",
|
|
82
83
|
"abego <ub@abego-software.de>",
|
|
83
84
|
"acslk <d_vd415@hotmail.com>",
|
|
84
85
|
"codeZeilen <codeZeilen@users.noreply.github.com>",
|
|
85
|
-
"
|
|
86
|
+
"kassadin <kassadin@foxmail.com>",
|
|
87
|
+
"Arthur Carabott <arthurc@gmail.com>",
|
|
88
|
+
"owch <bowenrainyday@gmail.com>",
|
|
86
89
|
"Luca Guzzon <luca.guzzon@gmail.com>",
|
|
87
|
-
"
|
|
88
|
-
"owch <bowenrainyday@gmail.com>"
|
|
90
|
+
"sfinnie <scott.finnie@gmail.com>"
|
|
89
91
|
],
|
|
90
92
|
"dependencies": {},
|
|
91
93
|
"devDependencies": {
|
package/src/common.js
CHANGED
|
@@ -40,7 +40,7 @@ exports.abstract = function(optMethodName) {
|
|
|
40
40
|
|
|
41
41
|
exports.assert = function(cond, message) {
|
|
42
42
|
if (!cond) {
|
|
43
|
-
throw new Error(message);
|
|
43
|
+
throw new Error(message || 'Assertion failed');
|
|
44
44
|
}
|
|
45
45
|
};
|
|
46
46
|
|
|
@@ -134,24 +134,9 @@ exports.StringBuffer.prototype.contents = function() {
|
|
|
134
134
|
return this.strings.join('');
|
|
135
135
|
};
|
|
136
136
|
|
|
137
|
-
// Character escaping and unescaping
|
|
138
|
-
|
|
139
|
-
exports.escapeChar = function(c, optDelim) {
|
|
140
|
-
const charCode = c.charCodeAt(0);
|
|
141
|
-
if ((c === '"' || c === "'") && optDelim && c !== optDelim) {
|
|
142
|
-
return c;
|
|
143
|
-
} else if (charCode < 128) {
|
|
144
|
-
return escapeStringFor[charCode];
|
|
145
|
-
} else if (128 <= charCode && charCode < 256) {
|
|
146
|
-
return '\\x' + exports.padLeft(charCode.toString(16), 2, '0');
|
|
147
|
-
} else {
|
|
148
|
-
return '\\u' + exports.padLeft(charCode.toString(16), 4, '0');
|
|
149
|
-
}
|
|
150
|
-
};
|
|
151
|
-
|
|
152
137
|
const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
|
|
153
138
|
|
|
154
|
-
exports.
|
|
139
|
+
exports.unescapeCodePoint = function(s) {
|
|
155
140
|
if (s.charAt(0) === '\\') {
|
|
156
141
|
switch (s.charAt(1)) {
|
|
157
142
|
case 'b':
|
package/src/errors.js
CHANGED
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
// Imports
|
|
5
5
|
// --------------------------------------------------------------------
|
|
6
6
|
|
|
7
|
-
const
|
|
8
|
-
|
|
7
|
+
const {assert} = require('./common');
|
|
9
8
|
const Namespace = require('./Namespace');
|
|
9
|
+
const pexprs = require('./pexprs-main');
|
|
10
10
|
|
|
11
11
|
// --------------------------------------------------------------------
|
|
12
12
|
// Private stuff
|
|
@@ -136,7 +136,7 @@ function wrongNumberOfArguments(ruleName, expected, actual, expr) {
|
|
|
136
136
|
', got ' +
|
|
137
137
|
actual +
|
|
138
138
|
')',
|
|
139
|
-
expr
|
|
139
|
+
expr
|
|
140
140
|
);
|
|
141
141
|
}
|
|
142
142
|
|
|
@@ -209,6 +209,18 @@ function multipleSuperSplices(expr) {
|
|
|
209
209
|
return createError("'...' can appear at most once in a rule body", expr.source);
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
+
// Unicode code point escapes
|
|
213
|
+
|
|
214
|
+
function invalidCodePoint(applyWrapper) {
|
|
215
|
+
const node = applyWrapper._node;
|
|
216
|
+
assert(node && node.isNonterminal() && node.ctorName === 'escapeChar_unicodeCodePoint');
|
|
217
|
+
|
|
218
|
+
// Get an interval that covers all of the hex digits.
|
|
219
|
+
const digitIntervals = applyWrapper.children.slice(1, -1).map(d => d.source);
|
|
220
|
+
const fullInterval = digitIntervals[0].coverageWith(...digitIntervals.slice(1));
|
|
221
|
+
return createError(`U+${fullInterval.contents} is not a valid Unicode code point`, fullInterval);
|
|
222
|
+
}
|
|
223
|
+
|
|
212
224
|
// ----------------- Kleene operators -----------------
|
|
213
225
|
|
|
214
226
|
function kleeneExprHasNullableOperand(kleeneExpr, applicationStack) {
|
|
@@ -314,6 +326,7 @@ module.exports = {
|
|
|
314
326
|
inconsistentArity,
|
|
315
327
|
incorrectArgumentType,
|
|
316
328
|
intervalSourcesDontMatch,
|
|
329
|
+
invalidCodePoint,
|
|
317
330
|
invalidConstructorCall,
|
|
318
331
|
invalidParameter,
|
|
319
332
|
grammarSyntaxError,
|
package/src/main.js
CHANGED
|
@@ -226,8 +226,15 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
|
|
|
226
226
|
return c.visit();
|
|
227
227
|
},
|
|
228
228
|
|
|
229
|
-
|
|
230
|
-
|
|
229
|
+
escapeChar(c) {
|
|
230
|
+
try {
|
|
231
|
+
return common.unescapeCodePoint(this.sourceString);
|
|
232
|
+
} catch (err) {
|
|
233
|
+
if (err instanceof RangeError && err.message.startsWith('Invalid code point ')) {
|
|
234
|
+
throw errors.invalidCodePoint(c);
|
|
235
|
+
}
|
|
236
|
+
throw err; // Rethrow
|
|
237
|
+
}
|
|
231
238
|
},
|
|
232
239
|
|
|
233
240
|
NonemptyListOf(x, _, xs) {
|
package/src/ohm-grammar.ohm
CHANGED
|
@@ -102,7 +102,8 @@ Ohm {
|
|
|
102
102
|
| "\\n" -- lineFeed
|
|
103
103
|
| "\\r" -- carriageReturn
|
|
104
104
|
| "\\t" -- tab
|
|
105
|
-
| "\\u{" hexDigit
|
|
105
|
+
| "\\u{" hexDigit hexDigit? hexDigit?
|
|
106
|
+
hexDigit? hexDigit? hexDigit? "}" -- unicodeCodePoint
|
|
106
107
|
| "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
|
|
107
108
|
| "\\x" hexDigit hexDigit -- hexEscape
|
|
108
109
|
|
package/src/pexprs-eval.js
CHANGED
|
@@ -77,11 +77,12 @@ pexprs.Range.prototype.eval = function(state) {
|
|
|
77
77
|
const {inputStream} = state;
|
|
78
78
|
const origPos = inputStream.pos;
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
inputStream.nextCharCode();
|
|
80
|
+
// A range can operate in one of two modes: matching a single, 16-bit _code unit_,
|
|
81
|
+
// or matching a _code point_. (Code points over 0xFFFF take up two 16-bit code units.)
|
|
82
|
+
const cp = this.matchCodePoint ? inputStream.nextCodePoint() : inputStream.nextCharCode();
|
|
84
83
|
|
|
84
|
+
// Always compare by code point value to get the correct result in all scenarios.
|
|
85
|
+
// Note that for strings of length 1, codePointAt(0) and charCodeAt(0) are equivalent.
|
|
85
86
|
if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
|
|
86
87
|
state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
|
|
87
88
|
return true;
|
package/src/pexprs-main.js
CHANGED
|
@@ -53,6 +53,9 @@ class Range extends PExpr {
|
|
|
53
53
|
super();
|
|
54
54
|
this.from = from;
|
|
55
55
|
this.to = to;
|
|
56
|
+
// If either `from` or `to` is made up of multiple code units, then
|
|
57
|
+
// the range should consume a full code point, not a single code unit.
|
|
58
|
+
this.matchCodePoint = from.length > 1 || to.length > 1;
|
|
56
59
|
}
|
|
57
60
|
}
|
|
58
61
|
|