ohm-js 16.2.0-pre.esm → 16.3.0-dev.unicode-code-point-escape

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ohm-js",
3
- "version": "16.2.0-pre.esm",
3
+ "version": "16.3.0-dev.unicode-code-point-escape",
4
4
  "description": "An object-oriented language for parsing and pattern matching",
5
5
  "repository": "https://github.com/harc/ohm",
6
6
  "keywords": [
@@ -29,6 +29,21 @@ InputStream.prototype = {
29
29
  return ans;
30
30
  },
31
31
 
32
+ nextCharCode() {
33
+ const nextChar = this.next();
34
+ return nextChar && nextChar.charCodeAt(0);
35
+ },
36
+
37
+ nextCodePoint() {
38
+ const cp = this.source.slice(this.pos++).codePointAt(0);
39
+ // If the code point is beyond plane 0, it takes up two characters.
40
+ if (cp > 0xffff) {
41
+ this.pos += 1;
42
+ }
43
+ this.examinedLength = Math.max(this.examinedLength, this.pos);
44
+ return cp;
45
+ },
46
+
32
47
  matchString(s, optIgnoreCase) {
33
48
  let idx;
34
49
  if (optIgnoreCase) {
package/src/common.js CHANGED
@@ -149,6 +149,8 @@ exports.escapeChar = function(c, optDelim) {
149
149
  }
150
150
  };
151
151
 
152
+ const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
153
+
152
154
  exports.unescapeChar = function(s) {
153
155
  if (s.charAt(0) === '\\') {
154
156
  switch (s.charAt(1)) {
@@ -165,9 +167,11 @@ exports.unescapeChar = function(s) {
165
167
  case 'v':
166
168
  return '\v';
167
169
  case 'x':
168
- return String.fromCharCode(parseInt(s.substring(2, 4), 16));
170
+ return escapeUnicode(s.slice(2, 4));
169
171
  case 'u':
170
- return String.fromCharCode(parseInt(s.substring(2, 6), 16));
172
+ return s.charAt(2) === '{' ?
173
+ escapeUnicode(s.slice(3, -1)) :
174
+ escapeUnicode(s.slice(2, 6));
171
175
  default:
172
176
  return s.charAt(1);
173
177
  }
package/src/main.js CHANGED
@@ -230,10 +230,6 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
230
230
  return common.unescapeChar(this.sourceString);
231
231
  },
232
232
 
233
- escapeChar(_) {
234
- return this.sourceString;
235
- },
236
-
237
233
  NonemptyListOf(x, _, xs) {
238
234
  return [x.visit()].concat(xs.children.map(c => c.visit()));
239
235
  },
@@ -92,7 +92,7 @@ Ohm {
92
92
 
93
93
  terminalChar
94
94
  = escapeChar
95
- | ~"\\" ~"\"" ~"\n" any
95
+ | ~"\\" ~"\"" ~"\n" "\u{0}".."\u{10FFFF}"
96
96
 
97
97
  escapeChar (an escape sequence)
98
98
  = "\\\\" -- backslash
@@ -102,6 +102,7 @@ Ohm {
102
102
  | "\\n" -- lineFeed
103
103
  | "\\r" -- carriageReturn
104
104
  | "\\t" -- tab
105
+ | "\\u{" hexDigit+ "}" -- unicodeCodePoint
105
106
  | "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
106
107
  | "\\x" hexDigit hexDigit -- hexEscape
107
108
 
@@ -76,9 +76,14 @@ pexprs.Terminal.prototype.eval = function(state) {
76
76
  pexprs.Range.prototype.eval = function(state) {
77
77
  const {inputStream} = state;
78
78
  const origPos = inputStream.pos;
79
- const ch = inputStream.next();
80
- if (ch && this.from <= ch && ch <= this.to) {
81
- state.pushBinding(new TerminalNode(state.grammar, ch), origPos);
79
+
80
+ const cp =
81
+ this.from.length > 1 || this.to.length > 1 ?
82
+ inputStream.nextCodePoint() :
83
+ inputStream.nextCharCode();
84
+
85
+ if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
86
+ state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
82
87
  return true;
83
88
  } else {
84
89
  state.processFailure(origPos, this);