ohm-js 16.2.0 → 16.3.0-dev.unicode-code-point-escape

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ohm-js",
3
- "version": "16.2.0",
3
+ "version": "16.3.0-dev.unicode-code-point-escape",
4
4
  "description": "An object-oriented language for parsing and pattern matching",
5
5
  "repository": "https://github.com/harc/ohm",
6
6
  "keywords": [
package/src/InputStream.js CHANGED
@@ -29,6 +29,21 @@ InputStream.prototype = {
29
29
  return ans;
30
30
  },
31
31
 
32
+ nextCharCode() {
33
+ const nextChar = this.next();
34
+ return nextChar && nextChar.charCodeAt(0);
35
+ },
36
+
37
+ nextCodePoint() {
38
+ const cp = this.source.slice(this.pos++).codePointAt(0);
39
+ // If the code point is beyond plane 0, it takes up two characters.
40
+ if (cp > 0xffff) {
41
+ this.pos += 1;
42
+ }
43
+ this.examinedLength = Math.max(this.examinedLength, this.pos);
44
+ return cp;
45
+ },
46
+
32
47
  matchString(s, optIgnoreCase) {
33
48
  let idx;
34
49
  if (optIgnoreCase) {
package/src/common.js CHANGED
@@ -149,6 +149,8 @@ exports.escapeChar = function(c, optDelim) {
149
149
  }
150
150
  };
151
151
 
152
+ const escapeUnicode = str => String.fromCodePoint(parseInt(str, 16));
153
+
152
154
  exports.unescapeChar = function(s) {
153
155
  if (s.charAt(0) === '\\') {
154
156
  switch (s.charAt(1)) {
@@ -165,9 +167,11 @@ exports.unescapeChar = function(s) {
165
167
  case 'v':
166
168
  return '\v';
167
169
  case 'x':
168
- return String.fromCharCode(parseInt(s.substring(2, 4), 16));
170
+ return escapeUnicode(s.slice(2, 4));
169
171
  case 'u':
170
- return String.fromCharCode(parseInt(s.substring(2, 6), 16));
172
+ return s.charAt(2) === '{' ?
173
+ escapeUnicode(s.slice(3, -1)) :
174
+ escapeUnicode(s.slice(2, 6));
171
175
  default:
172
176
  return s.charAt(1);
173
177
  }
package/src/main.js CHANGED
@@ -230,10 +230,6 @@ function buildGrammar(match, namespace, optOhmGrammarForTesting) {
230
230
  return common.unescapeChar(this.sourceString);
231
231
  },
232
232
 
233
- escapeChar(_) {
234
- return this.sourceString;
235
- },
236
-
237
233
  NonemptyListOf(x, _, xs) {
238
234
  return [x.visit()].concat(xs.children.map(c => c.visit()));
239
235
  },
package/src/ohm-grammar.ohm CHANGED
@@ -92,7 +92,7 @@ Ohm {
92
92
 
93
93
  terminalChar
94
94
  = escapeChar
95
- | ~"\\" ~"\"" ~"\n" any
95
+ | ~"\\" ~"\"" ~"\n" "\u{0}".."\u{10FFFF}"
96
96
 
97
97
  escapeChar (an escape sequence)
98
98
  = "\\\\" -- backslash
@@ -102,6 +102,7 @@ Ohm {
102
102
  | "\\n" -- lineFeed
103
103
  | "\\r" -- carriageReturn
104
104
  | "\\t" -- tab
105
+ | "\\u{" hexDigit+ "}" -- unicodeCodePoint
105
106
  | "\\u" hexDigit hexDigit hexDigit hexDigit -- unicodeEscape
106
107
  | "\\x" hexDigit hexDigit -- hexEscape
107
108
 
package/src/pexprs-eval.js CHANGED
@@ -76,9 +76,14 @@ pexprs.Terminal.prototype.eval = function(state) {
76
76
  pexprs.Range.prototype.eval = function(state) {
77
77
  const {inputStream} = state;
78
78
  const origPos = inputStream.pos;
79
- const ch = inputStream.next();
80
- if (ch && this.from <= ch && ch <= this.to) {
81
- state.pushBinding(new TerminalNode(state.grammar, ch), origPos);
79
+
80
+ const cp =
81
+ this.from.length > 1 || this.to.length > 1 ?
82
+ inputStream.nextCodePoint() :
83
+ inputStream.nextCharCode();
84
+
85
+ if (cp !== undefined && this.from.codePointAt(0) <= cp && cp <= this.to.codePointAt(0)) {
86
+ state.pushBinding(new TerminalNode(state.grammar, String.fromCodePoint(cp)), origPos);
82
87
  return true;
83
88
  } else {
84
89
  state.processFailure(origPos, this);
package/README.md DELETED
@@ -1,198 +0,0 @@
1
- # [Ohm](https://ohmlang.github.io/) &middot; [![NPM](https://img.shields.io/npm/v/ohm-js.svg)](https://www.npmjs.com/package/ohm-js) ![Node.js CI](https://github.com/harc/ohm/workflows/Node.js%20CI/badge.svg?style=flat-square) [![Chat on Discord](https://img.shields.io/badge/chat-on%20discord-7289da.svg?sanitize=true)](https://discord.gg/KwxY5gegRQ)
2
-
3
- Ohm is a parsing toolkit consisting of a library and a domain-specific language. You can use it to parse custom file formats or quickly build parsers, interpreters, and compilers for programming languages.
4
-
5
- The _Ohm language_ is based on [parsing expression grammars](http://en.wikipedia.org/wiki/Parsing_expression_grammar)
6
- (PEGs), which are a formal way of describing syntax, similar to regular expressions and context-free
7
- grammars. The _Ohm library_ provides a JavaScript interface for creating parsers, interpreters, and
8
- more from the grammars you write.
9
-
10
- - **Full support for left-recursive rules** means that you can define left-associative operators in a natural way.
11
- - **Object-oriented grammar extension** makes it easy to extend an existing language with new syntax.
12
- - **Modular semantic actions.** Unlike many similar tools, Ohm completely
13
- separates grammars from semantic actions. This separation improves modularity and extensibility, and makes both grammars and semantic actions easier to read and understand.
14
- - **Online editor and visualizer.** The [Ohm Editor](https://ohmlang.github.io/editor/) provides instant feedback and an [interactive visualization](https://dubroy.com/blog/visualizing-packrat-parsing/) that makes the entire execution of the parser visible and tangible. It'll [make you feel like you have superpowers](https://twitter.com/kylestetz/status/1349770893120172036). 💪
15
-
16
- Some awesome things people have built using Ohm:
17
-
18
- - [Seymour](https://harc.github.io/seymour-live2017/), a live programming environment for the classroom.
19
- - [Shadama](https://tinlizzie.org/~ohshima/shadama2/live2017/), a particle simulation language designed for high-school science.
20
- - [turtle.audio](http://turtle.audio/), an audio environment where simple text commands generate lines that can play music.
21
- - A [browser-based tool](https://www.arthurcarabott.com/konnakkol/) that turns written _Konnakkol_ (a South Indian vocal percussion art) into audio.
22
- - [Wildcard](https://www.geoffreylitt.com/wildcard/), a browser extension that empowers anyone to modify websites to meet their own specific needs, uses Ohm for its spreadsheet formulas.
23
-
24
- ## Getting Started
25
-
26
- The easiest way to get started with Ohm is to use the [interactive editor](https://ohmlang.github.io/editor/). Alternatively, you can play with one of the following examples on JSFiddle:
27
-
28
- - [Basic parsing example](https://jsfiddle.net/pdubroy/p3b1v2xb/)
29
- - [Arithmetic example with semantics](https://jsfiddle.net/pdubroy/15k63qae/)
30
-
31
- ### Resources
32
-
33
- - Tutorial: [Ohm: Parsing Made Easy](https://nextjournal.com/dubroy/ohm-parsing-made-easy)
34
- - The [math example](examples/math/index.html) is extensively commented and is a good way to dive deeper.
35
- - [Examples](examples/)
36
- - [Documentation](doc/README.md)
37
- - For community support and discussion, join us on [Discord](https://discord.gg/KwxY5gegRQ), [GitHub Discussions](https://github.com/harc/ohm/discussions), or the [ohm-discuss mailing list](https://groups.google.com/u/0/g/ohm-discuss).
38
- - For updates, follow [@\_ohmjs on Twitter](https://twitter.com/_ohmjs).
39
-
40
- ### Installation
41
-
42
- For use in the browser:
43
-
44
- - Download [ohm.js](https://unpkg.com/ohm-js@latest/dist/ohm.js) (development version, with full source and comments) or [ohm.min.js](https://unpkg.com/ohm-js@latest/dist/ohm.min.js) (a minified version for faster page loads).
45
- - Add a new script tag to your page, and set the `src` attribute to the path of the file you just downloaded. E.g.:
46
-
47
- ```html
48
- <script src="ohm.js"></script>
49
- ```
50
-
51
- This creates a global variable named `ohm`.
52
-
53
- If you are using Node.js, you can just install the `ohm-js` package using [npm](http://npmjs.org):
54
-
55
- npm install ohm-js
56
-
57
- This will install Ohm in the local node_modules folder. Use `require` to access it from a Node script:
58
-
59
- <!-- @markscript
60
- markscript.transformNextBlock(s => s.replace('const ', 'var '));
61
- -->
62
-
63
- ```js
64
- const ohm = require('ohm-js');
65
- ```
66
-
67
- ### Basics
68
-
69
- #### Defining Grammars
70
-
71
- ![Instantiating a grammar](http://harc.github.io//ohm/doc/images/instantiating-grammars.png)
72
-
73
- To use Ohm, you need a grammar that is written in the Ohm language. The grammar provides a formal
74
- definition of the language or data format that you want to parse. There are a few different ways
75
- you can define an Ohm grammar:
76
-
77
- - The simplest option is to define the grammar directly in a JavaScript string and instantiate it
78
- using `ohm.grammar()`. In most cases, you should use a [template literal with String.raw](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/raw):
79
-
80
- ```js
81
- const myGrammar = ohm.grammar(String.raw`
82
- MyGrammar {
83
- greeting = "Hello" | "Hola"
84
- }
85
- `);
86
- ```
87
-
88
- - **In Node.js**, you can define the grammar in a separate file, and read the file's contents and instantiate it using `ohm.grammar(contents)`:
89
-
90
- In `myGrammar.ohm`:
91
-
92
- MyGrammar {
93
- greeting = "Hello" | "Hola"
94
- }
95
-
96
- In JavaScript:
97
-
98
- ```js
99
- const fs = require('fs');
100
- const ohm = require('ohm-js');
101
- const contents = fs.readFileSync('myGrammar.ohm', 'utf-8');
102
- const myGrammar = ohm.grammar(contents);
103
- ```
104
-
105
- For more information, see [Instantiating Grammars](doc/api-reference.md#instantiating-grammars) in the API reference.
106
-
107
- #### Using Grammars
108
-
109
- ![Matching input](http://harc.github.io/ohm/doc/images/matching.png)
110
-
111
- <!-- @markscript
112
- // The duplication here is required because Markscript only executes top-level code blocks.
113
- // TODO: Consider fixing this in Markscript.
114
- const myGrammar = ohm.grammar('MyGrammar { greeting = "Hello" | "Hola" }');
115
- -->
116
-
117
- Once you've instantiated a grammar object, use the grammar's `match()` method to recognize input:
118
-
119
- ```js
120
- const userInput = 'Hello';
121
- const m = myGrammar.match(userInput);
122
- if (m.succeeded()) {
123
- console.log('Greetings, human.');
124
- } else {
125
- console.log("That's not a greeting!");
126
- }
127
- ```
128
-
129
- The result is a MatchResult object. You can use the `succeeded()` and `failed()` methods to see whether the input was recognized or not.
130
-
131
- For more information, see the [main documentation](doc/README.md).
132
-
133
- ### Debugging
134
-
135
- Ohm has two tools to help you debug grammars: a text trace, and a graphical visualizer.
136
-
137
- [![Ohm Visualizer](http://harc.github.io/ohm/doc/images/visualizer-small.png)](https://ohmlang.github.io/editor)
138
-
139
- You can [try the visualizer online](https://ohmlang.github.io/editor).
140
-
141
- To see the text trace for a grammar `g`, just use the [`g.trace()`](./doc/api-reference.md#trace)
142
- method instead of `g.match`. It takes the same arguments, but instead of returning a MatchResult
143
- object, it returns a Trace object — calling its `toString` method returns a string describing
144
- all of the decisions the parser made when trying to match the input. For example, here is the
145
- result of `g.trace('ab').toString()` for the grammar `G { start = letter+ }`:
146
-
147
- <!-- @markscript
148
- markscript.transformNextBlock(function(code) {
149
- const trace = ohm.grammar('G { start = letter+ }').trace('ab');
150
- assert.equal(trace.toString().trim(), code.trim());
151
- });
152
- -->
153
-
154
- ```
155
- ab ✓ start ⇒ "ab"
156
- ab ✓ letter+ ⇒ "ab"
157
- ab ✓ letter ⇒ "a"
158
- ab ✓ lower ⇒ "a"
159
- ab ✓ Unicode [Ll] character ⇒ "a"
160
- b ✓ letter ⇒ "b"
161
- b ✓ lower ⇒ "b"
162
- b ✓ Unicode [Ll] character ⇒ "b"
163
- ✗ letter
164
- ✗ lower
165
- ✗ Unicode [Ll] character
166
- ✗ upper
167
- ✗ Unicode [Lu] character
168
- ✗ unicodeLtmo
169
- ✗ Unicode [Ltmo] character
170
- ✓ end ⇒ ""
171
- ```
172
-
173
- ## Publishing Grammars
174
-
175
- If you've written an Ohm grammar that you'd like to share with others, see
176
- our [suggestions for publishing grammars](./doc/publishing-grammars.md).
177
-
178
- ## Contributing to Ohm
179
-
180
- All you need to get started:
181
-
182
- git clone https://github.com/harc/ohm.git
183
- cd ohm
184
- npm install
185
-
186
- **NOTE:** We recommend using the latest Node.js stable release.
187
-
188
- ### Some useful scripts
189
-
190
- - `npm test` runs the unit tests.
191
- - `npm run test-watch` re-runs the unit tests every time a file changes.
192
- - `npm run build` builds [dist/ohm.js](./dist/ohm.js) and [dist/ohm.min.js](./dist/ohm.min.js),
193
- which are stand-alone bundles that can be included in a webpage.
194
- - When editing Ohm's own grammar (in `src/ohm-grammar.ohm`), run `npm run bootstrap` to re-build Ohm
195
- and test your changes.
196
-
197
- Before submitting a pull request, be sure to add tests, and ensure that `npm run prepublish` runs
198
- without errors.