grammy 0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +314 -0
- data/docs/CHANGELOG.md +15 -0
- data/docs/LICENSE.md +21 -0
- data/docs/TODO.md +157 -0
- data/lib/extensions/debugging.rb +46 -0
- data/lib/grammy/ast/transformer.rb +25 -0
- data/lib/grammy/ast.rb +27 -0
- data/lib/grammy/combinator/primitives.rb +41 -0
- data/lib/grammy/errors.rb +3 -0
- data/lib/grammy/grammar.rb +66 -0
- data/lib/grammy/location.rb +27 -0
- data/lib/grammy/match.rb +19 -0
- data/lib/grammy/matcher/alternative.rb +22 -0
- data/lib/grammy/matcher/eof.rb +17 -0
- data/lib/grammy/matcher/eol.rb +17 -0
- data/lib/grammy/matcher/regexp.rb +16 -0
- data/lib/grammy/matcher/repetition.rb +22 -0
- data/lib/grammy/matcher/sequence.rb +16 -0
- data/lib/grammy/matcher/sof.rb +17 -0
- data/lib/grammy/matcher/sol.rb +17 -0
- data/lib/grammy/matcher/string.rb +16 -0
- data/lib/grammy/matcher.rb +17 -0
- data/lib/grammy/parse_tree.rb +11 -0
- data/lib/grammy/scanner.rb +75 -0
- data/lib/grammy/token.rb +28 -0
- data/lib/grammy/tree/transformation.rb +31 -0
- data/lib/grammy/tree.rb +28 -0
- data/lib/grammy.rb +9 -0
- metadata +74 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: d714a645eea4ccadea369eafd8adf22ced89afc16b114fc80fa913bd5d06b73a
|
|
4
|
+
data.tar.gz: 480c0b3c9eea43baf7702359ae8b27105c44ae87d8f88d3c86a56101b8c77b16
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: f1bcba79afd6e4280db5be638510576fdd810feb6628f58ab68fb7666dcb5b58ef8cbeacc03ae03b4104af4a8b6d477ec6df90a5b539b4d2265016b261ae6a0d
|
|
7
|
+
data.tar.gz: e67c989b6a0759a0d189c782b8cb5b17fb8737e210bd373ce76dd6a9fe219a3c3ac28c276e2937537a7bd0b752dd8f5514c41db6727c2f06b259b193f861d3fd
|
data/README.md
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# Grammy
|
|
2
|
+
|
|
3
|
+
Grammy is a tool for generating parsers.
|
|
4
|
+
You describe the language with a grammar written in a Ruby [DSL], similar to [EBNF].
|
|
5
|
+
Grammy dynamically generates a parser from that description.
|
|
6
|
+
You can then use the parser to parse strings into a [parse tree],
|
|
7
|
+
and create an [AST] from the parse tree.
|
|
8
|
+
|
|
9
|
+
[MIT License](https://opensource.org/licenses/MIT)
|
|
10
|
+
[Version](https://github.com/stone-lang/stone/blob/master/src/stone/version.rb)
|
|
11
|
+
[Gem on RubyGems](https://rubygems.org/gems/grammy)
|
|
12
|
+
|
|
13
|
+
## ToC
|
|
14
|
+
|
|
15
|
+
- [Features](#features)
|
|
16
|
+
- [Usage](#usage)
|
|
17
|
+
- [Combinators](#combinators)
|
|
18
|
+
- [Parse Tree](#parse-tree)
|
|
19
|
+
- [AST](#ast)
|
|
20
|
+
- [Tests](#tests)
|
|
21
|
+
- [License](#license)
|
|
22
|
+
- [Contributing](#contributing)
|
|
23
|
+
- [Changelog](./docs/CHANGELOG.md)
|
|
24
|
+
- [TODO](./docs/TODO.md)
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
Grammy implements a [PEG] (Parsing Expression Grammar) parser.
|
|
29
|
+
But that's an implementation detail, possibly subject to change,
|
|
30
|
+
if something better comes along (LPEG, GLR, etc).
|
|
31
|
+
PEG parsers are quick (using "packrat" caching) and easy to use.
|
|
32
|
+
They resolve ambiguity through ordered choice and handle right recursion and unlimited lookahead; automatic left-recursion support is planned (see the [TODO](./docs/TODO.md)).
|
|
33
|
+
|
|
34
|
+
You can use Grammy to parse complex languages.
|
|
35
|
+
I'm going to be using it to write a full general-purpose programming language.
|
|
36
|
+
But you can also use it to parse simpler languages, like:
|
|
37
|
+
|
|
38
|
+
- JSON
|
|
39
|
+
- CSV
|
|
40
|
+
- external DSLs
|
|
41
|
+
- configuration files
|
|
42
|
+
- data formats
|
|
43
|
+
- HTTP headers
|
|
44
|
+
|
|
45
|
+
Basically, if you have an [EBNF] grammar, it should be easy to use Grammy to parse it.
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
~~~ ruby
|
|
50
|
+
require "grammy"
|
|
51
|
+
require "arithmetic" # Your grammar file, as below.
|
|
52
|
+
|
|
53
|
+
input = "1+2*3"
|
|
54
|
+
parse_tree = Arithmetic.parse(input)
|
|
55
|
+
ast = parse_tree.ast
|
|
56
|
+
~~~
|
|
57
|
+
|
|
58
|
+
I'm still experimenting with a few different DSL syntaxes for the grammar.
|
|
59
|
+
|
|
60
|
+
### Class Methods
|
|
61
|
+
|
|
62
|
+
NOTE: This is the only syntax currently implemented.
|
|
63
|
+
|
|
64
|
+
~~~ ruby
|
|
65
|
+
require 'grammy'
|
|
66
|
+
|
|
67
|
+
class Arithmetic < Grammy::Grammar
|
|
68
|
+
# Specify which rule to start with.
|
|
69
|
+
start :expression
|
|
70
|
+
|
|
71
|
+
# Define the rules.
|
|
72
|
+
rule(:expression) { term + (plus + term)[0..] }
|
|
73
|
+
rule(:term) { factor + (times + factor)[0..] }
|
|
74
|
+
rule(:factor) { number | parens(expression) }
|
|
75
|
+
terminal(:number) { /\d+/ } # You can also use `token` instead of `terminal`.
|
|
76
|
+
terminal(:plus) { "+" } # A terminal rule matches a string or regex, without needing `str` or `reg`.
|
|
77
|
+
terminal(:times) { "*" } # Other than that, it works like a normal rule.
|
|
78
|
+
|
|
79
|
+
# Define any custom combinators.
|
|
80
|
+
def parens(exp) = str("(") + exp + str(")")
|
|
81
|
+
end
|
|
82
|
+
~~~
|
|
83
|
+
|
|
84
|
+
### Decorated Methods
|
|
85
|
+
|
|
86
|
+
NOTE: This syntax has not been implemented yet.
|
|
87
|
+
|
|
88
|
+
~~~ ruby
|
|
89
|
+
require 'grammy'
|
|
90
|
+
|
|
91
|
+
class Arithmetic < Grammy::Grammar
|
|
92
|
+
# Specify which rule to start with.
|
|
93
|
+
start :expression
|
|
94
|
+
|
|
95
|
+
# Define the rules.
|
|
96
|
+
rule def expression = term + (plus + term)[0..]
|
|
97
|
+
rule def term = factor + (times + factor)[0..]
|
|
98
|
+
rule def factor = number | parens(expression)
|
|
99
|
+
terminal def number = /\d+/
|
|
100
|
+
terminal def plus = "+"
|
|
101
|
+
terminal def times = "*"
|
|
102
|
+
|
|
103
|
+
# Define any custom combinators.
|
|
104
|
+
def parens(exp) = str("(") + exp + str(")")
|
|
105
|
+
end
|
|
106
|
+
~~~
|
|
107
|
+
|
|
108
|
+
### Instance Methods
|
|
109
|
+
|
|
110
|
+
NOTE: This syntax has not been implemented yet.
|
|
111
|
+
|
|
112
|
+
~~~ ruby
|
|
113
|
+
require 'grammy'
|
|
114
|
+
|
|
115
|
+
class Arithmetic < Grammy::Grammar
|
|
116
|
+
# Specify which rule to start with.
|
|
117
|
+
start :expression
|
|
118
|
+
|
|
119
|
+
# Define the rules.
|
|
120
|
+
def expression = rule { term + (str("+") + term)[0..] }
|
|
121
|
+
def term = rule { factor + (str("*") + factor)[0..] }
|
|
122
|
+
def factor = rule { number | parens(expression) }
|
|
123
|
+
def number = terminal { /\d+/ }
|
|
124
|
+
|
|
125
|
+
# Define any custom combinators.
|
|
126
|
+
def parens(exp) = str("(") + exp + str(")")
|
|
127
|
+
end
|
|
128
|
+
~~~
|
|
129
|
+
|
|
130
|
+
## Combinators
|
|
131
|
+
|
|
132
|
+
Grammy uses combinators to define the grammar.
|
|
133
|
+
Combinators are functions that take one or more parsers as arguments and return a new parser.
|
|
134
|
+
|
|
135
|
+
Only a few primitive combinators are needed.
|
|
136
|
+
|
|
137
|
+
### String
|
|
138
|
+
|
|
139
|
+
The `str` combinator is used to match a string; it has an alias of `lit` (for "literal").
|
|
140
|
+
|
|
141
|
+
~~~ ruby
|
|
142
|
+
str("return")
|
|
143
|
+
lit("return")
|
|
144
|
+
~~~
|
|
145
|
+
|
|
146
|
+
### Regex
|
|
147
|
+
|
|
148
|
+
The `reg` combinator is used to match a regular expression.
|
|
149
|
+
|
|
150
|
+
~~~ ruby
|
|
151
|
+
reg(/\d+/)
|
|
152
|
+
~~~
|
|
153
|
+
|
|
154
|
+
### Sequence
|
|
155
|
+
|
|
156
|
+
You use the `seq` combinator to specify a sequence of what to match.
|
|
157
|
+
|
|
158
|
+
This example matches a sequence of a term, a plus sign, and another term.
|
|
159
|
+
|
|
160
|
+
~~~ ruby
|
|
161
|
+
seq(term, str("+"), term)
|
|
162
|
+
~~~
|
|
163
|
+
|
|
164
|
+
### Alternatives
|
|
165
|
+
|
|
166
|
+
The `alt` combinator is used to specify alternatives (multiple choices) of what to match.
|
|
167
|
+
|
|
168
|
+
This example matches either a plus sign or a minus sign.
|
|
169
|
+
|
|
170
|
+
~~~ ruby
|
|
171
|
+
alt(str("+"), str("-"))
|
|
172
|
+
~~~
|
|
173
|
+
|
|
174
|
+
> [!NOTE]
|
|
175
|
+
> In practical use, this would most likely be done with a single `reg` call:
|
|
176
|
+
>
|
|
177
|
+
> ~~~ ruby
|
|
178
|
+
> reg(/[+-]/)
|
|
179
|
+
> ~~~
|
|
180
|
+
|
|
181
|
+
### Repetition
|
|
182
|
+
|
|
183
|
+
The `rep` combinator is used to specify repetition of what to match.
|
|
184
|
+
You can specify the minimum and maximum number of repetitions, using a range.
|
|
185
|
+
|
|
186
|
+
This example matches one or more digits.
|
|
187
|
+
|
|
188
|
+
~~~ ruby
|
|
189
|
+
rep(reg(/\d/), 1..)
|
|
190
|
+
~~~
|
|
191
|
+
|
|
192
|
+
### Operator DSL
|
|
193
|
+
|
|
194
|
+
You can use `+`, `|`, and `[]` operators in place of the named combinators.
|
|
195
|
+
The `+` operator can be used in place of the `seq` combinator.
|
|
196
|
+
The `|` operator can be used in place of the `alt` combinator.
|
|
197
|
+
The `[]` operator can be used in place of the `rep` combinator.
|
|
198
|
+
|
|
199
|
+
For example, the following two lines are equivalent:
|
|
200
|
+
|
|
201
|
+
~~~ ruby
|
|
202
|
+
seq(alt(term, expr), str("+"), rep(term, 0..1))
|
|
203
|
+
(term | expr) + lit("+") + term[0..1]
|
|
204
|
+
~~~
|
|
205
|
+
|
|
206
|
+
### Start/End of Line/File
|
|
207
|
+
|
|
208
|
+
The `eol` and `sol` combinators match the end of a line and the start of a line.
|
|
209
|
+
The `eof` and `sof` combinators match the end of a file and the start of a file.
|
|
210
|
+
|
|
211
|
+
### Whitespace
|
|
212
|
+
|
|
213
|
+
The `wsp` combinator matches any whitespace characters, including tab, newline, carriage return, etc.
|
|
214
|
+
|
|
215
|
+
## Parse Tree
|
|
216
|
+
|
|
217
|
+
The internal nodes of the parse tree are ParseTree objects.
|
|
218
|
+
The leaves of the parse tree are Token objects.
|
|
219
|
+
|
|
220
|
+
The parse tree generated by the example above will look like this:
|
|
221
|
+
|
|
222
|
+
~~~ ruby
|
|
223
|
+
expected_parse_tree = ParseTree.new("expression", [
|
|
224
|
+
ParseTree.new("term", [
|
|
225
|
+
ParseTree.new("factor", [
|
|
226
|
+
ParseTree.new("number", [
|
|
227
|
+
Token.new("1")
|
|
228
|
+
])
|
|
229
|
+
])
|
|
230
|
+
]),
|
|
231
|
+
Token.new("+"),
|
|
232
|
+
ParseTree.new("term", [
|
|
233
|
+
ParseTree.new("factor", [
|
|
234
|
+
ParseTree.new("number", [
|
|
235
|
+
Token.new("2")
|
|
236
|
+
])]),
|
|
237
|
+
Token.new("*"),
|
|
238
|
+
ParseTree.new("factor", [
|
|
239
|
+
ParseTree.new("number", [
|
|
240
|
+
Token.new("3")
|
|
241
|
+
])
|
|
242
|
+
])
|
|
243
|
+
])
|
|
244
|
+
])
|
|
245
|
+
~~~
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
## AST
|
|
249
|
+
|
|
250
|
+
An AST (Abstract Syntax Tree) can be generated from the parse tree.
|
|
251
|
+
Your AST base class can be a subclass of `Grammy::Tree`,
|
|
252
|
+
and use the tree transformation DSL to derive the AST from the parse tree.
|
|
253
|
+
|
|
254
|
+
Given the parse tree above, and this tree transformation DSL:
|
|
255
|
+
|
|
256
|
+
~~~ ruby
|
|
257
|
+
transform(:term) do |node|
|
|
258
|
+
left, op, right = node.children
|
|
259
|
+
AST::BinaryOp.new(op.text, [transform(left), transform(right)])
|
|
260
|
+
end
|
|
261
|
+
transform(:number) { |token| token.with(value: token.text.to_i) }
|
|
262
|
+
~~~
|
|
263
|
+
|
|
264
|
+
You'd end up with the following AST:
|
|
265
|
+
|
|
266
|
+
~~~ ruby
|
|
267
|
+
expected_ast = AST::Arithmetic.new(
|
|
268
|
+
AST::BinaryOp.new("+", [
|
|
269
|
+
AST::Number.new(1),
|
|
270
|
+
AST::BinaryOp.new("*", [
|
|
271
|
+
AST::Number.new(2),
|
|
272
|
+
AST::Number.new(3)
|
|
273
|
+
])
|
|
274
|
+
])
|
|
275
|
+
)
|
|
276
|
+
~~~
|
|
277
|
+
|
|
278
|
+
## Tests
|
|
279
|
+
|
|
280
|
+
~~~ shell
|
|
281
|
+
rspec
|
|
282
|
+
~~~
|
|
283
|
+
|
|
284
|
+
## License
|
|
285
|
+
|
|
286
|
+
Copyright (c) 2024-2025 by Craig Buchek and BoochTek, LLC.
|
|
287
|
+
|
|
288
|
+
This code is licensed under the MIT License.
|
|
289
|
+
See the [LICENSE] for the full details.
|
|
290
|
+
|
|
291
|
+
## Contributing
|
|
292
|
+
|
|
293
|
+
[PRs] and [issues] are welcome!
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
## Release
|
|
297
|
+
|
|
298
|
+
To release a new version, update the version in `lib/VERSION`,
|
|
299
|
+
then build the gem and publish it to RubyGems.
|
|
300
|
+
|
|
301
|
+
~~~ shell
|
|
302
|
+
make publish
|
|
303
|
+
~~~
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
[DSL]: https://en.wikipedia.org/wiki/Domain-specific_language
|
|
308
|
+
[EBNF]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form
|
|
309
|
+
[AST]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
|
|
310
|
+
[parse tree]: https://en.wikipedia.org/wiki/Parse_tree
|
|
311
|
+
[PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar
|
|
312
|
+
[License]: ./docs/LICENSE.md
|
|
313
|
+
[PRs]: https://github.com/stone-lang/grammy/pulls
|
|
314
|
+
[issues]: https://github.com/stone-lang/grammy/issues
|
data/docs/CHANGELOG.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 (TBD)
|
|
4
|
+
|
|
5
|
+
Initial "release".
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- README, including Usage section
|
|
10
|
+
- CHANGELOG
|
|
11
|
+
- Basic classes: `Grammar`, `Scanner`, `Parser`
|
|
12
|
+
|
|
13
|
+
## Notes
|
|
14
|
+
|
|
15
|
+
- Sections for each release (in order): Changed, Added, Removed, Updated, Fixed
|
data/docs/LICENSE.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Craig Buchek and BoochTek, LLC.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/docs/TODO.md
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# TODO
|
|
2
|
+
|
|
3
|
+
Here's a list of things I'd like to complete to make Grammy **great**.
|
|
4
|
+
|
|
5
|
+
## Scanner
|
|
6
|
+
|
|
7
|
+
- [ ] line continuation
|
|
8
|
+
- [ ] `\` followed immediately by newline (traditional)
|
|
9
|
+
- [ ] newline followed immediately by `\` (more visible)
|
|
10
|
+
- [ ] skip leading whitespace on following line
|
|
11
|
+
- [ ] optional per grammar
|
|
12
|
+
- [ ] optional per rule
|
|
13
|
+
- [ ] indentation
|
|
14
|
+
|
|
15
|
+
## Grammar
|
|
16
|
+
|
|
17
|
+
- [x] primitive combinators: `str` (AKA `lit`) and `reg`
|
|
18
|
+
- [x] primitive combinators: `seq`, `alt`, `rep`
|
|
19
|
+
- [x] primitive combinators: `eol`/`sol`, `eof`/`sof`
|
|
20
|
+
- [x] primitive combinators: `wsp`
|
|
21
|
+
- [x] add `_` to canonical names (where it makes sense)
|
|
22
|
+
- [ ] combinator aliases
|
|
23
|
+
- [ ] `zero_or_more` / `_any`
|
|
24
|
+
- [ ] `one_or_more` / `_some`
|
|
25
|
+
- [ ] `zero_or_one` / `optional` (and maybe `_opt`?)
|
|
26
|
+
- [x] rules
|
|
27
|
+
- [x] DSL syntax
|
|
28
|
+
- `rule(:expression) { term + (str("+") + term)[0..] }`
|
|
29
|
+
- [ ] try instance methods syntax
|
|
30
|
+
- `def expression = term + (str("+") + term)[0..]`
|
|
31
|
+
- [ ] try decorated methods syntax
|
|
32
|
+
- `rule def expression = term + (str("+") + term)[0..]`
|
|
33
|
+
- [x] start rule
|
|
34
|
+
- [x] defaults to first defined rule
|
|
35
|
+
- [x] `terminal` (AKA `token`) rules
|
|
36
|
+
- [ ] AST generation
|
|
37
|
+
- [x] tree transformation DSL
|
|
38
|
+
- [ ] AST builder
|
|
39
|
+
- [ ] actions in grammar rules
|
|
40
|
+
- [ ] user-defined combinators
|
|
41
|
+
- [ ] error handling
|
|
42
|
+
- [ ] error messages
|
|
43
|
+
- [ ] error recovery
|
|
44
|
+
- [ ] `fail`/`catch` combinators?
|
|
45
|
+
- [ ] packrat caching/memoization
|
|
46
|
+
- [ ] lookahead predicates
|
|
47
|
+
- [ ] "and" lookahead predicate
|
|
48
|
+
- [ ] "not" lookahead predicate
|
|
49
|
+
- [ ] "cut operator"
|
|
50
|
+
- [ ] combinator?
|
|
51
|
+
- [ ] automatic insertion
|
|
52
|
+
- [ ] automatic left recursion support
|
|
53
|
+
|
|
54
|
+
## Setup
|
|
55
|
+
|
|
56
|
+
- [x] git (or jj)
|
|
57
|
+
- [ ] root directory only contains worktrees (git --bare; git worktree add)
|
|
58
|
+
- [ ] and maybe support files (`.gitignore`, `.vscode`, etc)
|
|
59
|
+
- [ ] add git aliases for `worktree add -b` and `switch`
|
|
60
|
+
- [x] `.gitignore`
|
|
61
|
+
- [x] GitHub project
|
|
62
|
+
- [x] `.tool-versions`
|
|
63
|
+
- [x] `Gemfile`
|
|
64
|
+
- [x] `.rubocop.yml`
|
|
65
|
+
- [x] `.markdownlint.yml`
|
|
66
|
+
- [x] `.rspec`
|
|
67
|
+
- [ ] `.irbrc`
|
|
68
|
+
- [ ] `.pryrc`
|
|
69
|
+
|
|
70
|
+
## Documentation
|
|
71
|
+
|
|
72
|
+
- [x] comprehensive `README` (and keep it updated)
|
|
73
|
+
- [x] intro
|
|
74
|
+
- [x] table of contents
|
|
75
|
+
- [x] features
|
|
76
|
+
- [ ] installation
|
|
77
|
+
- [x] usage
|
|
78
|
+
- [x] tests
|
|
79
|
+
- [x] contributing
|
|
80
|
+
- [ ] badges (see [shields.io](https://shields.io/))
|
|
81
|
+
- [ ] version
|
|
82
|
+
- [x] license
|
|
83
|
+
- [ ] build status (https://github.com/OWNER/REPO/actions/workflows/WORKFLOW/badge.svg)
|
|
84
|
+
- [ ] test coverage
|
|
85
|
+
- [ ] dependencies status
|
|
86
|
+
- [x] `TODO` (and keep it updated)
|
|
87
|
+
- [ ] `CHANGELOG` (and keep it updated)
|
|
88
|
+
- [x] `LICENSE` file (and keep it updated)
|
|
89
|
+
- [ ] code of conduct
|
|
90
|
+
- [ ] FAQ
|
|
91
|
+
|
|
92
|
+
## Automation
|
|
93
|
+
|
|
94
|
+
- [x] `Makefile` for "standard" common tasks
|
|
95
|
+
- [x] `Rakefile` for "standard" common tasks for Ruby projects
|
|
96
|
+
- [ ] update version
|
|
97
|
+
- [ ] verify that CHANGELOG is updated
|
|
98
|
+
- [x] upload to RubyGems
|
|
99
|
+
- [ ] update dependencies
|
|
100
|
+
- [ ] Ruby
|
|
101
|
+
- [ ] gems
|
|
102
|
+
- [ ] RuboCop (see if any rules need updated config)
|
|
103
|
+
- [ ] bun
|
|
104
|
+
- [x] CI/CD setup (GitHub Actions)
|
|
105
|
+
- [x] linting
|
|
106
|
+
- [x] RuboCop
|
|
107
|
+
- [x] markdownlint
|
|
108
|
+
- [ ] Reek
|
|
109
|
+
- [ ] bundler-audit
|
|
110
|
+
- [ ] test coverage
|
|
111
|
+
- [ ] code quality metrics
|
|
112
|
+
- [ ] git hooks (pre-commit, pre-push, etc)
|
|
113
|
+
- [ ] linting
|
|
114
|
+
- [ ] tests
|
|
115
|
+
- [ ] security checks (bundle audit, etc)
|
|
116
|
+
- [ ] ISSUES_TEMPLATE
|
|
117
|
+
- [ ] PULL_REQUEST_TEMPLATE
|
|
118
|
+
- [ ] `.editorconfig`
|
|
119
|
+
|
|
120
|
+
## Specs
|
|
121
|
+
|
|
122
|
+
- [x] RSpec setup
|
|
123
|
+
- [ ] unit tests
|
|
124
|
+
- [x] scanner
|
|
125
|
+
- [ ] combinators
|
|
126
|
+
- [x] grammar
|
|
127
|
+
- [x] parse tree
|
|
128
|
+
- [ ] AST builder
|
|
129
|
+
- [ ] integration tests
|
|
130
|
+
- [x] parser
|
|
131
|
+
- [ ] parse tree
|
|
132
|
+
- [ ] grammar
|
|
133
|
+
- [ ] AST builder
|
|
134
|
+
- [ ] user-defined combinators
|
|
135
|
+
- [ ] rule actions
|
|
136
|
+
- [ ] performance tests
|
|
137
|
+
- [ ] benchmarks
|
|
138
|
+
- [ ] property (fuzzing) tests
|
|
139
|
+
|
|
140
|
+
## Gem
|
|
141
|
+
|
|
142
|
+
- [x] publish a gem
|
|
143
|
+
- [x] `gemspec`
|
|
144
|
+
- [x] upload to RubyGems
|
|
145
|
+
- [ ] CLI
|
|
146
|
+
- [ ] `bin/grammy`
|
|
147
|
+
- [ ] install with the gem
|
|
148
|
+
- [ ] `--help`
|
|
149
|
+
- [ ] `--version`
|
|
150
|
+
- [ ] `--json` (use JSON nesting for parse tree nesting)
|
|
151
|
+
- [ ] `<grammar_file>` to output a serialized parse tree, with input from `STDIN`
|
|
152
|
+
|
|
153
|
+
## Support
|
|
154
|
+
|
|
155
|
+
- [ ] VS Code configuration
|
|
156
|
+
- [ ] tasks (`launch.json`)
|
|
157
|
+
- [ ] recommended extensions
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# The idea here is to allow `binding.break` and `binding.pry` to work
|
|
2
|
+
# only when running tests or when the DEBUG environment variable is set,
|
|
3
|
+
# and not have RuboCop whine about it.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
require "debug/config"
|
|
7
|
+
DEBUGGER__::CONFIG[:skip_path] = Array(DEBUGGER__::CONFIG[:skip_path]) + [binding.source_location.first]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Adds guarded debugger entry points to Binding.
# `binding.break!` and `binding.pry!` do nothing unless debugging is enabled
# (DEBUG environment variable set to something other than "0", or a test
# framework is loaded and DEBUG is not "0").
class Binding

  # Opens a `debug` session in the caller's frame; returns nil when debugging is off.
  def break!
    if should_debug?
      require "debug"
      require "binding_of_caller"
      # FIXME: This causes the breakpoint to happen **here**, instead of in the caller.
      #        The `skip_path` option SHOULD fix it, but I couldn't get it to work at all.
      #        I've added the pre-command `up` as a work-around.
      #        But it still shows **this** source first, before showing the "correct" location.
      # TODO: File a bug at https://github.com/ruby/debug/issues, requesting parity with Pry.
      binding.of_caller(1).break(pre: "up ;; list")
    end
  end

  # Opens a Pry session in the caller's frame; returns nil when debugging is off.
  def pry!
    if should_debug?
      require "pry"
      require "binding_of_caller"
      binding.of_caller(1).pry
    end
  end

  # TODO: Consider moving these elsewhere.
  private

  # Truthy when DEBUG is explicitly on, or when RSpec/Minitest is loaded
  # and DEBUG has not been explicitly set to "0".
  def should_debug?
    return true if debug_enabled?

    test_framework = defined?(RSpec) || defined?(Minitest)
    test_framework && !debug_disabled?
  end

  # True when DEBUG is set to anything other than "0" (unset counts as "0").
  def debug_enabled?
    flag = ENV.fetch("DEBUG", "0")
    flag != "0"
  end

  # True only when DEBUG is explicitly set to "0" (unset counts as "1").
  def debug_disabled?
    flag = ENV.fetch("DEBUG", "1")
    flag == "0"
  end

end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# require "grammy/tree"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# # The purpose of the Transformer is to provide a simpler DSL within `transform` blocks.
|
|
5
|
+
# # If the `transform` is for a ParseTree, you can use `name`, `children`, and `build(child)`.
|
|
6
|
+
# # If the `transform` is for a Token, you can use `name`, `token`, and `text`.
|
|
7
|
+
# module Grammy
|
|
8
|
+
# class AST < Tree
|
|
9
|
+
# class Transformer
|
|
10
|
+
|
|
11
|
+
# def initialize(node, node_class)
|
|
12
|
+
# @node = node
|
|
13
|
+
# @node_class = node_class
|
|
14
|
+
# end
|
|
15
|
+
|
|
16
|
+
# def build(child) = @node_class.build(child)
|
|
17
|
+
|
|
18
|
+
# def name = @node.name.to_sym
|
|
19
|
+
# def children = @node.children
|
|
20
|
+
# def token = @node
|
|
21
|
+
# def text = token.text
|
|
22
|
+
|
|
23
|
+
# end
|
|
24
|
+
# end
|
|
25
|
+
# end
|
data/lib/grammy/ast.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require "grammy/tree"
|
|
2
|
+
require "grammy/ast/transformer"
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
module Grammy
  # Tree subclass that is built from another tree by applying per-node-name
  # transformation blocks registered with `transform`.
  class AST < Tree

    class << self
      # Registers `blk` as the transformation for nodes named `name`.
      # Returns the registered block.
      def transform(name, &blk)
        transforms[name.to_sym] = blk
      end

      # Builds the result for `node`.
      # The `node` will be a ParseTree or a Token. The code can handle other trees and leaves though.
      # - With a registered transform: the block is run via `instance_exec` on a Transformer.
      # - Without one, and if the node has children: the children are built and flattened one level.
      # - Otherwise the node is returned unchanged.
      # NOTE(review): the only definition of `Transformer` visible in this package
      # (grammy/ast/transformer.rb) is commented out; confirm it is defined before
      # relying on registered transforms.
      def build(node)
        handler = transforms[node.name.to_sym]
        return Transformer.new(node, self).instance_exec(&handler) if handler
        return node unless node.respond_to?(:children)

        node.children.flat_map { |child| build(child) }
      end

      # Per-class registry of transforms, keyed by node name (Symbol).
      def transforms
        @transforms ||= {}
      end
    end

  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
require "grammy/matcher/string"
|
|
2
|
+
require "grammy/matcher/regexp"
|
|
3
|
+
require "grammy/matcher/sequence"
|
|
4
|
+
require "grammy/matcher/alternative"
|
|
5
|
+
require "grammy/matcher/repetition"
|
|
6
|
+
|
|
7
|
+
require "grammy/matcher/eol"
|
|
8
|
+
require "grammy/matcher/sol"
|
|
9
|
+
require "grammy/matcher/eof"
|
|
10
|
+
require "grammy/matcher/sof"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
module Grammy
  module Combinator
    # Primitive combinators. Each one builds and returns a matcher object;
    # all of them (and their aliases) are protected.
    module Primitives

      protected

      # Matcher for a literal string.
      def str(pattern)
        Grammy::Matcher::String.new(pattern)
      end

      # Matcher for a regular expression.
      def reg(pattern)
        Grammy::Matcher::Regexp.new(pattern)
      end

      # Matcher for the given matchers in sequence.
      def seq(*matchers)
        Grammy::Matcher::Sequence.new(*matchers)
      end

      # Matcher for a choice among the given matchers.
      def alt(*matchers)
        Grammy::Matcher::Alternative.new(*matchers)
      end

      # Matcher for `matcher` repeated a number of times within `count_range`.
      def rep(matcher, count_range)
        Grammy::Matcher::Repetition.new(matcher, count_range)
      end
      alias lit str

      # End of line.
      def eol
        Grammy::Matcher::EOL.new
      end

      # Start of line.
      def sol
        Grammy::Matcher::SOL.new
      end

      # End of file.
      def eof
        Grammy::Matcher::EOF.new
      end

      # Start of file.
      def sof
        Grammy::Matcher::SOF.new
      end

      # One or more whitespace characters.
      # If `\p` is not available, try `[[:space:]]+` or `\s+`.
      def wsp
        reg(/\p{Space}+/u)
      end

      # Use these aliases if you have naming conflicts with your grammar.
      alias _seq seq
      alias _alt alt
      alias _rep rep
      alias _str str
      alias _reg reg
      alias _lit str

    end
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
require "grammy/combinator/primitives"
|
|
2
|
+
require "grammy/errors"
|
|
3
|
+
require "grammy/scanner"
|
|
4
|
+
require "grammy/parse_tree"
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
module Grammy
|
|
8
|
+
class Grammar
|
|
9
|
+
|
|
10
|
+
include Grammy::Combinator::Primitives
|
|
11
|
+
|
|
12
|
+
class << self
|
|
13
|
+
# DSL for defining grammar rules.
|
|
14
|
+
# Records which rule parsing starts with; returns the rule name.
def start(rule_name)
  @start_rule = rule_name
end
|
|
15
|
+
# Defines a grammar rule called `name`.
# The block is evaluated in the context of the grammar instance; any Matcher
# it yields is run against the instance's `@scanner`, and other values are
# kept as-is. The (flattened) results become the children of a ParseTree
# named after the rule.
# The rule is exposed both as an instance method and as an entry in `rules`.
# Returns the rule's lambda.
#
# The block parameter is named (rather than forwarded anonymously with `&`)
# because Ruby 3.3.0 rejects anonymous block forwarding from inside a block.
def rule(name, &block)
  rule_proc = lambda { |_|
    results = instance_eval(&block)
    children = Array(results).flatten.map { |result|
      result.is_a?(Grammy::Matcher) ? result.match(@scanner) : result
    }
    Grammy::ParseTree.new(name.to_s, children.flatten)
  }
  define_method(name, &rule_proc)
  rules[name] = rule_proc
end
|
|
26
|
+
|
|
27
|
+
# Examples:
|
|
28
|
+
# terminal(:number, /\d+/)
|
|
29
|
+
# terminal(:number) { /\d+/ }
|
|
30
|
+
# terminal(:open_paren, "(")
|
|
31
|
+
# terminal(:open_paren) { "(" }
|
|
32
|
+
# Defines a terminal rule called `name` from a String or Regexp pattern,
# given either as the second argument or as the block's return value
# (supplying both raises ArgumentError).
# The rule is exposed as an instance method returning a fresh matcher,
# and recorded in `rules`. Returns the terminal's lambda.
def terminal(name, pattern = nil, &block)
  fail ArgumentError, "may only supply a pattern OR a block to #{__callee__}" if pattern && block

  pattern = block.call if block
  terminal_proc =
    case pattern
    when Regexp then -> { Grammy::Matcher::Regexp.new(pattern) }
    else -> { Grammy::Matcher::String.new(pattern) }
    end
  define_method(name, &terminal_proc)
  rules[name] = terminal_proc
end
|
|
43
|
+
alias token terminal
|
|
44
|
+
|
|
45
|
+
# Access to the rules.
|
|
46
|
+
# Name of the rule to start parsing with: the explicitly declared start rule,
# else the name of the first rule defined, else :start.
# (`Hash#first` would return a `[name, proc]` pair, so the first *key* is used;
# `&.` covers the case where no rule has been defined yet.)
def start_rule = @start_rule || @rules&.keys&.first || :start
|
|
47
|
+
# Registry of rules defined on this grammar class, created on first use.
def rules
  @rules ||= {}
end
|
|
48
|
+
|
|
49
|
+
# Parse an input using the grammar.
|
|
50
|
+
# Parses `input` by running the `start` rule (default: `start_rule`) on a new
# grammar instance backed by a new Scanner.
# Raises Grammy::ParseError when the rule returns nil, or returns an empty
# result for a non-empty input. Otherwise returns the rule's result.
def parse(input, start: start_rule)
  scanner = Grammy::Scanner.new(input)
  result = new(scanner).execute_rule(start)
  failed = result.nil? || (result.empty? && !scanner.input.empty?)
  fail(Grammy::ParseError, "Parsing failed at location #{scanner.location}") if failed

  result
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Primitive combinators will need access to the scanner.
|
|
60
|
+
# Stores the scanner that this grammar instance's rules match against.
def initialize(scanner)
  @scanner = scanner
end
|
|
61
|
+
|
|
62
|
+
# Runs the rule registered under `rule_name` in the context of this instance
# and returns its result.
# Raises ArgumentError naming the rule when no such rule is registered
# (previously this surfaced as an unrelated arity ArgumentError from `instance_eval`).
def execute_rule(rule_name)
  rule = rules.fetch(rule_name) { fail ArgumentError, "unknown rule: #{rule_name.inspect}" }
  instance_eval(&rule)
end
|
|
63
|
+
# Instance-level access to the rules registered on the grammar class.
def rules
  self.class.rules
end
|
|
64
|
+
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Grammy
  # Immutable position within the input.
  # row/column are 1-based.
  # index is 0-based, with respect to the full input string.
  class Location < Data.define(:row, :column, :index)

    # Defaults to the start of the input: row 1, column 1, index 0.
    # Defaults are supplied here (rather than by overriding `new`) so that both
    # positional and keyword construction work:
    #   Location.new(2, 5, 17)
    #   Location.new(row: 2, column: 5, index: 17)
    def initialize(row: 1, column: 1, index: 0)
      super(row:, column:, index:)
    end

    # Returns the Location reached after consuming `text` from this location.
    # Each "\n" in `text` moves to the next row; the column restarts after the
    # last newline.
    def advance(text)
      newline_count = text.count("\n")
      new_index = index + text.length
      new_row = row + newline_count
      if newline_count.zero?
        new_column = column + text.length
      else
        # Characters after the last newline, plus 1 because columns are 1-based.
        new_column = text.length - text.rindex("\n")
      end
      Location.new(new_row, new_column, new_index)
    end

    def to_s = "(#{row},#{column})"
    def inspect = to_s
    def pretty_print(pp) = pp.text(inspect) # For IRB output.

  end
end
|
data/lib/grammy/match.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require "grammy/location"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# A Match represents a successful match of a pattern in the input string.
|
|
5
|
+
# It contains the matched text and its start and end locations.
|
|
6
|
+
module Grammy
  # Immutable record of matched text and where it was found.
  # NOTE: start/end are a "closed interval", the locations of the first and last character of the match.
  class Match < Data.define(:text, :start_location, :end_location)

    # `text` is required; both locations default to a new `Grammy::Location`.
    # Defaults are supplied here (rather than by overriding `new`) so that both
    # positional and keyword construction work.
    def initialize(text:, start_location: Grammy::Location.new, end_location: Grammy::Location.new)
      super(text:, start_location:, end_location:)
    end

    def to_s = text
    def inspect = "#<Match #{text} (#{start_location}..#{end_location})>"
    def pretty_print(pp) = pp.text(inspect) # For IRB output.

  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require "grammy/matcher"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
module Grammy
  class Matcher
    # Ordered choice: tries each alternative in turn and yields the first success.
    class Alternative < Matcher

      def initialize(*alternatives)
        @alternatives = alternatives
      end

      # Returns the result of the first alternative that matches, or nil if none do.
      # Later alternatives are not tried once one has matched.
      def match(scanner)
        @alternatives.lazy.filter_map { |candidate| attempt(candidate, scanner) }.first
      end

      private

      # Tries one alternative; on failure the scanner is restored to where it was
      # before the attempt. Returns the alternative's result.
      def attempt(candidate, scanner)
        checkpoint = scanner.mark
        outcome = candidate.match(scanner)
        scanner.backtrack(checkpoint) unless outcome
        outcome
      end

    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require "grammy/matcher"
|
|
2
|
+
require "grammy/match"
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
module Grammy
  class Matcher
    # Matches (without consuming anything) when the scanner reports it is finished.
    class EOF < Matcher

      # There is no pattern for this matcher.
      def initialize
        super(nil)
      end

      # Returns a Match with nil text located at the scanner's current location
      # when the scanner is finished; otherwise nil.
      def match(scanner)
        return nil unless scanner.finished?

        here = scanner.location
        Match.new(nil, here, here)
      end

    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require "grammy/matcher"
|
|
2
|
+
|
|
3
|
+
module Grammy
  class Matcher
    # Greedy repetition of a submatcher, with the number of repetitions
    # constrained by a Range (e.g. `0..`, `1..`, `0..1`, `2...5`).
    class Repetition < Matcher

      def initialize(submatcher, count_range)
        @submatcher = submatcher
        @count_range = count_range
      end

      # Applies the submatcher repeatedly, up to the largest count the range allows.
      # Returns the Array of successful results when their number is within the
      # range (possibly an empty Array); otherwise nil.
      #
      # A failing expression must not consume input, so:
      # - a failed attempt is rolled back to where that attempt started, and
      # - an out-of-range count rolls back to where the repetition started.
      def match(scanner)
        start = scanner.mark
        matches = []
        while matches.size < max_count
          checkpoint = scanner.mark
          result = @submatcher.match(scanner)
          if result.nil?
            scanner.backtrack(checkpoint)
            break
          end
          matches << result
        end
        return matches if @count_range.include?(matches.size)

        scanner.backtrack(start)
        nil
      end

      private

      # Largest number of repetitions allowed by the range:
      # unbounded for endless ranges, and one less than `end` for exclusive ranges.
      def max_count
        limit = @count_range.end
        return Float::INFINITY if limit.nil?

        @count_range.exclude_end? ? limit - 1 : limit
      end

    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require "grammy/matcher"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
module Grammy
  class Matcher
    # Matches each submatcher in order; all of them must succeed.
    class Sequence < Matcher

      def initialize(*submatchers)
        @submatchers = submatchers
      end

      # Returns an Array with each submatcher's result, in order.
      # Stops at the first submatcher that fails and returns nil.
      def match(scanner)
        collected = []
        @submatchers.each do |part|
          outcome = part.match(scanner)
          return nil unless outcome

          collected << outcome
        end
        collected
      end

    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require "grammy/matcher"
|
|
2
|
+
require "grammy/match"
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
module Grammy
  class Matcher
    # Zero-width matcher that succeeds only when the scanner's location index is zero.
    class SOF < Matcher

      # There is no pattern to store, so the base class receives nil.
      def initialize
        super(nil)
      end

      # Returns a Match with nil text whose start and end are both the current
      # scanner location, or nil when the location index is not zero.
      def match(scanner)
        here = scanner.location
        return nil unless here.index.zero?

        Match.new(nil, here, here)
      end

    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require "grammy/matcher"
|
|
2
|
+
require "grammy/match"
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
module Grammy
  class Matcher
    # Zero-width matcher that succeeds only when the scanner's location is in column 1.
    class SOL < Matcher

      # There is no pattern to store, so the base class receives nil.
      def initialize
        super(nil)
      end

      # Returns a Match with nil text whose start and end are both the current
      # scanner location, or nil when the current column is not 1.
      def match(scanner)
        here = scanner.location
        return nil unless here.column == 1

        Match.new(nil, here, here)
      end

    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require "grammy/match"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
module Grammy
  # Abstract base class for matchers. Subclasses implement `match(scanner)`;
  # the operators below combine matchers into larger ones.
  class Matcher

    # pattern - whatever the concrete matcher matches against; stored as-is.
    def initialize(pattern)
      @pattern = pattern
    end

    # Subclasses must override this; the base implementation always raises
    # NotImplementedError.
    def match(scanner)
      raise NotImplementedError, "abstract method -- must override in derived classes"
    end

    # DSL for sequence, alternative, and repetition.

    # `a + b` builds a Sequence of this matcher followed by `other`.
    def +(other)
      Matcher::Sequence.new(self, other)
    end

    # `a | b` builds an Alternative of this matcher and `other`.
    def |(other)
      Matcher::Alternative.new(self, other)
    end

    # `a[range]` builds a Repetition of this matcher with the given count range.
    def [](range)
      Matcher::Repetition.new(self, range)
    end

  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require "grammy/location"
|
|
2
|
+
require "grammy/match"
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# PEG parsers don't need a lexer, so this is basically half a lexer.
|
|
6
|
+
# It needs to implement backtracking for the PEG parser.
|
|
7
|
+
module Grammy

  # Walks through an input string, matching strings and regexps at the current
  # location, and supports backtracking through a stack of saved locations.
  class Scanner

    attr_reader :location, :input

    # input - a String, or any object responding to `read` (its contents are
    #         read once, up front).
    def initialize(input)
      @input = input.is_a?(String) ? input : input.read
      @location = Location.new(1, 1, 0) # presumably line 1, column 1, index 0
      @marks = [] # stack of Locations
    end

    # Attempt to match a literal String at the current location.
    # Returns a Match and advances past it, or returns nil (leaving the
    # location untouched) when the input does not continue with `pattern`.
    def match_string(pattern)
      return nil if past_end?

      match_text(pattern) if remaining_input.start_with?(pattern)
    end

    # Attempt to match a Regexp at the current location.
    # The pattern is re-anchored so it can only match at the very start of the
    # remaining input. Returns a Match and advances past it, or nil.
    def match_regexp(pattern)
      return nil if past_end?

      anchored = Regexp.new("\\A(?:#{pattern.source})", pattern.options)
      found = remaining_input.match(anchored)
      found && match_text(found[0])
    end

    # Save the current location on the mark stack and return it.
    def mark
      @marks << @location
      @location
    end

    # Rewind to `mark`, which must be the most recently saved mark, and drop it.
    # Raises ArgumentError when `mark` is not on top of the stack.
    def backtrack(mark)
      raise ArgumentError, "can only backtrack the top mark" unless @marks.last == mark

      @location = mark
      @marks.pop
    end

    # Permanently consume input, confirming we'll never backtrack to mark again.
    # Raises ArgumentError when `mark` is not on top of the stack.
    def consume(mark)
      raise ArgumentError, "can only consume the top mark" unless @marks.last == mark

      @marks.pop
    end

    # True when the current index is exactly the size of the input.
    def finished?
      @input.size == @location.index
    end

    private

    # True when there is no input left at or after the current index.
    def past_end?
      @location.index >= @input.size
    end

    # The part of the input from the current index onward.
    def remaining_input
      @input[@location.index..]
    end

    # Builds the Match for `text` (nil when text is missing or empty).
    # The Match ends at the location of the last matched character; the
    # scanner itself moves forward to the character *after* the match.
    def match_text(text)
      return nil if !text || text.empty?

      first = @location
      last = first.advance(text[0...-1])
      @location = last.advance(text[-1])
      Match.new(text, first, last)
    end

  end
end
|
data/lib/grammy/token.rb
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require "grammy/match"
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# A Token is a Match, plus the name of the token rule it matched.
|
|
5
|
+
# It can also have a value, providing a more useful representation of the matched text.
|
|
6
|
+
|
|
7
|
+
module Grammy
  # Immutable value object pairing a token rule name with its Match and an
  # optional value.
  class Token < Data.define(:name, :match, :value)

    # Builds a Token. Anything passed as `match` that is not already a
    # Grammy::Match is wrapped in one.
    def self.new(name, match = nil, value = nil)
      wrapped = match.is_a?(Grammy::Match) ? match : Grammy::Match.new(match)
      super(name, wrapped, value)
    end

    # TODO: Extract this into a Data helper. Better yet, create a Value class for value objects.
    # Returns a copy of this token with a different value.
    def with(value:)
      self.class.new(name, match, value)
    end

    # Conveniences delegating to the underlying match.
    def text
      match.text
    end

    def start_location
      match.start_location
    end

    def end_location
      match.end_location
    end

    def to_s
      text
    end

    def inspect
      "#<Token #{name}: \"#{text}\"#{inspect_value}>"
    end

    # Suffix for `inspect`: the value in parentheses, or nothing when the
    # value is nil or false.
    def inspect_value
      return "" unless value

      " (#{value})"
    end

    # For IRB output.
    def pretty_print(pp)
      pp.text(inspect)
    end

  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module Grammy
  class Tree
    # Mixin for tree transformers. Including it gives the class a `transform`
    # class-level DSL for registering rules by node name, and gives instances
    # a `transform(node)` method that applies those rules to a tree.
    module Transformation
      def self.included(base)
        base.extend(ClassMethods)
      end

      module ClassMethods
        # Hash of rule name (Symbol) => block, stored on the class itself.
        def transform_rules
          @transform_rules ||= {}
        end

        # Register `block` as the transformation for nodes called `name`.
        def transform(name, &block)
          transform_rules[name.to_sym] = block
        end
      end

      # Transform `node` and return the result:
      #
      # * a node with no `name` (e.g. nil or a plain String child) is returned
      #   unchanged, since no rule can apply to it;
      # * if a rule is registered for the node's name, the rule block is run
      #   in the context of this transformer with the node as its argument;
      # * otherwise a node with `children` is rebuilt from its transformed
      #   children, and any other node is returned as-is.
      def transform(node)
        return node unless node.respond_to?(:name)

        rule = self.class.transform_rules[node.name.to_sym]
        if rule
          instance_exec(node, &rule)
        elsif node.respond_to?(:children)
          children = node.children.map { |child| transform(child) }
          node.class.new(node.name, children)
        else
          node
        end
      end
    end
  end
end
|
data/lib/grammy/tree.rb
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
module Grammy
  # Immutable named node holding an ordered list of children, which may be
  # other trees or arbitrary leaf objects.
  class Tree < Data.define(:name, :children)
    # Number of spaces added per nesting level when rendering.
    INDENTATION = 4

    # Make it easier to create nested trees: children may be passed directly
    # or returned from a block (the block wins). They are coerced with Array().
    def self.new(name, children = [], &block)
      children = block.call if block
      super(name: name, children: Array(children))
    end

    # True when nothing is left after flattening the children and dropping nils.
    def empty?
      children.flatten.compact.empty?
    end

    # All non-tree descendants, depth-first, with nils removed.
    def leaves
      children.flat_map { |child| child.is_a?(self.class) ? child.leaves : child }.compact
    end

    # Indented, one-node-per-line rendering starting at nesting `level`.
    def to_s(level = 0)
      [to_s_base(level), *children.map { |child| to_s_child(child, level) }].join("\n")
    end

    # Like `to_s`, but the first line shows the class and the inspected name;
    # children are rendered exactly as in `to_s`.
    def inspect(level = 0)
      [inspect_base(level), *children.map { |child| to_s_child(child, level) }].join("\n")
    end

    # Hash form; each child is converted with its own `to_h`.
    def to_h
      { name: name, children: children.map(&:to_h) }
    end

    # For IRB output.
    def pretty_print(pp)
      pp.text(inspect)
    end

    private

    def to_s_base(level)
      "#{indent(level)}#{name}"
    end

    def inspect_base(level)
      "#{indent(level)}#<#{class_name} #{name.inspect}>"
    end

    # Subtrees render themselves one level deeper; anything else is a leaf.
    def to_s_child(child, level)
      if child.is_a?(self.class)
        child.to_s(level + 1)
      else
        to_s_leaf(child, level + 1)
      end
    end

    def to_s_leaf(leaf, level)
      "#{indent(level)}#{leaf}"
    end

    def indent(level)
      " " * (level * INDENTATION)
    end

    # Class name without its namespace.
    def class_name
      self.class.name.split("::").last
    end

  end
end
|
data/lib/grammy.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: grammy
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: '0.10'
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Craig Buchek
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: Grammy is a parser with a simple DSL to define the grammar and tree transformations.
|
|
13
|
+
email:
|
|
14
|
+
- craig@boochtek.com
|
|
15
|
+
executables: []
|
|
16
|
+
extensions: []
|
|
17
|
+
extra_rdoc_files:
|
|
18
|
+
- README.md
|
|
19
|
+
- docs/CHANGELOG.md
|
|
20
|
+
- docs/LICENSE.md
|
|
21
|
+
- docs/TODO.md
|
|
22
|
+
files:
|
|
23
|
+
- README.md
|
|
24
|
+
- docs/CHANGELOG.md
|
|
25
|
+
- docs/LICENSE.md
|
|
26
|
+
- docs/TODO.md
|
|
27
|
+
- lib/extensions/debugging.rb
|
|
28
|
+
- lib/grammy.rb
|
|
29
|
+
- lib/grammy/ast.rb
|
|
30
|
+
- lib/grammy/ast/transformer.rb
|
|
31
|
+
- lib/grammy/combinator/primitives.rb
|
|
32
|
+
- lib/grammy/errors.rb
|
|
33
|
+
- lib/grammy/grammar.rb
|
|
34
|
+
- lib/grammy/location.rb
|
|
35
|
+
- lib/grammy/match.rb
|
|
36
|
+
- lib/grammy/matcher.rb
|
|
37
|
+
- lib/grammy/matcher/alternative.rb
|
|
38
|
+
- lib/grammy/matcher/eof.rb
|
|
39
|
+
- lib/grammy/matcher/eol.rb
|
|
40
|
+
- lib/grammy/matcher/regexp.rb
|
|
41
|
+
- lib/grammy/matcher/repetition.rb
|
|
42
|
+
- lib/grammy/matcher/sequence.rb
|
|
43
|
+
- lib/grammy/matcher/sof.rb
|
|
44
|
+
- lib/grammy/matcher/sol.rb
|
|
45
|
+
- lib/grammy/matcher/string.rb
|
|
46
|
+
- lib/grammy/parse_tree.rb
|
|
47
|
+
- lib/grammy/scanner.rb
|
|
48
|
+
- lib/grammy/token.rb
|
|
49
|
+
- lib/grammy/tree.rb
|
|
50
|
+
- lib/grammy/tree/transformation.rb
|
|
51
|
+
homepage: https://github.com/stone-lang/grammy
|
|
52
|
+
licenses:
|
|
53
|
+
- MIT
|
|
54
|
+
metadata:
|
|
55
|
+
changelog_uri: https://github.com/stone-lang/grammy/blob/main/docs/CHANGELOG.md
|
|
56
|
+
rubygems_mfa_required: 'true'
|
|
57
|
+
rdoc_options: []
|
|
58
|
+
require_paths:
|
|
59
|
+
- lib
|
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
|
+
requirements:
|
|
62
|
+
- - ">="
|
|
63
|
+
- !ruby/object:Gem::Version
|
|
64
|
+
version: '3.4'
|
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - ">="
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '0'
|
|
70
|
+
requirements: []
|
|
71
|
+
rubygems_version: 3.6.7
|
|
72
|
+
specification_version: 4
|
|
73
|
+
summary: A PEG parsing library with a simple DSL
|
|
74
|
+
test_files: []
|