packrat_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +133 -0
- data/examples/simple_calc.rb +41 -0
- data/lib/packrat_parser/base.rb +146 -0
- data/lib/packrat_parser/parser.rb +142 -0
- data/lib/packrat_parser/result.rb +53 -0
- data/lib/packrat_parser/version.rb +3 -0
- data/lib/packrat_parser.rb +9 -0
- metadata +52 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 3329c290118a8d488a68b9d83f645eee1095730ee11d51446bd1e76ff232b95e
|
|
4
|
+
data.tar.gz: d39e34dd833b9bb48079b6e342a5b36162396a257b239697bef76d5f796b0679
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 9d51853503af9f290bbb062f4a0b3e8543ddb032aeb69d3f3d1ec555c02101975780e0cad5e4ab364a267cfd6fd361259b0cd1ca4136dbae00ce81dea954a753
|
|
7
|
+
data.tar.gz: afd0c1223b0bc3d392b947a319cedc9696280c6cf6d5e10cfbc638aaa1fdf7669a5c88c532542bc9744b645312cab8b88fb559d3fa90be4f1c77d50145f23abd
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shugo Maeda
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# packrat_parser
|
|
2
|
+
|
|
3
|
+
A small [packrat](https://bford.info/packrat/) / PEG parser-combinator library
|
|
4
|
+
with a Scala-inspired, monadic API. Grammar rules are plain methods that return
|
|
5
|
+
parsers, and they can be written with the `for ... then` comprehension so the
|
|
6
|
+
grammar reads like a Scala for-comprehension.
|
|
7
|
+
|
|
8
|
+
```ruby
|
|
9
|
+
require "packrat_parser"
|
|
10
|
+
|
|
11
|
+
class SimpleCalcParser < PackratParser
|
|
12
|
+
def additive
|
|
13
|
+
(for x in multitive, _ in term("+"), y in additive then x + y end) |
|
|
14
|
+
(for x in multitive, _ in term("-"), y in additive then x - y end) |
|
|
15
|
+
multitive
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def multitive
|
|
19
|
+
(for x in primary, _ in term("*"), y in multitive then x * y end) |
|
|
20
|
+
(for x in primary, _ in term("/"), y in multitive then x / y end) |
|
|
21
|
+
primary
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def primary
|
|
25
|
+
(for _l in term("("), x in additive, _r in term(")") then x end) |
|
|
26
|
+
number
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def number
|
|
30
|
+
for s in term(/\d+/) then s.to_i end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
SimpleCalcParser.parse("1+2*3") # => 7
|
|
35
|
+
SimpleCalcParser.parse("(1+2)*3") # => 9
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Requirements
|
|
39
|
+
|
|
40
|
+
The `for ... then` comprehension is a feature of a Ruby fork and is only
|
|
41
|
+
recognized by its **legacy parser**. Run grammars with `--parser=parse.y`:
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
ruby --parser=parse.y -Ilib examples/simple_calc.rb
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The default Prism parser rejects `for ... then`. The library itself
|
|
48
|
+
(`lib/`) is ordinary Ruby — only files that *write* grammars need the flag.
|
|
49
|
+
|
|
50
|
+
## API
|
|
51
|
+
|
|
52
|
+
Subclass `PackratParser`. **Every method you define in the subclass is a grammar
|
|
53
|
+
rule** and must return a parser; rule methods are automatically made lazy and
|
|
54
|
+
memoized so they can reference one another (and themselves) recursively.
|
|
55
|
+
|
|
56
|
+
### Building blocks (available inside rule methods)
|
|
57
|
+
|
|
58
|
+
- `term(string)` — match an exact literal at the current position.
|
|
59
|
+
- `term(regexp)` — match a regexp anchored at the current position.
|
|
60
|
+
Both yield the matched substring (e.g. `term(/\d+/)`).
|
|
61
|
+
- `pure(value)` — succeed with `value` without consuming input.
|
|
62
|
+
|
|
63
|
+
### Combinators (methods on every parser)
|
|
64
|
+
|
|
65
|
+
- `flat_map { |v| parser }` — sequence: run `parser` after this one succeeds.
|
|
66
|
+
- `map { |v| new_value }` — transform the result.
|
|
67
|
+
- `filter { |v| bool }` — succeed only when the predicate holds.
|
|
68
|
+
- `a | b` — ordered choice: try `a`, and if it fails try `b`.
|
|
69
|
+
- `a + b` — sequence, keep **both** results (Scala's `~`): run `a` then `b`,
|
|
70
|
+
yield the pair `[a, b]`. Left-associative and nesting, so `a + b + c` yields
|
|
71
|
+
`[[a, b], c]`; Ruby's block-parameter destructuring takes them apart the way
|
|
72
|
+
Scala's `case a ~ b ~ c` does: `(a + b + c).map { |(x, y), z| ... }`.
|
|
73
|
+
- `a << b` — sequence, keep the **left** result (Scala's `<~`): run `a` then
|
|
74
|
+
`b`, yield `a`'s value and discard `b`'s.
|
|
75
|
+
- `a >> b` — sequence, keep the **right** result (Scala's `~>`): run `a` then
|
|
76
|
+
`b`, yield `b`'s value and discard `a`'s.
|
|
77
|
+
|
|
78
|
+
The arrow direction is a useful mnemonic: `<<`/`>>` keeps whichever side it
|
|
79
|
+
points to. They are handy for discarding punctuation, e.g. `( expr )` is
|
|
80
|
+
`term("(") >> expr << term(")")`. Ruby's precedence (`+` over `<<`/`>>` over
|
|
81
|
+
`|`) means sequencing binds tighter than ordered choice, as you'd want.
|
|
82
|
+
|
|
83
|
+
`flat_map`, `map`, and `filter` are exactly what the `for ... then`
|
|
84
|
+
comprehension desugars to (a non-final generator → `flat_map`, the final
|
|
85
|
+
generator → `map`, a `when` guard → `filter`), so:
|
|
86
|
+
|
|
87
|
+
```ruby
|
|
88
|
+
for x in p, y in q when y > 0 then x + y end
|
|
89
|
+
# == p.flat_map { |x| q.filter { |y| y > 0 }.map { |y| x + y } }
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Whitespace skipping (optional)
|
|
93
|
+
|
|
94
|
+
By default `term` matches exactly. To skip whitespace implicitly — like Scala's
|
|
95
|
+
`RegexParsers` — declare `skip_whitespace` at the class level. Each `term` then
|
|
96
|
+
consumes leading whitespace before matching, and `parse` consumes trailing
|
|
97
|
+
whitespace before requiring full input consumption:
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
class CalcParser < PackratParser
|
|
101
|
+
skip_whitespace # default pattern: /\s+/
|
|
102
|
+
# skip_whitespace(/[ \t]+/) # or a custom pattern (e.g. spaces/tabs only)
|
|
103
|
+
# ... rules using term(...) ...
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
CalcParser.parse(" 1 + 2 * 3 ") # => 7
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The setting is inherited by subclasses, so a base parser can enable it once.
|
|
110
|
+
|
|
111
|
+
### Entry point
|
|
112
|
+
|
|
113
|
+
- `start_symbol :name` (class level) — choose the rule to start from.
|
|
114
|
+
If omitted, the first defined method is used as the start symbol.
|
|
115
|
+
- `Klass.parse(input)` / `Klass.new.parse(input)` — parse, returning the value.
|
|
116
|
+
Raises `PackratParser::ParseError` on failure or leftover input.
|
|
117
|
+
|
|
118
|
+
## Notes / limitations
|
|
119
|
+
|
|
120
|
+
- **Classic packrat: no left recursion.** Write rules right-recursively. A
|
|
121
|
+
consequence is that `-` and `/` in the example calculator associate to the
|
|
122
|
+
right (`12/4/3` parses as `12/(4/3)` == `12`).
|
|
123
|
+
- **No implicit whitespace by default.** `term` matches exactly. Enable implicit
|
|
124
|
+
whitespace skipping with `skip_whitespace` (see above) when your grammar needs
|
|
125
|
+
it.
|
|
126
|
+
|
|
127
|
+
## Running the tests (from a repository checkout; `bin/` and `test/` are not packaged in the gem)
|
|
128
|
+
|
|
129
|
+
```sh
|
|
130
|
+
bin/test
|
|
131
|
+
# or:
|
|
132
|
+
ruby --parser=parse.y -Ilib -Itest test/test_packrat_parser.rb
|
|
133
|
+
```
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Run with the Ruby fork's legacy parser, which understands `for ... then`:
|
|
2
|
+
#
|
|
3
|
+
#   /path/to/fork/ruby --disable-gems --parser=parse.y -Ilib examples/simple_calc.rb
|
|
4
|
+
#
|
|
5
|
+
require "packrat_parser"
|
|
6
|
+
|
|
7
|
+
# A four-function integer calculator.
|
|
8
|
+
#
|
|
9
|
+
# The grammar is right-recursive (additive -> multitive "+" additive), which is
|
|
10
|
+
# what classic packrat parsing supports without left-recursion handling. A
|
|
11
|
+
# consequence is that "-" and "/" associate to the right, e.g. 8-3-2 parses as
|
|
12
|
+
# 8-(3-2) == 7. That trade-off comes from the monadic-core / no-`rep` API.
|
|
13
|
+
# Example grammar for integer arithmetic with +, -, *, / and parentheses.
# Each method is a grammar rule returning a parser; the rules are written with
# the fork's `for ... then ... end` comprehension and combined with `|`
# (ordered choice), `<<` (sequence, keep left) and `>>` (sequence, keep right).
class SimpleCalcParser < PackratParser
  # additive <- multitive "+" additive / multitive "-" additive / multitive
  # The recursion is on the right-hand operand, so a chain of "+"/"-" groups
  # to the right.
  def additive
    for x in multitive << term("+"), y in additive then x + y end |
    for x in multitive << term("-"), y in additive then x - y end |
    multitive
  end

  # multitive <- primary "*" multitive / primary "/" multitive / primary
  # Same right-recursive shape as +additive+; `x / y` is applied to the values
  # produced by +number+, which are Integers (see +number+).
  def multitive
    for x in primary << term("*"), y in multitive then x * y end |
    for x in primary << term("/"), y in multitive then x / y end |
    primary
  end

  # primary <- "(" additive ")" / number
  # The parentheses are matched but discarded: `>>` drops the "(" and `<<`
  # drops the ")", leaving only the value of the inner +additive+.
  def primary
    for x in term("(") >> additive << term(")") then x end |
    number
  end

  # number <- /\d+/
  # Converts the matched digit string to an Integer with +to_i+.
  def number
    for s in term(/\d+/) then s.to_i end
  end
end
|
|
35
|
+
|
|
36
|
+
# When this file is executed directly (rather than required), evaluate a few
# sample expressions and print each one next to its parsed value.
if $PROGRAM_NAME == __FILE__
  ["1+2*3", "(1+2)*3", "2*3+4", "12/4/3", "((7))"].each do |expression|
    puts "#{expression} => #{SimpleCalcParser.parse(expression)}"
  end
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
class PackratParser
  # Subclass this and define grammar rules as plain methods that return parsers.
  # Every method defined in a subclass is automatically treated as a rule: it is
  # rewritten to return a lazy, memoizing Rule (see Rule), so rules may reference
  # each other (and themselves) without infinitely recursing while the combinator
  # graph is built.
  #
  #   class SimpleCalcParser < PackratParser
  #     def additive
  #       (for x in multitive, _ in term("+"), y in additive then x + y end) | multitive
  #     end
  #     ...
  #   end
  #
  #   SimpleCalcParser.parse("1+2*3") # => 7

  # Set (or read) the rule the parser starts from.
  #
  # With +name+, records it as this class's start symbol and returns it.
  # Without an argument, returns this class's own start symbol -- the declared
  # one, or the first rule method defined on this class (see method_added) --
  # and otherwise falls back to the superclass's start symbol, so a subclass
  # that adds no rules of its own can still be parsed with. Returns nil when no
  # class in the hierarchy has one.
  #
  # Note that a subclass which defines any method of its own still gets that
  # method as its implicit start symbol; declare +start_symbol+ explicitly when
  # that is not what you want.
  def self.start_symbol(name = nil)
    return @start_symbol = name if name
    return @start_symbol if @start_symbol

    superclass.respond_to?(:start_symbol) ? superclass.start_symbol : nil
  end

  # Enable implicit whitespace skipping (Scala's RegexParsers mode). When set,
  # every +term+ skips leading whitespace matching +pattern+ before attempting
  # its match, and +parse+ also consumes trailing whitespace before requiring
  # full input consumption. Off by default (terminals match exactly).
  #
  #   class CalcParser < PackratParser
  #     skip_whitespace               # default /\s+/
  #     # skip_whitespace(/[ \t]+/)   # or a custom pattern
  #   end
  def self.skip_whitespace(pattern = /\s+/)
    @__whitespace = pattern
  end

  # The configured whitespace pattern, or nil when skipping is disabled.
  # Inherited by subclasses so a base parser can turn the mode on once.
  def self.whitespace
    # defined? (rather than a truthiness test) lets a class that explicitly
    # stored nil stop the lookup instead of consulting its superclass.
    return @__whitespace if defined?(@__whitespace)

    superclass.respond_to?(:whitespace) ? superclass.whitespace : nil
  end

  # Convenience: parse +input+ with a fresh instance.
  def self.parse(input)
    new.parse(input)
  end

  # Rewrite every method defined on a subclass into a rule that returns a lazy
  # Rule. Guards against rewriting the base class's own infrastructure and
  # against re-entering while we install the replacement (define_method itself
  # fires method_added).
  def self.method_added(name)
    return if self == PackratParser
    return if name == :initialize
    return if @__defining_rule

    @__defining_rule = true
    # The first rule defined on the class becomes its implicit start symbol
    # unless one was already declared or recorded.
    @start_symbol ||= name
    begin
      # Capture the user's original method before it is replaced; the Rule
      # binds and runs it later, when the rule is actually applied.
      body = instance_method(name)
      define_method(name) do
        Rule.new(self, name, body)
      end
    ensure
      @__defining_rule = false
    end
  end

  # Per-input packrat memo table, keyed by [rule_name, pos].
  def __memo
    @__memo ||= {}
  end

  # A terminal parser. A String matches that exact literal at the current
  # position; a Regexp is matched anchored at the current position. The matched
  # substring is the parser's value.
  #
  # When the class enables +skip_whitespace+, leading whitespace is consumed
  # before the match is attempted, mirroring Scala's RegexParsers.
  #
  # Raises ArgumentError when +pattern+ is neither a String nor a Regexp.
  def term(pattern)
    ws = __anchored_whitespace
    case pattern
    when String
      Parser.new do |input, pos|
        pos = __skip_ws(ws, input, pos)
        if input[pos, pattern.length] == pattern
          Success.new(pattern, pos + pattern.length)
        else
          Failure.new(pos, "expected #{pattern.inspect}")
        end
      end
    when Regexp
      # \G pins the match to the offset passed to Regexp#match, so the pattern
      # cannot skip ahead and match later in the input.
      anchored = /\G(?:#{pattern})/
      Parser.new do |input, pos|
        pos = __skip_ws(ws, input, pos)
        if (m = anchored.match(input, pos))
          Success.new(m[0], pos + m[0].length)
        else
          Failure.new(pos, "expected #{pattern.inspect}")
        end
      end
    else
      raise ArgumentError, "term expects a String or Regexp, got #{pattern.class}"
    end
  end

  # The class's whitespace pattern anchored with \G so it only matches at the
  # position it is applied to, or nil when whitespace skipping is disabled.
  def __anchored_whitespace
    ws = self.class.whitespace
    ws && /\G(?:#{ws})/
  end

  # Advance +pos+ past whitespace matched by the anchored regexp +ws+ (nil when
  # skipping is disabled). Returns the new position.
  def __skip_ws(ws, input, pos)
    return pos unless ws

    (m = ws.match(input, pos)) ? pos + m[0].length : pos
  end

  # A parser that succeeds with +value+ without consuming any input (monadic
  # unit / Scala's `success`).
  def pure(value)
    Parser.new { |_input, pos| Success.new(value, pos) }
  end

  # Parse +input+ starting from the configured start symbol. Returns the parsed
  # value on success; raises ParseError on failure or on leftover input.
  def parse(input)
    # Start every parse with an empty memo table so results cached for a
    # previous input are never reused.
    @__memo = {}
    name = self.class.start_symbol
    raise ParseError.new("no start symbol defined", 0) unless name

    result = send(name).call(input, 0)
    raise ParseError.new(result.message, result.pos) unless result.success?

    # The last terminal skips only *leading* whitespace, so trailing whitespace
    # after the final token is left for parse to consume before requiring that
    # all input was used.
    end_pos = __skip_ws(__anchored_whitespace, input, result.pos)
    raise ParseError.new("unexpected trailing input", end_pos) if end_pos < input.length

    result.value
  end
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
class PackratParser
  # A parser combinator. Wraps a function (input, pos) -> Success | Failure.
  #
  # The four monadic operations (+flat_map+, +map+, +filter+, +pure+) are what
  # the `for ... then` comprehension in the Ruby fork desugars to, so grammar
  # rules can be written with comprehension syntax:
  #
  #   for x in multitive, _ in term("+"), y in additive then x + y end
  #   # => multitive.flat_map { |x| term("+").flat_map { |_| additive.map { |y| x + y } } }
  class Parser
    # Wrap the block +fn+, which receives (input, pos) and returns a Success or
    # a Failure.
    def initialize(&fn)
      @fn = fn
    end

    # Run this parser against +input+ starting at +pos+.
    def call(input, pos)
      @fn.call(input, pos)
    end

    # Sequencing / monadic bind. On success, +yield+ the value to obtain the
    # next parser and run it where this one stopped. Failures short-circuit.
    def flat_map
      Parser.new do |input, pos|
        result = call(input, pos)
        if result.success?
          next_parser = yield(result.value)
          next_parser.call(input, result.pos)
        else
          result
        end
      end
    end

    # Transform the successful value without consuming further input.
    def map
      Parser.new do |input, pos|
        result = call(input, pos)
        result.success? ? Success.new(yield(result.value), result.pos) : result
      end
    end

    # Succeed only when the block returns a truthy value for the parsed result.
    # This is what a `when` guard in a comprehension desugars to.
    #
    # When this parser itself fails, its Failure is returned unchanged so the
    # original message and position survive (ordered choice compares failure
    # positions to report the furthest one). When the parser succeeds but the
    # guard rejects the value, the result is a "guard failed" Failure at the
    # position where this parser started.
    def filter
      Parser.new do |input, pos|
        result = call(input, pos)
        next result unless result.success?

        yield(result.value) ? result : Failure.new(pos, "guard failed")
      end
    end

    # Ordered choice (PEG `/`). Try this parser; if it fails, try +other+ at the
    # same position. Reports whichever failure reached furthest into the input.
    def |(other)
      Parser.new do |input, pos|
        result = call(input, pos)
        if result.success?
          result
        else
          alt = other.call(input, pos)
          if alt.success?
            alt
          else
            # On a tie the later alternative's failure wins.
            alt.pos >= result.pos ? alt : result
          end
        end
      end
    end

    # Sequence, keeping the *left* result (Scala's `<~`). Run this parser, then
    # +other+, and on success return this parser's value, discarding +other+'s.
    #
    #   number << term(";")   # parse a number followed by ";", yield the number
    def <<(other)
      flat_map { |x| other.map { |_| x } }
    end

    # Sequence, keeping the *right* result (Scala's `~>`). Run this parser, then
    # +other+, and on success return +other+'s value, discarding this one's.
    #
    #   term("(") >> additive   # skip "(", yield whatever additive produces
    def >>(other)
      flat_map { |_| other }
    end

    # Sequence, keeping *both* results (Scala's `~`). Run this parser, then
    # +other+, and on success return the pair +[left, right]+. Like Scala's `~`
    # this is left-associative and nests, so `p + q + r` yields `[[a, b], c]`;
    # Ruby's block-parameter destructuring takes them apart the way Scala's
    # `case a ~ b ~ c` does:
    #
    #   (p + q + r).map { |(a, b), c| ... }
    def +(other)
      flat_map { |x| other.map { |y| [x, y] } }
    end
  end

  # A lazy, memoizing reference to a named grammar rule.
  #
  # Rule methods on a PackratParser subclass return a Rule instead of building
  # their combinator immediately. This is essential: a comprehension evaluates
  # its generator *receivers* eagerly (to call flat_map/map on them), so a
  # self-referential rule like `additive` would recurse forever at build time if
  # the body ran on every reference. Returning a lazy Rule breaks that cycle.
  #
  # Memoizing the *result* per (rule, pos) gives the packrat property: each rule
  # is evaluated at most once per input position, so parsing stays linear.
  #
  # The combinator graph is rebuilt on every (memo-missed) entry rather than
  # cached, and that is deliberate. The `for ... then` comprehension's loop
  # variables currently leak into the enclosing rule-method scope instead of
  # being block-local, so a single built closure shares those slots. If the same
  # closure were reused for a recursive activation (e.g. additive calling
  # additive), the inner activation would clobber the outer's leaked variables.
  # Building fresh gives each activation its own scope. The rebuild is bounded:
  # result memoization means a build happens at most once per (rule, pos).
  #
  # NOTE: this rebuild is a workaround for the loop-variable leak in the fork's
  # comprehension (parse.y: new_for_comp_gen leaves the loop var in the
  # surrounding scope, like the legacy `for`). If the comprehension is changed to
  # make loop variables block-local, this clobbering goes away and `built` can be
  # cached once per (owner, name) again -- e.g. `@owner.__built[@name] ||= ...`.
  class Rule < Parser
    # +owner+ is the parser instance whose rule this is, +name+ the rule's
    # method name, and +body+ the UnboundMethod holding the user's original
    # rule definition.
    def initialize(owner, name, body)
      @owner = owner
      @name = name
      @body = body
    end

    # Apply the rule at +pos+, returning the memoized result when this rule has
    # already been applied at that position in the owner's memo table.
    def call(input, pos)
      memo = @owner.__memo
      key = [@name, pos]
      return memo[key] if memo.key?(key)

      # Run the original rule method on the owner to build a fresh combinator
      # (bind_call avoids allocating an intermediate bound Method object).
      combinator = @body.bind_call(@owner)
      memo[key] = combinator.call(input, pos)
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
class PackratParser
  # Outcome objects returned by every parser invocation.
  #
  # A +Success+ holds the produced +value+ together with +pos+, the index of
  # the first character that has not been consumed yet. A +Failure+ holds the
  # +pos+ at which parsing gave up and a human-readable +message+.
  class Success
    attr_reader :value
    attr_reader :pos

    def initialize(value, pos)
      @value, @pos = value, pos
    end

    # Always true; lets callers branch without checking the class.
    def success?
      true
    end

    def to_s
      "Success(#{value.inspect} @#{pos})"
    end
    alias_method :inspect, :to_s
  end

  class Failure
    attr_reader :pos
    attr_reader :message

    def initialize(pos, message)
      @pos, @message = pos, message
    end

    # Always false; the counterpart of Success#success?.
    def success?
      false
    end

    def to_s
      "Failure(#{message} @#{pos})"
    end
    alias_method :inspect, :to_s
  end

  # Raised by PackratParser#parse when the input cannot be parsed, or when the
  # start symbol succeeds but does not consume the whole input. The offending
  # position is available as +pos+ and is also appended to the message.
  class ParseError < StandardError
    attr_reader :pos

    def initialize(message, pos)
      super("#{message} (at position #{pos})")
      @pos = pos
    end
  end
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# A small packrat / PEG parser-combinator library whose grammar rules can be
|
|
2
|
+
# written with the `for ... then` comprehension from the Ruby fork.
|
|
3
|
+
class PackratParser
|
|
4
|
+
end
|
|
5
|
+
|
|
6
|
+
require_relative "packrat_parser/version"
|
|
7
|
+
require_relative "packrat_parser/result"
|
|
8
|
+
require_relative "packrat_parser/parser"
|
|
9
|
+
require_relative "packrat_parser/base"
|
metadata
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: packrat_parser
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Shugo Maeda
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: |
|
|
13
|
+
packrat_parser is a small PEG/packrat parser-combinator library. Grammar
|
|
14
|
+
rules are plain methods that return parsers and can be written using the
|
|
15
|
+
`for ... then` comprehension (flat_map/map/filter), giving a Scala-style
|
|
16
|
+
for-comprehension feel.
|
|
17
|
+
email:
|
|
18
|
+
- shugo.maeda@gmail.com
|
|
19
|
+
executables: []
|
|
20
|
+
extensions: []
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
files:
|
|
23
|
+
- LICENSE
|
|
24
|
+
- README.md
|
|
25
|
+
- examples/simple_calc.rb
|
|
26
|
+
- lib/packrat_parser.rb
|
|
27
|
+
- lib/packrat_parser/base.rb
|
|
28
|
+
- lib/packrat_parser/parser.rb
|
|
29
|
+
- lib/packrat_parser/result.rb
|
|
30
|
+
- lib/packrat_parser/version.rb
|
|
31
|
+
homepage: https://github.com/shugo/packrat_parser
|
|
32
|
+
licenses:
|
|
33
|
+
- MIT
|
|
34
|
+
metadata: {}
|
|
35
|
+
rdoc_options: []
|
|
36
|
+
require_paths:
|
|
37
|
+
- lib
|
|
38
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
39
|
+
requirements:
|
|
40
|
+
- - ">="
|
|
41
|
+
- !ruby/object:Gem::Version
|
|
42
|
+
version: 3.0.0
|
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ">="
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '0'
|
|
48
|
+
requirements: []
|
|
49
|
+
rubygems_version: 4.0.10
|
|
50
|
+
specification_version: 4
|
|
51
|
+
summary: A packrat / PEG parser-combinator library with a Scala-inspired API.
|
|
52
|
+
test_files: []
|