parser_combinator_dsl 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/base_parsers.rb +203 -0
- data/lib/combinators.rb +112 -0
- data/lib/grammar.rb +35 -0
- data/lib/parser.rb +15 -0
- data/lib/parser_combinator_dsl.rb +1 -0
- data/lib/parser_result.rb +28 -0
- data/test/spec_helpers.rb +13 -0
- data/test/test_base_parsers.rb +186 -0
- data/test/test_combinators.rb +129 -0
- data/test/test_json_demo.rb +78 -0
- data/test/test_tutorial.rb +31 -0
- metadata +57 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8ebac413cd45e47ff7de3b0b5d4aeb6c98dd265434620206e2ffd5c2d9f910ea
|
4
|
+
data.tar.gz: 9ef20b739416d15763b8a1c9451245a54a509ebd9e0436046a735d3db0200575
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c357783a6d1dd1009f7f3a329f7647764de90378e8ddc671f01d644080336886e950a009388c822772fd8fb05366a1025241966be7930e508dca6dd06878fcac
|
7
|
+
data.tar.gz: 6073c6130917b7eeeb21d597224f4349732e4075454d6634cafcb2248bed3a95c627d687fa4638604158ce3dc62a07ed730e616a799f2c5530609919a358e84c
|
data/lib/base_parsers.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
require_relative "parser_result"
|
2
|
+
|
3
|
+
# Primitive parsers and repetition helpers used by the grammar DSL.
#
# Every public method returns a Parser. A Parser wraps a block that receives
# the input string and (with the exception of `seq`, see below) returns a
# ParserResult carrying the success flag, the matched text and the text that
# is still left to parse.
module BaseParsers
  # Succeeds, consuming nothing, only when no input is left ("" or nil).
  def eof
    Parser.new do |input|
      if input.nil? || input == ""
        ParserResult.ok(matched: "", remaining: input)
      else
        ParserResult.fail(input)
      end
    end
  end

  # Always succeeds without consuming anything.
  def empty
    Parser.new { |input| ParserResult.ok(matched: "", remaining: input) }
  end

  # Zero or more of: space, backspace, form feed, newline, carriage return,
  # tab.
  #
  # The characters are written as double-quoted literals on purpose: inside
  # `%w[...]` escapes such as `\n` are NOT interpreted and would produce the
  # two-character strings backslash + letter, which can never equal a single
  # input character.
  def whitespace
    many0 { anyChar([" ", "\b", "\f", "\n", "\r", "\t"]) }
  end

  # Matches exactly the single character `char` at the start of the input.
  # Fails (instead of raising) on nil input.
  def one(char)
    Parser.new do |input|
      if !input.nil? && input[0] == char
        ParserResult.ok(matched: char, remaining: input[1..-1])
      else
        ParserResult.fail(input)
      end
    end
  end

  # Matches the literal `string` at the start of the input.
  # Fails (instead of raising) on nil input.
  def str(string)
    Parser.new do |input|
      if !input.nil? && input.start_with?(string)
        ParserResult.ok(matched: string, remaining: input[string.length..-1])
      else
        ParserResult.fail(input)
      end
    end
  end

  # Matches one ASCII letter, a-z or A-Z.
  def anyLetter
    anyChar(("a".."z").to_a + ("A".."Z").to_a)
  end

  # Matches one ASCII digit, 0-9.
  def anyNumber
    anyChar(("0".."9").to_a)
  end

  # One or more repetitions of the parser returned by the block.
  #
  # The block is called when the resulting parser runs, not when `many1` is
  # called. The result is ok only if at least some text was matched; on
  # failure `remaining` is the original input.
  def many1(&wrapper)
    Parser.new do |input|
      matched = ""
      remaining = input
      parser = wrapper.call

      # Check for nil before running the inner parser so it never sees nil.
      until remaining.nil?
        result = parser.run(remaining)
        break if result.fail?

        matched += result.matched
        # A parser that succeeds without consuming input (e.g. `empty`)
        # would otherwise make this loop spin forever.
        stalled = result.remaining == remaining
        remaining = result.remaining
        break if stalled
      end

      ParserResult.new(!matched.empty?, remaining, matched)
    end
  end

  # Zero or more repetitions of the parser returned by the block.
  #
  # Never fails: when nothing matches, the result is ok with an empty match
  # and the input left untouched.
  def many0(&wrapper)
    Parser.new do |input|
      nothing = ParserResult.ok(matched: "", remaining: input)
      next nothing if input.nil? || input == ""

      result = many1(&wrapper).run(input)
      result.ok? ? result : nothing
    end
  end

  # Runs the given parsers one after another and hands their matched values
  # to a callback.
  #
  # Usage: seq parser_a, parser_b, lambda { |a, b| ... }
  #
  # The last argument is the callback, everything before it is a parser.
  # When every parser succeeds, the parser built here returns whatever the
  # callback returns (NOT necessarily a ParserResult). When any parser fails
  # it returns ParserResult.fail with the original input.
  #
  # Raises RuntimeError unless at least one parser and a callback are given.
  def seq(*args)
    *parsers, callback = args

    raise "Seq expects at least a parser and a callback." if callback.nil? || parsers.empty?

    Parser.new do |input|
      remaining = input
      values = []
      failed = false

      # No `return` in here: this block outlives the call to `seq`, so a
      # `return` would raise LocalJumpError instead of reporting a failure.
      parsers.each do |parser|
        result = parser.run(remaining)
        unless result.ok?
          failed = true
          break
        end
        remaining = result.remaining
        values << result.matched
      end

      failed ? ParserResult.fail(input) : callback.call(*values)
    end
  end

  # Wraps a hand-written block as a Parser. The block receives the input and
  # its return value is the parser's result. See specs for more on this.
  def satisfy(&wrapper)
    Parser.new { |input| wrapper.call(input) }
  end

  # Matches the regular expression `re` at the start of the input.
  def regex(re)
    Parser.new { |input| test(regex: re, with: input) }
  end

  # Matches `rule` surrounded by the two parsers in `between`
  # ([opening, closing]).
  #
  # On success the closing parser's result is returned as-is, so `matched`
  # holds only what the closing parser matched.
  # NOTE(review): returning the closing delimiter's match rather than the
  # text matched by `rule` looks unintended - confirm before relying on
  # `matched` here.
  def match(rule, between:)
    first, last = between
    Parser.new do |input|
      lhs = first.run(input)
      next ParserResult.fail(input) unless lhs.ok?

      middle = rule.run(lhs.remaining)
      next ParserResult.fail(input) unless middle.ok?

      rhs = last.run(middle.remaining)
      rhs.ok? ? rhs : ParserResult.fail(input)
    end
  end

  # Matches one character, which must be one of `chars`.
  # Fails on empty or nil input.
  def anyChar(chars)
    Parser.new do |input|
      first_char = input.nil? ? nil : input[0]
      if !first_char.nil? && chars.include?(first_char)
        ParserResult.ok(matched: first_char, remaining: input[1..-1])
      else
        ParserResult.fail(input)
      end
    end
  end

  # Matches one character, which must NOT be one of `chars`.
  # Fails on empty or nil input: there is no character to consume, and
  # succeeding there would hand a nil match to the repetition helpers.
  def anyCharBut(chars)
    Parser.new do |input|
      first_char = input.nil? ? nil : input[0]
      if first_char.nil? || chars.include?(first_char)
        ParserResult.fail(input)
      else
        ParserResult.ok(matched: first_char, remaining: input[1..-1])
      end
    end
  end

  # Exactly `n` repetitions of the parser returned by the block.
  # Unlike `many1`, the block is called immediately, when `exactly` is called.
  # On failure `remaining` is the original input.
  def exactly(n, &wrapper)
    parser = wrapper.call
    Parser.new do |input|
      matched = ""
      remaining = input
      success = true

      n.to_i.times do
        result = parser.run(remaining)
        if result.fail?
          success = false
          break
        end
        matched += result.matched
        remaining = result.remaining
      end

      if success
        ParserResult.ok(matched: matched, remaining: remaining)
      else
        ParserResult.fail(input)
      end
    end
  end

  private

  # Test against a simple regex, no groups. It would be possible to pass a
  # callback to the regex, in order to work with groups. #MAYBE #TODO
  #
  # The match has to start at the very beginning of `with`. Regexp#match
  # reports the leftmost match anywhere in the string, so a match that starts
  # later is treated as a failure instead of consuming unrelated text.
  def test(regex:, with:)
    found = regex.match(with)
    return ParserResult.fail(with) if found.nil? || !found.begin(0).zero?

    ParserResult.ok(matched: found[0], remaining: found.post_match)
  end
end
|
data/lib/combinators.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# Binary operators that build a new parser out of two existing ones, e.g.
# "try this parser, otherwise that one" or "this parser, then that one".
#
# The module is mixed into Parser: inside every block below, `run` is the
# left-hand parser and `other` is the right-hand one.
module Combinators
  # Alternation.
  # Usage:
  #   myParser | otherParser
  #
  # Returns this parser's result when it succeeds, otherwise whatever
  # `other` returns for the same input.
  def |(other)
    Parser.new do |input|
      attempt = run(input)
      attempt.ok? ? attempt : other.run(input)
    end
  end

  # Sequence: this parser, then `other`; both are required.
  # Usage:
  #   myParser >> otherParser
  #
  # The matched texts are concatenated. When this parser fails its own result
  # is returned; when `other` fails the result is a failure carrying the
  # original input.
  def >>(other)
    Parser.new do |input|
      head = run(input)
      next head unless head.ok?

      prefix = "" + head.matched
      tail = other.run(head.remaining)
      next ParserResult.fail(input) unless tail.ok?

      ParserResult.ok(matched: prefix + tail.matched, remaining: tail.remaining)
    end
  end

  # Match this, other is optional.
  # When `other` also matches, both matched texts are concatenated.
  def >(other)
    Parser.new do |input|
      required = run(input)
      next ParserResult.fail(input) unless required.ok?

      optional = other.run(required.remaining)
      next required unless optional.ok?

      ParserResult.ok(matched: required.matched + optional.matched, remaining: optional.remaining)
    end
  end

  # Match other, this is optional.
  # When this parser matches, its text is prepended to what `other` matched.
  def <(other)
    Parser.new do |input|
      optional = run(input)
      prefix, rest = optional.ok? ? [optional.matched, optional.remaining] : ["", input]

      required = other.run(rest)
      if required.ok?
        ParserResult.ok(matched: prefix + required.matched, remaining: required.remaining)
      else
        ParserResult.fail(input)
      end
    end
  end

  # Match this, other is ignored but consumed.
  # If `other` does not match, this parser's result is returned unchanged.
  def >=(other)
    Parser.new do |input|
      kept = run(input)
      next ParserResult.fail(input) unless kept.ok?

      dropped = other.run(kept.remaining)
      dropped.ok? ? ParserResult.ok(matched: kept.matched, remaining: dropped.remaining) : kept
    end
  end

  # Match other, this is ignored but consumed.
  # If this parser does not match, `other` runs on the original input.
  def <=(other)
    Parser.new do |input|
      dropped = run(input)
      rest = dropped.ok? ? dropped.remaining : input

      kept = other.run(rest)
      kept.ok? ? kept : ParserResult.fail(input)
    end
  end
end
|
data/lib/grammar.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative "base_parsers"
|
2
|
+
require_relative "parser"
|
3
|
+
|
4
|
+
# This is the main DSL interface. It builds up grammar rules and sets up the
# DSL.
#
# All DSL methods live on the Grammar class itself: `build` evaluates its
# block with `self` set to Grammar, so `rule`, `start`, `ok`, `fail` and the
# BaseParsers helpers can be called without a receiver inside the block.
#
# NOTE: the rule table is a single class-level variable that every call to
# `build` replaces. A rule's parser looks up the rules it references each
# time it runs, so a parser from an earlier `build` that refers to other
# rules by name resolves them against the most recently built grammar.
class Grammar
  class << self
    include BaseParsers

    # Evaluates the block as a grammar definition, starting from an empty
    # rule table, and returns the block's last value (normally the parser
    # returned by `start` or by the last `rule`).
    #
    # Raises RuntimeError when no block is given.
    def build(&block)
      raise "Must provide a block" unless block_given?
      @rules = {}
      instance_eval(&block)
    end

    # With a block: defines rule `name` and returns its parser. The block is
    # called every time the parser runs, which is what lets a rule refer to
    # rules that are defined further down.
    #
    # Without a block: returns the parser of an already defined rule, or
    # raises RuntimeError if there is none.
    def rule(name, &wrapper)
      return @rules.fetch(name.to_sym) { raise "Could not find rule: #{name}" } if wrapper.nil?
      @rules[name.to_sym] = Parser.new { |input| wrapper.call.run(input) }
    end

    # Returns the parser of the rule the grammar starts from. Goes through
    # `rule` so that names are normalised with `to_sym` and an unknown name
    # raises instead of silently returning nil.
    def start(name)
      rule(name)
    end

    # Aliases for DSL

    # Shorthand for ParserResult.ok. Declared with explicit keywords because
    # forwarding them through a `*args` splat passes a positional Hash on
    # Ruby 3, which ParserResult.ok rejects.
    def ok(matched:, remaining:)
      ParserResult.ok(matched: matched, remaining: remaining)
    end

    # Shorthand for ParserResult.fail. Inside the DSL this takes precedence
    # over Kernel#fail, so use `raise` to raise exceptions there.
    def fail(*args)
      ParserResult.fail(*args)
    end
  end
end
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative "combinators"
|
2
|
+
|
3
|
+
# A parser is a block that takes the input string and returns the outcome of
# parsing it. Parsers are combined with the operators from Combinators.
class Parser
  include Combinators

  # The block that does the actual parsing.
  attr_reader :parser

  # Stores the given block as the parsing routine.
  # Raises RuntimeError when called without a block.
  def initialize(&block)
    raise "Invalid argument, must provide a block" if block.nil?

    @parser = block
  end

  # Runs the stored block against `input` and returns its result.
  def run(input)
    parser.call(input)
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'grammar'
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Outcome of running a parser: whether it succeeded, the text it matched and
# the text that is still left to parse.
class ParserResult
  attr_reader :success, :remaining, :matched

  def initialize(success, remaining, matched)
    @success, @remaining, @matched = success, remaining, matched
  end

  class << self
    # Builds a successful result.
    def ok(matched:, remaining:)
      ParserResult.new(true, remaining, matched)
    end

    # Builds a failed result; the matched text is always the empty string.
    def fail(remaining)
      ParserResult.new(false, remaining, "")
    end
  end

  # The success flag, as given to the constructor.
  def ok?
    success
  end

  # True only when the success flag is exactly `false`.
  def fail?
    success == false
  end

  # Two results are equal when they are of the very same class and agree on
  # success, remaining and matched.
  def ==(other)
    return false unless other.instance_of?(self.class)

    %i[success remaining matched].all? { |field| other.public_send(field) == public_send(field) }
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Runs `parser` on `with` and checks the outcome: the success flag (inverted
# when `should_fail` is set), the remaining input and, only when `matched`
# is given, the matched text.
def assert_parses(parser, with:, remaining:, matched: nil, should_fail: false)
  outcome = parser.run(with)

  assert_equal(!should_fail, outcome.success)
  assert_equal(remaining, outcome.remaining)
  return if matched.nil?

  assert_equal(matched, outcome.matched)
end
|
7
|
+
|
8
|
+
# Runs `parser` on `with` and checks only the success flag, which is
# expected to be false when `should_fail` is set and true otherwise.
def test_parser(parser, with:, should_fail: false)
  outcome = parser.run(with)
  assert_equal(!should_fail, outcome.success)
end
|
11
|
+
|
12
|
+
# Require everything in `/lib`
|
13
|
+
Dir[File.join(File.dirname(__FILE__), '../lib/**/*.rb')].each { |f| require f }
|
@@ -0,0 +1,186 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
describe Grammar do
|
6
|
+
describe "Built-in combinators" do
|
7
|
+
it "matches eof" do
|
8
|
+
parser = Grammar.build do
|
9
|
+
rule(:foo) { eof }
|
10
|
+
start(:foo)
|
11
|
+
end
|
12
|
+
|
13
|
+
assert_parses parser, with: "", remaining: ""
|
14
|
+
assert_parses parser, with: "asd", remaining: "asd", should_fail: true
|
15
|
+
end
|
16
|
+
|
17
|
+
it "matches empty" do
|
18
|
+
parser = Grammar.build do
|
19
|
+
rule(:foo) { empty }
|
20
|
+
start(:foo)
|
21
|
+
end
|
22
|
+
|
23
|
+
assert_parses parser, with: "asd", remaining: "asd"
|
24
|
+
assert_parses parser, with: "", remaining: ""
|
25
|
+
end
|
26
|
+
|
27
|
+
it "matches whitespace" do
|
28
|
+
parser = Grammar.build do
|
29
|
+
rule(:foo) { whitespace }
|
30
|
+
start(:foo)
|
31
|
+
end
|
32
|
+
|
33
|
+
assert_parses parser, with: " asd", remaining: "asd"
|
34
|
+
assert_parses parser, with: "", remaining: ""
|
35
|
+
end
|
36
|
+
|
37
|
+
it "must parse one" do
|
38
|
+
parser = Grammar.build do
|
39
|
+
rule(:one) { one "a" }
|
40
|
+
start(:one)
|
41
|
+
end
|
42
|
+
|
43
|
+
assert_parses parser, with: "abc", remaining: "bc"
|
44
|
+
end
|
45
|
+
|
46
|
+
it "must parse str" do
|
47
|
+
parser = Grammar.build do
|
48
|
+
rule(:foo) { str "foo" }
|
49
|
+
start(:foo)
|
50
|
+
end
|
51
|
+
|
52
|
+
assert_parses parser, with: "foo", remaining: ""
|
53
|
+
assert_parses parser, with: "foobar", remaining: "bar"
|
54
|
+
assert_parses parser, with: "fobar", remaining: "fobar", should_fail: true
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
it "can make rules by hand" do
|
59
|
+
parser = Grammar.build do
|
60
|
+
rule(:foo) { Parser.new { |input| input == "foo" ? ok(matched: "foo", remaining: "") : fail(input) } }
|
61
|
+
start(:foo)
|
62
|
+
end
|
63
|
+
|
64
|
+
assert_parses parser, with: "foo", remaining: ""
|
65
|
+
end
|
66
|
+
|
67
|
+
it "matching rules by hand is the same as satisfy" do
|
68
|
+
parser = Grammar.build do
|
69
|
+
rule(:foo) { satisfy { |input| input == "foo" ? ok(matched: "foo", remaining: "") : fail(input) } }
|
70
|
+
start(:foo)
|
71
|
+
end
|
72
|
+
|
73
|
+
assert_parses parser, with: "foo", remaining: ""
|
74
|
+
end
|
75
|
+
|
76
|
+
it "matches anyLetter" do
|
77
|
+
parser = Grammar.build do
|
78
|
+
rule(:any) { anyLetter }
|
79
|
+
start(:any)
|
80
|
+
end
|
81
|
+
|
82
|
+
assert_parses parser, with: "abzx", remaining: "bzx"
|
83
|
+
assert_parses parser, with: "Znasd", remaining: "nasd"
|
84
|
+
end
|
85
|
+
|
86
|
+
it "matches anyNumber" do
|
87
|
+
parser = Grammar.build do
|
88
|
+
rule(:any) { anyNumber }
|
89
|
+
start(:any)
|
90
|
+
end
|
91
|
+
|
92
|
+
assert_parses parser, with: "12asd3", remaining: "2asd3"
|
93
|
+
assert_parses parser, with: "32asd", remaining: "2asd"
|
94
|
+
end
|
95
|
+
|
96
|
+
it "matches many1" do
|
97
|
+
parser = Grammar.build do
|
98
|
+
rule(:word) { many1 { anyLetter } }
|
99
|
+
start(:word)
|
100
|
+
end
|
101
|
+
|
102
|
+
assert_parses parser, with: "asd123", remaining: "123"
|
103
|
+
end
|
104
|
+
|
105
|
+
it "matches many0" do
|
106
|
+
parser = Grammar.build do
|
107
|
+
rule(:word) { many0 { anyLetter } }
|
108
|
+
start(:word)
|
109
|
+
end
|
110
|
+
|
111
|
+
assert_parses parser, with: "", remaining: ""
|
112
|
+
assert_parses parser, with: "abcde", remaining: ""
|
113
|
+
end
|
114
|
+
|
115
|
+
it "matches using seq" do
|
116
|
+
parser = Grammar.build do
|
117
|
+
rule(:letter) { many1 { anyLetter } }
|
118
|
+
rule(:number) { many0 { anyNumber } }
|
119
|
+
rule(:letterOrNumber) { seq rule(:letter), rule(:number), lambda { |letter, number| [letter, number] } }
|
120
|
+
start(:letterOrNumber)
|
121
|
+
end
|
122
|
+
|
123
|
+
assert_equal ["w", "8"], parser.run("w8")
|
124
|
+
|
125
|
+
parser = Grammar.build do
|
126
|
+
rule(:letter) { many1 { anyLetter } }
|
127
|
+
rule(:letterOrNumber) { seq rule(:letter), anyNumber, lambda { |letter, number| [letter, number] } }
|
128
|
+
start(:letterOrNumber)
|
129
|
+
end
|
130
|
+
|
131
|
+
assert_equal ["w", "8"], parser.run("w8")
|
132
|
+
end
|
133
|
+
|
134
|
+
it "uses regex" do
|
135
|
+
parser = Grammar.build do
|
136
|
+
rule(:foo) { regex /foo/ }
|
137
|
+
start(:foo)
|
138
|
+
end
|
139
|
+
|
140
|
+
assert_parses parser, with: "foo", remaining: ""
|
141
|
+
end
|
142
|
+
|
143
|
+
it "matches between" do
|
144
|
+
parser = Grammar.build do
|
145
|
+
rule(:quote) { one '"' }
|
146
|
+
rule(:foo) { match (many1 { anyLetter }), between: [rule(:quote), rule(:quote)] }
|
147
|
+
end
|
148
|
+
|
149
|
+
assert_parses parser, with: '"hi"', remaining: ''
|
150
|
+
end
|
151
|
+
|
152
|
+
it "matches anyChar" do
|
153
|
+
parser = Grammar.build do
|
154
|
+
rule(:foo) { anyChar ['a', 'b'] }
|
155
|
+
start(:foo)
|
156
|
+
end
|
157
|
+
|
158
|
+
assert_parses parser, with: "asd", remaining: "sd"
|
159
|
+
assert_parses parser, with: "bsd", remaining: "sd"
|
160
|
+
assert_parses parser, with: "c", remaining: "c", should_fail: true
|
161
|
+
end
|
162
|
+
|
163
|
+
it "matches anyCharBut" do
|
164
|
+
parser = Grammar.build do
|
165
|
+
rule(:foo) { anyCharBut ['a', 'b'] }
|
166
|
+
start(:foo)
|
167
|
+
end
|
168
|
+
|
169
|
+
assert_parses parser, with: "c", remaining: ""
|
170
|
+
assert_parses parser, with: "d", remaining: ""
|
171
|
+
assert_parses parser, with: "a", remaining: "a", should_fail: true
|
172
|
+
assert_parses parser, with: "b", remaining: "b", should_fail: true
|
173
|
+
end
|
174
|
+
|
175
|
+
it "matches exactly n times" do
|
176
|
+
parser = Grammar.build do
|
177
|
+
rule(:foo) { exactly(4) { anyLetter } }
|
178
|
+
start(:foo)
|
179
|
+
end
|
180
|
+
|
181
|
+
assert_parses parser, with: "abcde", remaining: "e"
|
182
|
+
assert_parses parser, with: "abcd", remaining: ""
|
183
|
+
assert_parses parser, with: "a", remaining: "a", should_fail: true
|
184
|
+
assert_parses parser, with: "abc", remaining: "abc", should_fail: true
|
185
|
+
end
|
186
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
describe Grammar do
|
6
|
+
describe "|" do
|
7
|
+
it "works with a single branch" do
|
8
|
+
parser = Grammar.build do
|
9
|
+
rule(:letter) { many1 { anyLetter } }
|
10
|
+
rule(:number) { many0 { anyNumber } }
|
11
|
+
rule(:letterOrNumber) { rule(:letter) | rule(:number) }
|
12
|
+
start(:letterOrNumber)
|
13
|
+
end
|
14
|
+
|
15
|
+
assert_parses parser, with: "n", remaining: "", matched: "n"
|
16
|
+
assert_parses parser, with: "6", remaining: "", matched: "6"
|
17
|
+
assert_parses parser, with: "", remaining: "", matched: ""
|
18
|
+
end
|
19
|
+
|
20
|
+
it "works with multiple branches" do
|
21
|
+
parser = Grammar.build do
|
22
|
+
rule(:letter) { many1 { anyLetter } }
|
23
|
+
rule(:number) { many1 { anyNumber } }
|
24
|
+
rule(:letterOrNumber) { rule(:letter) | rule(:number) | eof }
|
25
|
+
start(:letterOrNumber)
|
26
|
+
end
|
27
|
+
|
28
|
+
assert_parses parser, with: "n", remaining: "", matched: "n"
|
29
|
+
assert_parses parser, with: "6", remaining: "", matched: "6"
|
30
|
+
assert_parses parser, with: "", remaining: "", matched: ""
|
31
|
+
end
|
32
|
+
|
33
|
+
it "works with satisfy" do
|
34
|
+
parser = Grammar.build do
|
35
|
+
rule(:letter) { many1 { anyLetter } }
|
36
|
+
rule(:letterOr1) { rule(:letter) | (satisfy { |input| input == "1" ? ok(matched: "1", remaining: "") : fail(input) }) }
|
37
|
+
start(:letterOr1)
|
38
|
+
end
|
39
|
+
|
40
|
+
assert_parses parser, with: "n", remaining: ""
|
41
|
+
assert_parses parser, with: "1", remaining: ""
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe ">>" do
|
46
|
+
it "works with a single branch" do
|
47
|
+
parser = Grammar.build do
|
48
|
+
rule(:letter) { many1 { anyLetter } }
|
49
|
+
rule(:number) { many0 { anyNumber } }
|
50
|
+
rule(:letterAndNumber) { rule(:letter) >> rule(:number) }
|
51
|
+
start(:letterAndNumber)
|
52
|
+
end
|
53
|
+
|
54
|
+
assert_parses parser, with: "foo123", remaining: "", matched: "foo123"
|
55
|
+
end
|
56
|
+
|
57
|
+
it "works with multiple branches" do
|
58
|
+
parser = Grammar.build do
|
59
|
+
rule(:letter) { many1 { anyLetter } }
|
60
|
+
rule(:number) { many0 { anyNumber } }
|
61
|
+
rule(:foo) { rule(:letter) >> rule(:number) >> rule(:letter) }
|
62
|
+
start(:foo)
|
63
|
+
end
|
64
|
+
|
65
|
+
assert_parses parser, with: "foo123asd", remaining: "", matched: "foo123asd"
|
66
|
+
assert_parses parser, with: "foo123", remaining: "foo123", should_fail: true
|
67
|
+
end
|
68
|
+
|
69
|
+
it "works with rules and satisfies" do
|
70
|
+
parser = Grammar.build do
|
71
|
+
rule(:letter) { many1 { anyLetter } }
|
72
|
+
rule(:letterAndNumber) { rule(:letter) >> many0 { anyNumber } }
|
73
|
+
start(:letterAndNumber)
|
74
|
+
end
|
75
|
+
|
76
|
+
assert_parses parser, with: "foo123", remaining: ""
|
77
|
+
assert_parses parser, with: "foo", remaining: ""
|
78
|
+
assert_parses parser, with: "123a", remaining: "123a", should_fail: true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
describe ">" do
|
83
|
+
it "works with a single branch" do
|
84
|
+
parser = Grammar.build do
|
85
|
+
rule(:letter) { many1 { anyLetter } }
|
86
|
+
rule(:foo) { (rule :letter) > whitespace }
|
87
|
+
start(:foo)
|
88
|
+
end
|
89
|
+
|
90
|
+
assert_parses parser, with: "foo ", remaining: "", matched: "foo "
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "<" do
|
95
|
+
it "works with a single branch" do
|
96
|
+
parser = Grammar.build do
|
97
|
+
rule(:letter) { many1 { anyLetter } }
|
98
|
+
rule(:foo) { whitespace < (rule :letter) }
|
99
|
+
start(:foo)
|
100
|
+
end
|
101
|
+
|
102
|
+
assert_parses parser, with: " foo", remaining: "", matched: " foo"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
describe ">=" do
|
107
|
+
it "works with a single branch" do
|
108
|
+
parser = Grammar.build do
|
109
|
+
rule(:letter) { many1 { anyLetter } }
|
110
|
+
rule(:foo) { (rule :letter) >= whitespace }
|
111
|
+
start(:foo)
|
112
|
+
end
|
113
|
+
|
114
|
+
assert_parses parser, with: "foo ", remaining: "", matched: "foo"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
describe "<=" do
|
119
|
+
it "matches second, first is ignored but consumed" do
|
120
|
+
parser = Grammar.build do
|
121
|
+
rule(:letter) { many1 { anyLetter } }
|
122
|
+
rule(:foo) { whitespace <= (rule :letter) }
|
123
|
+
start(:foo)
|
124
|
+
end
|
125
|
+
|
126
|
+
assert_parses parser, with: " foo", remaining: "", matched: "foo"
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
# Build a grammar parsing JSON.
|
6
|
+
describe Grammar do
|
7
|
+
it "parses JSON" do
|
8
|
+
parser = Grammar.build do
|
9
|
+
# =======================================================================
|
10
|
+
# Here `>` means the right-hand side is optional. `<` means the left-hand side is
|
11
|
+
# optional.
|
12
|
+
# You can think of `>` and `<` as an open duck mouth, the duck eats the
|
13
|
+
# mandatory part, ignores the other. #PrimarySchoolHacks
|
14
|
+
#
|
15
|
+
# `>>` means "and then" and `|` means "or else try this".
|
16
|
+
#
|
17
|
+
# Something similar happens with `>=` and `<=`, see `README.md` for more
|
18
|
+
# info on binary combinators.
|
19
|
+
# =======================================================================
|
20
|
+
|
21
|
+
# Simple stuff
|
22
|
+
rule(:bopen) { (one "{") > whitespace }
|
23
|
+
rule(:bclose) { whitespace < (one "}") }
|
24
|
+
rule(:semicolon) { whitespace < (one ":") > whitespace }
|
25
|
+
rule(:comma) { whitespace < (one ",") > whitespace }
|
26
|
+
rule(:quote) { one '"' }
|
27
|
+
rule(:true) { str "true" }
|
28
|
+
rule(:false) { str "false" }
|
29
|
+
rule(:null) { str "null" }
|
30
|
+
|
31
|
+
# string
|
32
|
+
rule(:hexdigit) { anyChar %w[0 1 2 3 4 5 6 7 8 9 a b c d e f] }
|
33
|
+
rule(:hexdigits) { (one "u") >> (exactly(4) { (rule :hexdigit) }) }
|
34
|
+
rule(:any_escaped_char) { (one "\\") >> ((anyChar %w[" \\ / b f n r t]) | (rule :hexdigits)) }
|
35
|
+
rule(:any_unescaped_char) { (anyCharBut %w[" \\]) }
|
36
|
+
rule(:string_char) { (rule :any_unescaped_char) | (rule :any_escaped_char) }
|
37
|
+
rule(:string) { match (many0 { (rule :string_char) }), between: [(rule :quote), (rule :quote)] }
|
38
|
+
|
39
|
+
# number
|
40
|
+
rule(:decimal) { (one '.') >> many1 { anyNumber } }
|
41
|
+
rule(:cientific) { (anyChar %w[e E]) >> (anyChar %w[+ -]) >> many1 { anyNumber } }
|
42
|
+
rule(:decimal_or_cientific) { (rule :decimal) > (rule :cientific) }
|
43
|
+
rule(:positive_number) { ((one "0") | many1 { anyNumber }) > (rule :decimal_or_cientific) }
|
44
|
+
rule(:number) { (one "-") < (rule :positive_number) }
|
45
|
+
|
46
|
+
# array
|
47
|
+
rule(:array_body) { (rule :value_group) | empty }
|
48
|
+
rule(:array) { match (rule :array_body), between: [(one "["), (one "]")] }
|
49
|
+
|
50
|
+
# other stuff
|
51
|
+
rule(:value_group) { ((rule :value) >> (rule :comma) >> (rule :value_group)) | (rule :value) }
|
52
|
+
rule(:value) { (rule :string) | (rule :number) | (rule :object) | (rule :array) | (rule :true) | (rule :false) | (rule :null) }
|
53
|
+
|
54
|
+
rule(:pair) { (rule :string) >> (rule :semicolon) >> (rule :value) }
|
55
|
+
rule(:pair_group) { ((rule :pair) >> (rule :comma) >> (rule :pair_group)) | (rule :pair) }
|
56
|
+
rule(:pair_body) { (rule :pair_group) | empty }
|
57
|
+
rule(:object) { match (rule :pair_body), between: [(rule :bopen), (rule :bclose)] }
|
58
|
+
|
59
|
+
# The last rule is always the starting rule, but let's make things clear
|
60
|
+
start(:object)
|
61
|
+
end
|
62
|
+
|
63
|
+
test_parser parser, with: '{}'
|
64
|
+
test_parser parser, with: '{ "foo": 123 }'
|
65
|
+
test_parser parser, with: '{ "foo": 0.321 }'
|
66
|
+
test_parser parser, with: '{ "foo": 1.5 }'
|
67
|
+
test_parser parser, with: '{ "foo": 1.5e-5 }'
|
68
|
+
test_parser parser, with: '{ "foo": false,"b\\nar" : true }'
|
69
|
+
test_parser parser, with: '{ "foo": { "bar": "baz\\u1235" } }'
|
70
|
+
test_parser parser, with: '{ "foo": [] }'
|
71
|
+
test_parser parser, with: '{ "foo": [1] }'
|
72
|
+
test_parser parser, with: '{ "foo": [1, 2, 3, 4] }'
|
73
|
+
# Some error cases
|
74
|
+
test_parser parser, with: '{ "foo": { "bar": "baz\\u125" } }', should_fail: true
|
75
|
+
test_parser parser, with: '{ "foo": [1, 2, 3, 4,] }', should_fail: true
|
76
|
+
test_parser parser, with: '{ "foo": 123, }', should_fail: true
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
# Tutorial walkthrough: build a small grammar for assignments such as `foo = 1`.
|
6
|
+
describe Grammar do
  # Step 1: an assignment is letters, the literal " = " and a single digit.
  it "step 1" do
    parser = Grammar.build do
      rule(:assign) { many1 { anyLetter } >> (str " = ") >> anyNumber }

      start(:assign)
    end

    # Plain assertions instead of `value.must_equal`: calling expectations
    # directly on arbitrary objects is deprecated in Minitest.
    assert_equal true, parser.run("foo = 1").ok?
    assert_equal true, parser.run("bar = 3").ok?
    assert_equal true, parser.run("baz = 9").ok?
  end

  # Step 2: the whitespace on either side of "=" becomes optional.
  it "step 2" do
    parser = Grammar.build do
      rule(:equals) { whitespace < (one "=") > whitespace }
      rule(:assign) { many1 { anyLetter } >> (rule :equals) >> anyNumber }

      start(:assign)
    end

    assert_equal true, parser.run("foo = 1").ok?
    assert_equal true, parser.run("bar =3").ok?
    assert_equal true, parser.run("baz= 9").ok?
  end
end
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parser_combinator_dsl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Federico Ramirez
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: "\n\tThis library provides a DSL which you can use to easily generate
|
14
|
+
parsers in Ruby.\n\n\tAt its core, it's a parser combinator library, but you don't
|
15
|
+
need to worry about that. You build more complex expressions based on simple ones,
|
16
|
+
and match any formal language you want.\n\t"
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/base_parsers.rb
|
23
|
+
- lib/combinators.rb
|
24
|
+
- lib/grammar.rb
|
25
|
+
- lib/parser.rb
|
26
|
+
- lib/parser_combinator_dsl.rb
|
27
|
+
- lib/parser_result.rb
|
28
|
+
- test/spec_helpers.rb
|
29
|
+
- test/test_base_parsers.rb
|
30
|
+
- test/test_combinators.rb
|
31
|
+
- test/test_json_demo.rb
|
32
|
+
- test/test_tutorial.rb
|
33
|
+
homepage: https://github.com/moddx/parser-combinator
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
requirements: []
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 2.7.6
|
54
|
+
signing_key:
|
55
|
+
specification_version: 4
|
56
|
+
summary: A parser combinator in Ruby, with a pretty DSL
|
57
|
+
test_files: []
|