parser_combinator_dsl 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/base_parsers.rb +203 -0
- data/lib/combinators.rb +112 -0
- data/lib/grammar.rb +35 -0
- data/lib/parser.rb +15 -0
- data/lib/parser_combinator_dsl.rb +1 -0
- data/lib/parser_result.rb +28 -0
- data/test/spec_helpers.rb +13 -0
- data/test/test_base_parsers.rb +186 -0
- data/test/test_combinators.rb +129 -0
- data/test/test_json_demo.rb +78 -0
- data/test/test_tutorial.rb +31 -0
- metadata +57 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8ebac413cd45e47ff7de3b0b5d4aeb6c98dd265434620206e2ffd5c2d9f910ea
|
4
|
+
data.tar.gz: 9ef20b739416d15763b8a1c9451245a54a509ebd9e0436046a735d3db0200575
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c357783a6d1dd1009f7f3a329f7647764de90378e8ddc671f01d644080336886e950a009388c822772fd8fb05366a1025241966be7930e508dca6dd06878fcac
|
7
|
+
data.tar.gz: 6073c6130917b7eeeb21d597224f4349732e4075454d6634cafcb2248bed3a95c627d687fa4638604158ce3dc62a07ed730e616a799f2c5530609919a358e84c
|
data/lib/base_parsers.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
require_relative "parser_result"
|
2
|
+
|
3
|
+
# Primitive parsers and repetition helpers exposed by the grammar DSL.
#
# Each public method builds and returns a Parser. Running one of these parsers
# returns a ParserResult, except for `seq` and `satisfy`, which return whatever
# the caller-supplied callable returns.
module BaseParsers
  # Succeeds, consuming nothing, only when the input is exhausted
  # (`nil` or the empty string).
  def eof
    Parser.new do |input|
      if input.nil? || input == ""
        ParserResult.ok(matched: "", remaining: input)
      else
        ParserResult.fail(input)
      end
    end
  end

  # Always succeeds and consumes nothing.
  def empty
    Parser.new do |input|
      ParserResult.ok(matched: "", remaining: input)
    end
  end

  # Zero or more of: space, backspace, form feed, newline, carriage return, tab.
  #
  # The characters are written as double-quoted literals on purpose: `%w[]`
  # does not interpret escapes, so `%w[\n]` is a backslash followed by "n" and
  # could never equal a single input character.
  def whitespace
    many0 { anyChar([" ", "\b", "\f", "\n", "\r", "\t"]) }
  end

  # Matches exactly the character `char` at the start of the input.
  # A `nil` input fails instead of raising.
  def one(char)
    Parser.new do |input|
      if !input.nil? && input[0] == char
        ParserResult.ok(matched: char, remaining: input[1..-1])
      else
        ParserResult.fail(input)
      end
    end
  end

  # Matches the literal `string` at the start of the input.
  # A `nil` input fails instead of raising.
  def str(string)
    Parser.new do |input|
      if !input.nil? && input.start_with?(string)
        ParserResult.ok(matched: string, remaining: input[string.length..-1])
      else
        ParserResult.fail(input)
      end
    end
  end

  # Matches a single ASCII letter, `a`-`z` or `A`-`Z`.
  def anyLetter
    anyChar(("a".."z").to_a + ("A".."Z").to_a)
  end

  # Matches a single decimal digit, `0`-`9`.
  def anyNumber
    anyChar(("0".."9").to_a)
  end

  # Applies the parser returned by the block as many times as it succeeds and
  # concatenates what it matched. The result is a success only when the
  # concatenated text is non-empty.
  #
  # The block is called each time the resulting parser runs, not when `many1`
  # itself is called.
  def many1(&wrapper)
    Parser.new do |input|
      matched = ""
      remaining = input
      parser = wrapper.call

      loop do
        # Check for missing input before running the inner parser on it.
        break if remaining.nil?

        result = parser.run(remaining)
        break if result.fail?

        matched += result.matched
        # A success that consumed nothing would repeat forever; stop after it.
        stalled = result.remaining == remaining
        remaining = result.remaining
        break if stalled
      end

      ParserResult.new(!matched.empty?, remaining, matched)
    end
  end

  # Like `many1`, but zero repetitions is also a success: when nothing
  # matches, the result is ok with an empty match and the input untouched.
  def many0(&wrapper)
    Parser.new do |input|
      if input.nil? || input == ""
        ParserResult.ok(matched: "", remaining: input)
      else
        result = many1(&wrapper).run(input)
        result.ok? ? result : ParserResult.ok(matched: "", remaining: input)
      end
    end
  end

  # Runs the given parsers one after another and passes what each of them
  # matched, in order, to the callback given as the last argument.
  #
  # The resulting parser returns the callback's return value on success (the
  # unconsumed input is not passed to the callback) and a failed ParserResult
  # holding the original input as soon as any parser fails.
  #
  # Raises RuntimeError when fewer than one parser plus a callback are given.
  def seq(*args)
    *parsers, callback = args

    raise "Seq expects at least a parser and a callback." if callback.nil? || parsers.empty?

    Parser.new do |input|
      remaining = input
      captures = []

      # `all?` stops at the first failing parser. A plain `return` cannot be
      # used here: this block runs after `seq` has already returned.
      succeeded = parsers.all? do |parser|
        result = parser.run(remaining)
        next false unless result.ok?

        remaining = result.remaining
        captures << result.matched
        true
      end

      succeeded ? callback.call(*captures) : ParserResult.fail(input)
    end
  end

  # This is just an alias of lambda in the DSL: the block receives the input
  # and its return value becomes the parser's result.
  #
  def satisfy(&wrapper)
    Parser.new do |input|
      wrapper.call(input)
    end
  end

  # Matches the regular expression `re` at the start of the input.
  def regex(re)
    Parser.new do |input|
      test regex: re, with: input
    end
  end

  # Matches `rule` surrounded by the two parsers in `between`
  # (`[opening, closing]`).
  #
  # On success the result carries what `rule` matched (the delimiters are
  # consumed but not reported) and the input left after the closing delimiter.
  # Any failure yields a failed result holding the original input.
  def match(rule, between:)
    first, last = between

    Parser.new do |input|
      opening = first.run(input)
      next ParserResult.fail(input) unless opening.ok?

      middle = rule.run(opening.remaining)
      next ParserResult.fail(input) unless middle.ok?

      closing = last.run(middle.remaining)
      next ParserResult.fail(input) unless closing.ok?

      ParserResult.ok(matched: middle.matched, remaining: closing.remaining)
    end
  end

  # Matches one character, provided it is listed in `chars`.
  # Empty or `nil` input fails.
  def anyChar(chars)
    Parser.new do |input|
      first_char = input.nil? ? nil : input[0]

      if !first_char.nil? && chars.include?(first_char)
        ParserResult.ok(matched: first_char, remaining: input[1..-1])
      else
        ParserResult.fail(input)
      end
    end
  end

  # Matches one character, provided it is NOT listed in `chars`.
  # Empty or `nil` input fails: there is no character to accept, and reporting
  # a `nil` match would break callers that concatenate matches.
  def anyCharBut(chars)
    Parser.new do |input|
      first_char = input.nil? ? nil : input[0]

      if first_char.nil? || chars.include?(first_char)
        ParserResult.fail(input)
      else
        ParserResult.ok(matched: first_char, remaining: input[1..-1])
      end
    end
  end

  # Applies the parser returned by the block exactly `n.to_i` times and
  # concatenates the matches. If any repetition fails, the result is a failure
  # holding the original input.
  #
  # Unlike `many1`, the block is called once, when `exactly` is called.
  def exactly(n, &wrapper)
    parser = wrapper.call

    Parser.new do |input|
      matched = ""
      remaining = input

      success = n.to_i.times.all? do
        result = parser.run(remaining)
        next false if result.fail?

        matched += result.matched
        remaining = result.remaining
        true
      end

      if success
        ParserResult.ok(matched: matched, remaining: remaining)
      else
        ParserResult.fail(input)
      end
    end
  end

  private

  # Test against a simple regex, no groups. It would be possible to pass a callback
  # to the regex, in order to work with groups. #MAYBE #TODO
  #
  # Only a match that begins at the very start of `with` counts; a match found
  # further into the string is a failure, because the parser must consume a
  # prefix of its input.
  def test(regex:, with:)
    match = with.nil? ? nil : regex.match(with)
    return ParserResult.fail(with) if match.nil? || match.begin(0) != 0

    ParserResult.ok(matched: match[0], remaining: match.post_match)
  end
end
|
data/lib/combinators.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# Binary operators for building a new Parser out of two existing ones.
#
# The module is meant to be mixed into Parser: every operator calls `run` on
# the receiver and on `other`, and wraps the combined behaviour in a fresh
# Parser. Inside the blocks, `next` ends the block early with a result.
module Combinators
  # Alternation.
  # Usage:
  #   myParser | otherParser
  #
  # Returns the receiver's result when it succeeds; otherwise runs `other`
  # on the same input and returns its result.
  def |(other)
    Parser.new do |input|
      attempt = run(input)
      attempt.ok? ? attempt : other.run(input)
    end
  end

  # Sequence.
  # Usage:
  #   myParser >> otherParser
  #
  # Both parsers must succeed, one after the other; the matches are
  # concatenated. If the receiver fails, its own result is returned; if
  # `other` fails, the result is a failure holding the original input.
  def >>(other)
    Parser.new do |input|
      head = run(input)
      next head unless head.ok?

      # Build the prefix before running `other`, as a new String.
      prefix = "" + head.matched
      tail = other.run(head.remaining)
      next ParserResult.fail(input) unless tail.ok?

      ParserResult.ok(matched: prefix + tail.matched, remaining: tail.remaining)
    end
  end

  # The receiver is required, `other` is optional.
  #
  # When `other` also succeeds the matches are concatenated; when it does not,
  # the receiver's result is returned as is. A failing receiver produces a
  # failure holding the original input.
  def >(other)
    Parser.new do |input|
      required = run(input)
      next ParserResult.fail(input) unless required.ok?

      optional = other.run(required.remaining)
      next required unless optional.ok?

      ParserResult.ok(matched: required.matched + optional.matched, remaining: optional.remaining)
    end
  end

  # `other` is required, the receiver is optional.
  #
  # The receiver runs first; if it fails, `other` runs on the original input
  # with an empty prefix. The matches are concatenated. A failing `other`
  # produces a failure holding the original input.
  def <(other)
    Parser.new do |input|
      optional = run(input)
      prefix, rest = optional.ok? ? [optional.matched, optional.remaining] : ["", input]

      required = other.run(rest)
      next ParserResult.fail(input) unless required.ok?

      ParserResult.ok(matched: prefix + required.matched, remaining: required.remaining)
    end
  end

  # The receiver is required; `other` is consumed when present but its match
  # is left out of the result.
  #
  # When `other` fails, the receiver's result is returned unchanged. A failing
  # receiver produces a failure holding the original input.
  def >=(other)
    Parser.new do |input|
      kept = run(input)
      next ParserResult.fail(input) unless kept.ok?

      dropped = other.run(kept.remaining)
      next kept unless dropped.ok?

      ParserResult.ok(matched: kept.matched, remaining: dropped.remaining)
    end
  end

  # `other` is required; the receiver is consumed when present but its match
  # is left out of the result.
  #
  # Returns `other`'s result on success, otherwise a failure holding the
  # original input.
  def <=(other)
    Parser.new do |input|
      dropped = run(input)
      rest = dropped.ok? ? dropped.remaining : input

      kept = other.run(rest)
      kept.ok? ? kept : ParserResult.fail(input)
    end
  end
end
|
data/lib/grammar.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative "base_parsers"
|
2
|
+
require_relative "parser"
|
3
|
+
|
4
|
+
# This is the main DSL interface. It collects named grammar rules and makes
# the base parsers available inside the block given to `Grammar.build`.
#
# NOTE(review): rules live in one class-level Hash that `build` replaces on
# every call, and rule bodies are evaluated when a parser runs. A parser
# obtained from an earlier `build` therefore resolves `rule(:name)` against
# the most recent grammar; finish using one grammar before building another.
#
class Grammar
  class << self
    include BaseParsers

    # Evaluates the block with the DSL methods in scope and returns the
    # block's value (by convention a `start(:rule)` call).
    #
    # Raises RuntimeError when no block is given.
    def build(&block)
      raise "Must provide a block" unless block_given?
      @rules = {}
      instance_eval(&block)
    end

    # With a block: registers the rule under `name` and returns its Parser.
    # The block is called each time that parser runs, so it may refer to
    # rules that are defined later.
    #
    # Without a block: returns the Parser registered under `name`, raising
    # RuntimeError when there is none.
    def rule(name, &wrapper)
      @rules ||= {}
      return @rules.fetch(name.to_sym) { raise "Could not find rule: #{name}" } if wrapper.nil?
      @rules[name.to_sym] = Parser.new { |input| wrapper.call.run(input) }
    end

    # Returns the Parser registered under `name`, to be used as the grammar's
    # entry point. The lookup is the same as `rule(name)`: the name may be a
    # String or a Symbol, and an unknown rule raises RuntimeError instead of
    # returning nil.
    def start(name)
      rule(name)
    end

    # Aliases for DSL

    # Builds a successful ParserResult; takes the `matched:` and `remaining:`
    # keywords. They are forwarded as keywords so the call also works on
    # Ruby 3, where a positional Hash is no longer converted to keywords.
    def ok(**kwargs)
      ParserResult.ok(**kwargs)
    end

    # Builds a failed ParserResult; takes the remaining input.
    def fail(*args)
      ParserResult.fail(*args)
    end
  end
end
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative "combinators"
|
2
|
+
|
3
|
+
# A parser is a thin wrapper around a block that receives the input and
# returns the outcome of parsing it. The Combinators mixin supplies the
# operators used to compose parsers.
class Parser
  include Combinators

  # The block given at construction time.
  attr_reader :parser

  # Stores the given block. Raises RuntimeError when called without one.
  def initialize(&block)
    raise "Invalid argument, must provide a block" if block.nil?

    @parser = block
  end

  # Calls the stored block with `input` and returns whatever it returns.
  def run(input)
    parser.(input)
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'grammar'
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Outcome of running a parser: a success flag, the text that was matched and
# the input that is left to parse.
class ParserResult
  attr_reader :success, :remaining, :matched

  def initialize(success, remaining, matched)
    @success, @remaining, @matched = success, remaining, matched
  end

  class << self
    # Builds a successful result.
    def ok(matched:, remaining:)
      ParserResult.new(true, remaining, matched)
    end

    # Builds a failed result; the matched text is the empty string.
    def fail(remaining)
      ParserResult.new(false, remaining, "")
    end
  end

  # Returns the success flag as it was given to the constructor.
  def ok?
    success
  end

  # True only when the success flag is `false`.
  def fail?
    success == false
  end

  # Two results are equal when `other` is an instance of exactly this class
  # and its success flag, remaining input and matched text are all equal.
  def ==(other)
    return false unless other.instance_of?(self.class)

    same_outcome = other.success == success
    same_outcome && other.remaining == remaining && other.matched == matched
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Runs `parser` on `with` and asserts on the outcome: the success flag must be
# the opposite of `should_fail`, and the remaining input must equal
# `remaining`. The matched text is only compared when `matched` is given.
def assert_parses(parser, with:, remaining:, matched: nil, should_fail: false)
  outcome = parser.run(with)
  expected_success = !should_fail

  assert_equal(expected_success, outcome.success)
  assert_equal(remaining, outcome.remaining)
  return if matched.nil?

  assert_equal(matched, outcome.matched)
end
|
7
|
+
|
8
|
+
# Runs `parser` on `with` and asserts only on the success flag, which must be
# the opposite of `should_fail`.
def test_parser(parser, with:, should_fail: false)
  outcome = parser.run(with)
  assert_equal(!should_fail, outcome.success)
end
|
11
|
+
|
12
|
+
# Require everything in `/lib`
|
13
|
+
Dir[File.join(File.dirname(__FILE__), '../lib/**/*.rb')].each { |f| require f }
|
@@ -0,0 +1,186 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
# Specs for the base parsers available inside a `Grammar.build` block.
describe Grammar do
  describe "Built-in combinators" do
    it "matches eof" do
      parser = Grammar.build do
        rule(:foo) { eof }
        start(:foo)
      end

      assert_parses parser, with: "", remaining: ""
      assert_parses parser, with: "asd", remaining: "asd", should_fail: true
    end

    it "matches empty" do
      parser = Grammar.build do
        rule(:foo) { empty }
        start(:foo)
      end

      assert_parses parser, with: "asd", remaining: "asd"
      assert_parses parser, with: "", remaining: ""
    end

    it "matches whitespace" do
      parser = Grammar.build do
        rule(:foo) { whitespace }
        start(:foo)
      end

      assert_parses parser, with: " asd", remaining: "asd"
      assert_parses parser, with: "", remaining: ""
    end

    it "must parse one" do
      parser = Grammar.build do
        rule(:one) { one "a" }
        start(:one)
      end

      assert_parses parser, with: "abc", remaining: "bc"
    end

    it "must parse str" do
      parser = Grammar.build do
        rule(:foo) { str "foo" }
        start(:foo)
      end

      assert_parses parser, with: "foo", remaining: ""
      assert_parses parser, with: "foobar", remaining: "bar"
      assert_parses parser, with: "fobar", remaining: "fobar", should_fail: true
    end
  end

  it "can make rules by hand" do
    parser = Grammar.build do
      rule(:foo) { Parser.new { |input| input == "foo" ? ok(matched: "foo", remaining: "") : fail(input) } }
      start(:foo)
    end

    assert_parses parser, with: "foo", remaining: ""
  end

  it "matching rules by hand is the same as satisfy" do
    parser = Grammar.build do
      rule(:foo) { satisfy { |input| input == "foo" ? ok(matched: "foo", remaining: "") : fail(input) } }
      start(:foo)
    end

    assert_parses parser, with: "foo", remaining: ""
  end

  it "matches anyLetter" do
    parser = Grammar.build do
      rule(:any) { anyLetter }
      start(:any)
    end

    assert_parses parser, with: "abzx", remaining: "bzx"
    assert_parses parser, with: "Znasd", remaining: "nasd"
  end

  it "matches anyNumber" do
    parser = Grammar.build do
      rule(:any) { anyNumber }
      start(:any)
    end

    assert_parses parser, with: "12asd3", remaining: "2asd3"
    assert_parses parser, with: "32asd", remaining: "2asd"
  end

  it "matches many1" do
    parser = Grammar.build do
      rule(:word) { many1 { anyLetter } }
      start(:word)
    end

    assert_parses parser, with: "asd123", remaining: "123"
  end

  it "matches many0" do
    parser = Grammar.build do
      rule(:word) { many0 { anyLetter } }
      start(:word)
    end

    assert_parses parser, with: "", remaining: ""
    assert_parses parser, with: "abcde", remaining: ""
  end

  it "matches using seq" do
    # `seq` hands each parser's match to the callback and returns the
    # callback's value, so the parser yields a plain Array here.
    parser = Grammar.build do
      rule(:letter) { many1 { anyLetter } }
      rule(:number) { many0 { anyNumber } }
      rule(:letterOrNumber) { seq rule(:letter), rule(:number), lambda { |letter, number| [letter, number] } }
      start(:letterOrNumber)
    end

    assert_equal ["w", "8"], parser.run("w8")

    parser = Grammar.build do
      rule(:letter) { many1 { anyLetter } }
      rule(:letterOrNumber) { seq rule(:letter), anyNumber, lambda { |letter, number| [letter, number] } }
      start(:letterOrNumber)
    end

    assert_equal ["w", "8"], parser.run("w8")
  end

  it "uses regex" do
    parser = Grammar.build do
      rule(:foo) { regex(/foo/) }
      start(:foo)
    end

    assert_parses parser, with: "foo", remaining: ""
  end

  it "matches between" do
    parser = Grammar.build do
      rule(:quote) { one '"' }
      rule(:foo) { match(many1 { anyLetter }, between: [rule(:quote), rule(:quote)]) }
      # Name the entry point explicitly, like every other grammar in this file.
      start(:foo)
    end

    assert_parses parser, with: '"hi"', remaining: ''
  end

  it "matches anyChar" do
    parser = Grammar.build do
      rule(:foo) { anyChar ['a', 'b'] }
      start(:foo)
    end

    assert_parses parser, with: "asd", remaining: "sd"
    assert_parses parser, with: "bsd", remaining: "sd"
    assert_parses parser, with: "c", remaining: "c", should_fail: true
  end

  it "matches anyCharBut" do
    parser = Grammar.build do
      rule(:foo) { anyCharBut ['a', 'b'] }
      start(:foo)
    end

    assert_parses parser, with: "c", remaining: ""
    assert_parses parser, with: "d", remaining: ""
    assert_parses parser, with: "a", remaining: "a", should_fail: true
    assert_parses parser, with: "b", remaining: "b", should_fail: true
  end

  it "matches exactly n times" do
    parser = Grammar.build do
      rule(:foo) { exactly(4) { anyLetter } }
      start(:foo)
    end

    assert_parses parser, with: "abcde", remaining: "e"
    assert_parses parser, with: "abcd", remaining: ""
    assert_parses parser, with: "a", remaining: "a", should_fail: true
    assert_parses parser, with: "abc", remaining: "abc", should_fail: true
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
# Specs for the binary operators (`|`, `>>`, `>`, `<`, `>=`, `<=`) used to
# combine parsers inside a grammar.
describe Grammar do
  describe "|" do
    it "works with a single branch" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:number) { many0 { anyNumber } }
        rule(:letterOrNumber) { rule(:letter) | rule(:number) }
        start(:letterOrNumber)
      end

      assert_parses(grammar, with: "n", remaining: "", matched: "n")
      assert_parses(grammar, with: "6", remaining: "", matched: "6")
      assert_parses(grammar, with: "", remaining: "", matched: "")
    end

    it "works with multiple branches" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:number) { many1 { anyNumber } }
        rule(:letterOrNumber) { rule(:letter) | rule(:number) | eof }
        start(:letterOrNumber)
      end

      assert_parses(grammar, with: "n", remaining: "", matched: "n")
      assert_parses(grammar, with: "6", remaining: "", matched: "6")
      assert_parses(grammar, with: "", remaining: "", matched: "")
    end

    it "works with satisfy" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:letterOr1) do
          rule(:letter) | (satisfy { |input| input == "1" ? ok(matched: "1", remaining: "") : fail(input) })
        end
        start(:letterOr1)
      end

      assert_parses(grammar, with: "n", remaining: "")
      assert_parses(grammar, with: "1", remaining: "")
    end
  end

  describe ">>" do
    it "works with a single branch" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:number) { many0 { anyNumber } }
        rule(:letterAndNumber) { rule(:letter) >> rule(:number) }
        start(:letterAndNumber)
      end

      assert_parses(grammar, with: "foo123", remaining: "", matched: "foo123")
    end

    it "works with multiple branches" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:number) { many0 { anyNumber } }
        rule(:foo) { rule(:letter) >> rule(:number) >> rule(:letter) }
        start(:foo)
      end

      assert_parses(grammar, with: "foo123asd", remaining: "", matched: "foo123asd")
      assert_parses(grammar, with: "foo123", remaining: "foo123", should_fail: true)
    end

    it "works with rules and satisfies" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:letterAndNumber) { rule(:letter) >> many0 { anyNumber } }
        start(:letterAndNumber)
      end

      assert_parses(grammar, with: "foo123", remaining: "")
      assert_parses(grammar, with: "foo", remaining: "")
      assert_parses(grammar, with: "123a", remaining: "123a", should_fail: true)
    end
  end

  describe ">" do
    it "works with a single branch" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:foo) { rule(:letter) > whitespace }
        start(:foo)
      end

      # The optional right-hand side is part of the match when present.
      assert_parses(grammar, with: "foo ", remaining: "", matched: "foo ")
    end
  end

  describe "<" do
    it "works with a single branch" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:foo) { whitespace < rule(:letter) }
        start(:foo)
      end

      # The optional left-hand side is part of the match when present.
      assert_parses(grammar, with: " foo", remaining: "", matched: " foo")
    end
  end

  describe ">=" do
    it "works with a single branch" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:foo) { rule(:letter) >= whitespace }
        start(:foo)
      end

      # The right-hand side is consumed but left out of the match.
      assert_parses(grammar, with: "foo ", remaining: "", matched: "foo")
    end
  end

  describe "<=" do
    it "matches second, first is ignored but consumed" do
      grammar = Grammar.build do
        rule(:letter) { many1 { anyLetter } }
        rule(:foo) { whitespace <= rule(:letter) }
        start(:foo)
      end

      assert_parses(grammar, with: " foo", remaining: "", matched: "foo")
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
# Build a grammar parsing JSON.
describe Grammar do
  it "parses JSON" do
    parser = Grammar.build do
      # =======================================================================
      # Here `>` means the right hand side is optional. `<` means the left
      # hand side is optional.
      # You can think of `>` and `<` as an open duck mouth, the duck eats the
      # mandatory part, ignores the other. #PrimarySchoolHacks
      #
      # `>>` means "and then" and `|` means "or else try this".
      #
      # Something similar happens with `>=` and `<=`, see `README.md` for more
      # info on binary combinators.
      # =======================================================================

      # Simple stuff
      rule(:bopen)  { one("{") > whitespace }
      rule(:bclose) { whitespace < one("}") }
      rule(:colon)  { whitespace < one(":") > whitespace }
      rule(:comma)  { whitespace < one(",") > whitespace }
      rule(:quote)  { one '"' }
      rule(:true)   { str "true" }
      rule(:false)  { str "false" }
      rule(:null)   { str "null" }

      # string
      # JSON accepts both lowercase and uppercase hex digits in \uXXXX escapes.
      rule(:hexdigit)           { anyChar %w[0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F] }
      rule(:hexdigits)          { one("u") >> exactly(4) { rule(:hexdigit) } }
      rule(:any_escaped_char)   { one("\\") >> (anyChar(['"', "\\", "/", "b", "f", "n", "r", "t"]) | rule(:hexdigits)) }
      rule(:any_unescaped_char) { anyCharBut(['"', "\\"]) }
      rule(:string_char)        { rule(:any_unescaped_char) | rule(:any_escaped_char) }
      rule(:string)             { match(many0 { rule(:string_char) }, between: [rule(:quote), rule(:quote)]) }

      # number
      # NOTE(review): as written, an exponent is only accepted after a
      # fractional part and must carry an explicit sign, so `1e5` and `1.5e5`
      # are not matched as whole numbers by these rules.
      rule(:decimal)               { one(".") >> many1 { anyNumber } }
      rule(:scientific)            { anyChar(%w[e E]) >> anyChar(%w[+ -]) >> many1 { anyNumber } }
      rule(:decimal_or_scientific) { rule(:decimal) > rule(:scientific) }
      rule(:positive_number)       { (one("0") | many1 { anyNumber }) > rule(:decimal_or_scientific) }
      rule(:number)                { one("-") < rule(:positive_number) }

      # array
      rule(:array_body) { rule(:value_group) | empty }
      rule(:array)      { match(rule(:array_body), between: [one("["), one("]")]) }

      # other stuff
      rule(:value_group) { (rule(:value) >> rule(:comma) >> rule(:value_group)) | rule(:value) }
      rule(:value) do
        rule(:string) | rule(:number) | rule(:object) | rule(:array) | rule(:true) | rule(:false) | rule(:null)
      end

      rule(:pair)       { rule(:string) >> rule(:colon) >> rule(:value) }
      rule(:pair_group) { (rule(:pair) >> rule(:comma) >> rule(:pair_group)) | rule(:pair) }
      rule(:pair_body)  { rule(:pair_group) | empty }
      rule(:object)     { match(rule(:pair_body), between: [rule(:bopen), rule(:bclose)]) }

      # `build` returns the value of the last expression in this block, so the
      # last rule would be the starting rule anyway, but let's make things clear
      start(:object)
    end

    test_parser parser, with: '{}'
    test_parser parser, with: '{ "foo": 123 }'
    test_parser parser, with: '{ "foo": 0.321 }'
    test_parser parser, with: '{ "foo": 1.5 }'
    test_parser parser, with: '{ "foo": 1.5e-5 }'
    test_parser parser, with: '{ "foo": false,"b\\nar" : true }'
    test_parser parser, with: '{ "foo": { "bar": "baz\\u1235" } }'
    test_parser parser, with: '{ "foo": "\\u12AB" }'
    test_parser parser, with: '{ "foo": [] }'
    test_parser parser, with: '{ "foo": [1] }'
    test_parser parser, with: '{ "foo": [1, 2, 3, 4] }'
    # Some error cases
    test_parser parser, with: '{ "foo": { "bar": "baz\\u125" } }', should_fail: true
    test_parser parser, with: '{ "foo": [1, 2, 3, 4,] }', should_fail: true
    test_parser parser, with: '{ "foo": 123, }', should_fail: true
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "pry"
|
3
|
+
require_relative "spec_helpers"
|
4
|
+
|
5
|
+
# Grammars from the tutorial: a tiny `name = digit` assignment language, first
# with a fixed " = " separator, then with optional whitespace around `=`.
describe Grammar do
  it "step 1" do
    parser = Grammar.build do
      rule(:assign) { many1 { anyLetter } >> (str " = ") >> anyNumber }

      start(:assign)
    end

    # Plain assertions are used instead of calling `must_equal` on the value:
    # expectations on bare objects are deprecated in Minitest 5 and are not
    # available in Minitest 6.
    assert_equal true, parser.run("foo = 1").ok?
    assert_equal true, parser.run("bar = 3").ok?
    assert_equal true, parser.run("baz = 9").ok?
  end

  it "step 2" do
    parser = Grammar.build do
      rule(:equals) { whitespace < (one "=") > whitespace }
      rule(:assign) { many1 { anyLetter } >> (rule :equals) >> anyNumber }

      start(:assign)
    end

    assert_equal true, parser.run("foo = 1").ok?
    assert_equal true, parser.run("bar =3").ok?
    assert_equal true, parser.run("baz= 9").ok?
  end
end
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parser_combinator_dsl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Federico Ramirez
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: "\n\tThis library provides a DSL which you can use to easily generate
|
14
|
+
parsers in Ruby.\n\n\tAt its core, it's a parser combinator library, but you don't
|
15
|
+
need to worry about that. You build more complex expressions based on simple ones,
|
16
|
+
and match any formal language you want.\n\t"
|
17
|
+
email:
|
18
|
+
executables: []
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- lib/base_parsers.rb
|
23
|
+
- lib/combinators.rb
|
24
|
+
- lib/grammar.rb
|
25
|
+
- lib/parser.rb
|
26
|
+
- lib/parser_combinator_dsl.rb
|
27
|
+
- lib/parser_result.rb
|
28
|
+
- test/spec_helpers.rb
|
29
|
+
- test/test_base_parsers.rb
|
30
|
+
- test/test_combinators.rb
|
31
|
+
- test/test_json_demo.rb
|
32
|
+
- test/test_tutorial.rb
|
33
|
+
homepage: https://github.com/moddx/parser-combinator
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
requirements: []
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 2.7.6
|
54
|
+
signing_key:
|
55
|
+
specification_version: 4
|
56
|
+
summary: A parser combinator in Ruby, with a pretty DSL
|
57
|
+
test_files: []
|