parsby 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +41 -0
- data/README.md +607 -0
- data/Rakefile +6 -0
- data/bin/all-methods +35 -0
- data/bin/console +40 -0
- data/bin/methods-with-pending-documentation +49 -0
- data/bin/setup +8 -0
- data/bin/tested-methods +47 -0
- data/bin/vestigial-methods +30 -0
- data/lib/parsby.rb +804 -0
- data/lib/parsby/combinators.rb +384 -0
- data/lib/parsby/example/arithmetic_parser.rb +96 -0
- data/lib/parsby/example/csv_parser.rb +41 -0
- data/lib/parsby/example/json_parser.rb +92 -0
- data/lib/parsby/example/lisp_parser.rb +135 -0
- data/lib/parsby/version.rb +3 -0
- data/parsby.gemspec +42 -0
- metadata +121 -0
@@ -0,0 +1,384 @@
|
|
1
|
+
class Parsby
|
2
|
+
module Combinators
|
3
|
+
extend self
|
4
|
+
|
5
|
+
module ModuleMethods
|
6
|
+
# Defines a combinator method called +name+ whose body is the given
# block. The only reason to use this over regular def syntax is to get
# automatic labels: the resulting parser is labeled with the combinator
# name followed by the inspected arguments, so the label resembles the
# ruby expression that built it.
#
# name:: method name of the combinator (Symbol).
# wrap:: when true (the default) the parser returned by the block is
#        wrapped in a new Parsby carrying the automatic label; when
#        false the block's own parser is relabeled with <tt>%</tt>.
# b::    block implementing the combinator; it must return a parser.
def define_combinator(name, wrap: true, &b)
  # Convert block to method. This is necessary not only to convert
  # the proc to something that'll verify arity, but also to get
  # super() in b to work.
  define_method(name, &b)
  m = if defined? instance_method
    instance_method name
  else
    # self is probably main
    method(name).unbind
  end

  # Lambda used to access private module method from instance method.
  inspectable_labels_lambda = lambda { |x| inspectable_labels(x) }

  define_method name do |*args, &b2|
    inspected_args = inspectable_labels_lambda.call(args).map(&:inspect)
    label = name.to_s
    label += "(#{inspected_args.join(", ")})" unless inspected_args.empty?
    p = m.bind(self).call(*args, &b2)
    if wrap
      # Wrap in new parser so we don't overwrite another automatic
      # label.
      Parsby.new(label) { |c| p.parse c }
    else
      p % label
    end
  end

  # The wrapper above only declares *args. Since Ruby 3.0 a trailing
  # keyword hash captured that way is re-splatted as a positional
  # argument, so combinators declaring keyword parameters (for example
  # lit's case_sensitive:) would raise ArgumentError. Flagging the
  # wrapper with ruby2_keywords keeps the keywords as keywords; the guard
  # keeps this a no-op on rubies that predate ruby2_keywords.
  ruby2_keywords(name) if respond_to?(:ruby2_keywords, true)
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
# Builds a fresh object whose #inspect returns the string +s+ verbatim.
def inspectable_as(s)
  stand_in = Object.new
  stand_in.define_singleton_method(:inspect) { s }
  stand_in
end
|
50
|
+
|
51
|
+
# Walks arrays and hashes recursively, replacing every Parsby object with
# a stand-in whose #inspect is that parser's label. Inspecting the result
# therefore resembles the original combinator expression. Redefining
# Parsby#inspect would achieve the same, but ruby's default #inspect is
# kept on purpose.
#
# Anything that is not a Parsby, Array or Hash is returned unchanged.
def inspectable_labels(arg)
  case arg
  when Parsby
    inspectable_as(arg.label)
  when Array
    # for methods like group() that accept arguments spliced or not
    arg.map { |element| inspectable_labels(element) }
  when Hash
    # for key arguments; only the values are converted
    arg.map { |key, value| [key, inspectable_labels(value)] }.to_h
  else
    arg
  end
end
|
69
|
+
|
70
|
+
# Include hook: extends the including module with ModuleMethods, which
# gives it define_combinator (and this same hook).
def included(base)
  base.extend(ModuleMethods)
end
|
73
|
+
end
|
74
|
+
|
75
|
+
extend ModuleMethods
|
76
|
+
|
77
|
+
# Parses the string +e+ exactly as given. It reads e.length characters
# from the input and fails with ExpectationFailed unless they equal +e+.
# With <tt>case_sensitive: false</tt> both sides are downcased before
# comparing. The result is the text that was read, not +e+.
define_combinator :lit, wrap: false do |e, case_sensitive: true|
  Parsby.new(e.inspect) do |ctx|
    chunk = ctx.bio.read(e.length)
    matched =
      if case_sensitive
        chunk == e
      else
        # to_s guards the downcase call when the read returned nil
        chunk.to_s.downcase == e.downcase
      end
    raise ExpectationFailed.new(ctx) unless matched
    chunk
  end
end
|
88
|
+
|
89
|
+
# Case-insensitive variant of lit.
define_combinator :ilit do |e|
  lit(e, case_sensitive: false)
end
|
92
|
+
|
93
|
+
# Same as <tt>p * n</tt>, labeled as count(n, <label of p>).
define_combinator :count do |n, p|
  repeated = p * n
  repeated % "count(#{n}, #{p.label})"
end
|
97
|
+
|
98
|
+
# Parses one character with any_char and checks it against +r+ using =~.
# Fails with ExpectationFailed when it does not match; otherwise the
# character is the result. The parser is labeled with r.inspect.
define_combinator :char_matching, wrap: false do |r|
  Parsby.new(r.inspect) do |ctx|
    ch = any_char.parse(ctx)
    raise ExpectationFailed.new(ctx) unless ch =~ r
    ch
  end
end
|
108
|
+
|
109
|
+
# Parses one or more decimal digits (as matched by \d+) and returns them
# as an Integer. Labeled with the "number" token.
define_combinator :decimal do
  digits = many_1(decimal_digit)
  number = digits.fmap { |ds| ds.join.to_i }
  number % token("number")
end
|
113
|
+
|
114
|
+
# Parses a decimal number with an optional sign, an optional fractional
# part and an optional exponent, e.g. "-12.05e+3". This is taken from the
# Json subparser for numbers.
#
# The value is built as whole + fraction, negated when the leading sign
# is "-", then multiplied by 10 ** exponent. Without a fractional part
# the mantissa stays an Integer.
#
# NOTE: in Ruby 10 ** e is a Rational for a negative Integer e, so an
# Integer mantissa with a negative exponent (e.g. "5e-2") results in a
# Rational rather than a Float.
define_combinator :decimal_fraction do
  sign = lit("-") | lit("+")

  # The fractional digits are kept as a string. Parsing them with decimal
  # would convert them to an Integer and drop leading zeros, which made
  # "1.05" evaluate to 1.5 instead of 1.05.
  fractional_digits = join(many_1(decimal_digit))

  group(
    optional(sign),
    decimal,
    optional(group(
      lit("."),
      fractional_digits,
    )),
    optional(group(
      ilit("e"),
      optional(sign),
      decimal,
    )),
  ).fmap do |(number_sign, whole, (_, fractional), (_, exponent_sign, exponent))|
    n = whole
    # Scale by the number of digits actually written, zeros included.
    n += fractional.to_f / 10 ** fractional.length if fractional
    n *= -1 if number_sign == "-"
    if exponent
      e = exponent
      e *= -1 if exponent_sign == "-"
      n *= 10 ** e
    end
    n
  end
end
|
141
|
+
|
142
|
+
# Parses a single character in the range 0-9. The result is the
# character as a string, not a number.
define_combinator :decimal_digit do
  char_matching(/[0-9]/)
end
|
146
|
+
|
147
|
+
# Parses a single hexadecimal digit: a decimal digit or a letter a-f.
# The optional +lettercase+ selects which letters are accepted and must
# be one of :insensitive (default), :upper or :lower; anything else
# raises ArgumentError when the parser is built.
define_combinator :hex_digit do |lettercase = :insensitive|
  digit = decimal_digit
  letter =
    case lettercase
    when :insensitive then char_matching(/[a-fA-F]/)
    when :upper then char_matching(/[A-F]/)
    when :lower then char_matching(/[a-f]/)
    else
      raise ArgumentError,
        "#{lettercase.inspect}: unrecognized; argument should be one of " \
        ":insensitive, :upper, or :lower"
    end
  digit | letter
end
|
164
|
+
|
165
|
+
# Parser whose body does nothing but raise ExpectationFailed. It is the
# starting value of <tt>choice</tt>, which makes an empty list of
# alternatives always fail. Corresponds with mzero in Haskell's Parsec.
define_combinator :unparseable, wrap: false do
  Parsby.new do |ctx|
    raise ExpectationFailed.new(ctx)
  end
end
|
171
|
+
|
172
|
+
# Combines the given parsers with <tt>|</tt>, starting from unparseable,
# so each is tried until one succeeds. Parsers may be passed spliced or
# as (nested) arrays. An empty list yields a parser that always fails,
# like how [].any? is false.
define_combinator :choice, wrap: false do |*ps|
  alternatives = ps.flatten

  splicer.start do |m|
    alternatives.inject(unparseable) do |acc, alternative|
      acc | m.end(alternative)
    end
  end
end
|
183
|
+
|
184
|
+
# Shorthand accessor for the Parsby::Splicer constant.
def splicer
  ::Parsby::Splicer
end
|
187
|
+
|
188
|
+
# Parses a single character that must be one of the characters of the
# string +s+; fails with ExpectationFailed otherwise.
define_combinator :char_in do |s|
  ~splicer.start do
    Parsby.new do |ctx|
      ch = any_char.parse(ctx)
      raise ExpectationFailed.new(ctx) unless s.chars.include?(ch)
      ch
    end
  end
end
|
200
|
+
|
201
|
+
# Parses a string of 0 or more contiguous whitespace characters (" ",
# "\t", "\n", "\r"). Falls back to the empty string when whitespace_1
# fails. Also available as ws.
define_combinator :whitespace do
  nothing = pure("")
  whitespace_1 | nothing
end

alias_method :ws, :whitespace
|
208
|
+
|
209
|
+
# Parses a string of 1 or more contiguous whitespace characters (" ",
# "\t", "\n", "\r") and returns them joined. Also available as ws_1.
define_combinator :whitespace_1 do
  ~splicer.start do
    join(many_1(char_in(" \t\n\r")))
  end
end

alias_method :ws_1, :whitespace_1
|
216
|
+
|
217
|
+
# Expects +p+ to be surrounded by optional whitespace: equivalent to
# <tt>ws > p < ws</tt>.
define_combinator :spaced do |p|
  ~splicer.start do |m|
    with_leading_ws = ws > m.end(p)
    with_leading_ws < ws
  end
end
|
221
|
+
|
222
|
+
# Convenient substitute for <tt>left > p < right</tt> when <tt>p</tt> is
# long to write out.
define_combinator :between do |left, right, p|
  opened = left > p
  opened < right
end
|
227
|
+
|
228
|
+
# Turns +p+ into a parser that doesn't consume input, by delegating to
# <tt>p.peek</tt>.
define_combinator :peek, wrap: false do |p|
  Parsby.new do |ctx|
    p.peek(ctx)
  end
end
|
232
|
+
|
233
|
+
# Parser that results in +x+ and never touches the input.
define_combinator :pure, wrap: false do |x|
  Parsby.new do
    x
  end
end
|
237
|
+
|
238
|
+
# Delays construction of the parser until parsing-time: the block is
# called on every parse and the parser it returns is run. This allows
# recursive parsers, which would otherwise overflow the stack at
# construction-time.
define_combinator :lazy, wrap: false do |&b|
  # Can't have a better label, because we can't know what the parser is
  # until parsing time.
  Parsby.new do |ctx|
    deferred = b.call
    deferred.parse(ctx)
  end
end
|
246
|
+
|
247
|
+
# Makes a recursive parser. The block takes one argument and returns a
# parser; the argument it receives is the very parser being defined.
#
# Example:
#
#   recursive {|p|
#     single(lit("(") > optional(p) < lit(")"))
#   }.parse "((()))"
#   #=> [[[nil]]]
#
# This is analogous to Haskell's fix function.
define_combinator :recursive, wrap: false do |&b|
  # Declared first so the lazy block can close over the variable that
  # ends up holding the parser itself.
  parser = nil
  parser = lazy { b.call(parser) }
end
|
261
|
+
|
262
|
+
# Similar to Enumerable's #reduce. Parses with +init+, passes the result
# to the block, parses with the parser the block returns, passes that
# result to the block again, and so on until a returned parser raises
# ExpectationFailed. Results in the last value obtained before that
# failure.
#
# The only way for this parser to fail is if the initial parser passed
# as argument fails.
#
# This combinator is meant to make shift-reduce parsers for LR
# grammars.
define_combinator :reduce, wrap: false do |init, &b|
  init.then do |accum|
    Parsby.new do |ctx|
      begin
        while true
          accum = b.call(accum).parse(ctx)
        end
      rescue ExpectationFailed
        accum
      end
    end
  end
end
|
283
|
+
|
284
|
+
# Prefix form of <tt>p.fmap</tt>: forwards the block to +p+'s fmap.
define_combinator :fmap do |p, &b|
  p.fmap(&b)
end
|
287
|
+
|
288
|
+
# Results in an empty array without consuming input. This is meant to be
# used to start off use of <<.
#
# Example:
#
#   (empty << lit("foo") << lit("bar")).parse "foobar"
#   => ["foo", "bar"]
define_combinator :empty do
  pure([])
end
|
298
|
+
|
299
|
+
# Groups the results of the given parsers into an array, by chaining them
# onto empty with <tt><<</tt>. Parsers may be passed spliced or as
# (nested) arrays.
define_combinator :group do |*ps|
  members = ps.flatten
  ~splicer.start do |m|
    members.inject(empty) { |acc, member| acc << m.end(member) }
  end
end
|
308
|
+
|
309
|
+
# Wraps the result of +p+ in a one-element array. This is to be able to
# do
#
#   single(...) + many(...)
define_combinator :single do |p|
  p.fmap { |result| [result] }
end
|
315
|
+
|
316
|
+
# Runs +p+ repeatedly until it raises Error or the input is at EOF, and
# results in an array of the collected results. Because it can result in
# an empty array, this parser never fails because of +p+.
#
# NOTE(review): the loop only stops on EOF or on an Error from +p+, so a
# +p+ that succeeds without consuming input looks like it would never
# terminate -- confirm before passing such parsers.
define_combinator :many, wrap: false do |p|
  Parsby.new do |ctx|
    results = []
    until ctx.bio.eof?
      begin
        results << p.parse(ctx)
      rescue Error
        break
      end
    end
    results
  end
end
|
332
|
+
|
333
|
+
# Same as many, but fails if +p+ can't match even once.
define_combinator :many_1 do |p|
  first = single(p)
  rest = many(p)
  first + rest
end
|
337
|
+
|
338
|
+
# Like many, but takes a separator parser +s+ as first argument. Results
# in the list of results of +p+, or in an empty list when sep_by_1 fails.
define_combinator :sep_by do |s, p|
  at_least_one = sep_by_1(s, p)
  at_least_one | empty
end
|
344
|
+
|
345
|
+
# Like sep_by, but fails if +p+ can't match even once. After the first
# +p+, every further element is parsed as <tt>s > p</tt>.
define_combinator :sep_by_1 do |s, p|
  first = single(p)
  rest = many(s > p)
  first + rest
end
|
349
|
+
|
350
|
+
# Calls #join on the (Array) result of +p+.
define_combinator :join do |p|
  p.fmap { |parts| parts.join }
end
|
354
|
+
|
355
|
+
# Tries +p+ and results in nil when it fails.
define_combinator :optional do |p|
  fallback = pure(nil)
  p | fallback
end
|
359
|
+
|
360
|
+
# Reads one character from the input. Fails with ExpectationFailed only
# when the input is at EOF.
define_combinator :any_char, wrap: false do
  Parsby.new do |ctx|
    raise ExpectationFailed.new(ctx) if ctx.bio.eof?
    ctx.bio.read(1)
  end
end
|
369
|
+
|
370
|
+
# Succeeds with nil when the input is at EOF; fails with
# ExpectationFailed otherwise. Labeled :eof.
define_combinator :eof, wrap: false do
  Parsby.new(:eof) do |ctx|
    raise ExpectationFailed.new(ctx) unless ctx.bio.eof?
  end
end
|
378
|
+
|
379
|
+
# Builds a Parsby::Token with the given name.
def token(name)
  ::Parsby::Token.new(name)
end
|
383
|
+
end
|
384
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require "parsby"
|
2
|
+
|
3
|
+
module Parsby::Example
  # Example parser for arithmetic expressions built from
  # Parsby::Combinators. Operators are grouped into precedence levels,
  # each level being defined in terms of the next higher one.
  module ArithmeticParser
    include Parsby::Combinators
    extend self

    # Parses +io+ with the expr combinator.
    def parse(io)
      expr.parse(io)
    end

    # Defines combinator +name+ taking a left and a right subexpression
    # parser. It results in [left, operator, right], with optional
    # whitespace allowed around the operator +op+.
    def self.define_binary_op(name, op)
      define_combinator(name) do |left_subexpr, right_subexpr|
        group(left_subexpr, spaced(ilit(op)), right_subexpr)
      end
    end

    {
      add_op: "+",
      sub_op: "-",
      mul_op: "*",
      div_op: "/",
      exp_op: "^",
    }.each do |name, op|
      define_binary_op(name, op)
    end

    # Defines combinator +name+ taking one subexpression parser. It
    # results in [operator, operand], with optional whitespace allowed
    # between the operator +op+ and its operand.
    def self.define_unary_op(name, op)
      define_combinator(name) do |subexpr|
        group(ilit(op), ws > subexpr)
      end
    end

    {
      neg_op: "-",
      pos_op: "+",
    }.each do |name, op|
      define_unary_op(name, op)
    end

    # hpe - higher precedence level
    # spe - same precedence level

    # Level whose binary operators recurse on the right operand; falls
    # back to +hpe+ when no operator applies.
    def right_associative_binary_precedence_level(hpe, operators)
      recursive do |spe|
        operations = operators.map { |op| send(op, hpe, spe) }
        choice(*operations, hpe)
      end
    end

    # Level whose binary operators fold to the left: starting from +hpe+,
    # each parsed operation becomes the left operand of the next one.
    def left_associative_binary_precedence_level(hpe, operators)
      reduce(hpe) do |left_expr|
        operations = operators.map { |op| send(op, pure(left_expr), hpe) }
        choice(*operations)
      end
    end

    # Level of prefix operators applied to an expression of this same
    # level; falls back to +hpe+ when no operator applies.
    def unary_precedence_level(hpe, operators)
      recursive do |spe|
        operations = operators.map { |op| send(op, spe) }
        choice(*operations, hpe)
      end
    end

    # Full expression parser. Levels are stacked from highest to lowest
    # precedence; lazy defers construction so expr can refer to itself
    # inside the parentheses.
    define_combinator :expr do
      lazy do
        atom = choice(
          decimal_fraction,
          between(lit("("), lit(")"), expr),
        )

        exponentiation = right_associative_binary_precedence_level(atom, [:exp_op])
        signed = unary_precedence_level(exponentiation, [:neg_op, :pos_op])
        product = left_associative_binary_precedence_level(signed, [:mul_op, :div_op])

        left_associative_binary_precedence_level(product, [:add_op, :sub_op])
      end
    end
  end
end
|