parsby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +41 -0
- data/README.md +607 -0
- data/Rakefile +6 -0
- data/bin/all-methods +35 -0
- data/bin/console +40 -0
- data/bin/methods-with-pending-documentation +49 -0
- data/bin/setup +8 -0
- data/bin/tested-methods +47 -0
- data/bin/vestigial-methods +30 -0
- data/lib/parsby.rb +804 -0
- data/lib/parsby/combinators.rb +384 -0
- data/lib/parsby/example/arithmetic_parser.rb +96 -0
- data/lib/parsby/example/csv_parser.rb +41 -0
- data/lib/parsby/example/json_parser.rb +92 -0
- data/lib/parsby/example/lisp_parser.rb +135 -0
- data/lib/parsby/version.rb +3 -0
- data/parsby.gemspec +42 -0
- metadata +121 -0
@@ -0,0 +1,384 @@
|
|
1
|
+
class Parsby
|
2
|
+
module Combinators
|
3
|
+
extend self
|
4
|
+
|
5
|
+
module ModuleMethods
|
6
|
+
# Defines combinator +name+ from the given block. The only reason to use
# this over regular def syntax is to get automatic labels: the parser a
# combinator returns is labeled with something resembling the ruby
# expression that built it, e.g. <tt>lit("foo")</tt>.
#
# With <tt>wrap: true</tt> (the default) the parser returned by the block
# is wrapped in a new, labeled Parsby. With <tt>wrap: false</tt> the
# returned parser itself is relabeled using <tt>%</tt>.
#
# Returns whatever define_method returns for the generated wrapper.
def define_combinator(name, wrap: true, &b)
  # Convert block to method. This is necessary not only to convert the
  # proc to something that'll verify arity, but also to get super() in b
  # to work.
  define_method(name, &b)
  m = if defined? instance_method
    instance_method name
  else
    # self is probably main
    method(name).unbind
  end

  # Lambda used to access private module method from instance method.
  inspectable_labels_lambda = lambda { |x| inspectable_labels(x) }

  defined_wrapper = define_method name do |*args, &b2|
    inspected_args = inspectable_labels_lambda.call(args).map(&:inspect)
    label = name.to_s
    label += "(#{inspected_args.join(", ")})" unless inspected_args.empty?
    p = m.bind(self).call(*args, &b2)
    if wrap
      # Wrap in new parser so we don't overwrite another automatic label.
      Parsby.new(label) { |c| p.parse c }
    else
      p % label
    end
  end

  # The wrapper only takes *args, so on Ruby 3 keyword arguments given to a
  # combinator (e.g. <tt>lit e, case_sensitive: false</tt>) would reach the
  # original method as a positional Hash and raise ArgumentError. Flagging
  # the wrapper with ruby2_keywords makes the splat above pass them on as
  # keywords again. Rubies older than 2.7 lack the method and don't need it.
  ruby2_keywords(name) if respond_to?(:ruby2_keywords, true)

  defined_wrapper
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
# Builds a fresh object whose #inspect returns the string +s+ verbatim,
# so that it prints exactly as +s+ when inspected as part of an array or
# hash.
def inspectable_as(s)
  stand_in = Object.new
  stand_in.define_singleton_method(:inspect) { s }
  stand_in
end
|
50
|
+
|
51
|
+
# Walks +arg+, descending into arrays and hash values, and swaps every
# Parsby it finds for a stand-in object whose #inspect is that parser's
# label. Anything else is returned untouched. Inspecting the result then
# reads like the original combinator expression. Redefining #inspect on
# Parsby to return the label would also have worked, but ruby's default
# #inspect is nice to keep in general.
def inspectable_labels(arg)
  case arg
  when Parsby
    inspectable_as(arg.label)
  when Array
    # For methods like group() that accept arguments spliced or not.
    arg.map { |element| inspectable_labels(element) }
  when Hash
    # For key arguments.
    arg.map { |key, value| [key, inspectable_labels(value)] }.to_h
  else
    arg
  end
end
|
69
|
+
|
70
|
+
# Hook run when the module is included somewhere: extends the including
# module/class with ModuleMethods.
def included(base)
  base.extend(ModuleMethods)
end
|
73
|
+
end
|
74
|
+
|
75
|
+
extend ModuleMethods
|
76
|
+
|
77
|
+
# Matches the string +e+ literally. Reads <tt>e.length</tt> from the input
# and compares it with +e+; with <tt>case_sensitive: false</tt> both sides
# are downcased before comparing. Results in what was read from the input.
define_combinator :lit, wrap: false do |e, case_sensitive: true|
  Parsby.new(e.inspect) do |ctx|
    chunk = ctx.bio.read(e.length)
    matched = if case_sensitive
      chunk == e
    else
      chunk.to_s.downcase == e.downcase
    end
    raise ExpectationFailed.new(ctx) unless matched
    chunk
  end
end
|
88
|
+
|
89
|
+
# Case-insensitive version of lit.
define_combinator :ilit do |e|
  lit(e, case_sensitive: false)
end
|
92
|
+
|
93
|
+
# Equivalent to <tt>p * n</tt>, labeled as <tt>count(n, p)</tt>.
define_combinator :count do |n, p|
  (p * n) % "count(#{n}, #{p.label})"
end
|
97
|
+
|
98
|
+
# Parses one char and requires it to match +r+ via =~. Only a single
# char is ever compared.
define_combinator :char_matching, wrap: false do |r|
  Parsby.new(r.inspect) do |ctx|
    candidate = any_char.parse ctx
    raise ExpectationFailed.new(ctx) unless candidate =~ r
    candidate
  end
end
|
108
|
+
|
109
|
+
# Parses one or more decimal digits (as in \d+) and results in the
# corresponding integer.
define_combinator :decimal do
  digits = many_1(decimal_digit)
  number = digits.fmap { |ds| ds.join.to_i }
  number % token("number")
end
|
113
|
+
|
114
|
+
# Parses a decimal number with optional sign, optional fractional part and
# optional exponent, e.g. <tt>-12.05e+3</tt>. This is taken from the Json
# subparser for numbers.
#
# The fractional digits are kept as a string until they're converted, so
# that leading zeros count: "1.05" results in 1.05, not 1.5.
#
# NOTE: with an integer mantissa and a negative exponent the result is a
# Rational, since Integer#** with a negative exponent returns a Rational.
define_combinator :decimal_fraction do
  sign = lit("-") | lit("+")
  group(
    optional(sign),
    decimal,
    optional(group(
      lit("."),
      # Not +decimal+: converting to an integer would drop leading zeros.
      join(many_1(decimal_digit)),
    )),
    optional(group(
      ilit("e"),
      optional(sign),
      decimal,
    )),
  ).fmap do |(number_sign, whole, (_, fractional), (_, exponent_sign, exponent))|
    n = whole
    # Scale by the number of digits actually written after the point.
    n += fractional.to_f / 10 ** fractional.length if fractional
    n *= -1 if number_sign == "-"
    if exponent
      e = exponent
      e *= -1 if exponent_sign == "-"
      n *= 10 ** e
    end
    n
  end
end
|
141
|
+
|
142
|
+
# Parses one char in the range 0-9. The result is the char as a string,
# not a number.
define_combinator :decimal_digit do
  char_matching(/[0-9]/)
end
|
146
|
+
|
147
|
+
# Parses one hexadecimal digit. The optional +lettercase+ argument picks
# which letters are accepted: :insensitive (default), :upper, or :lower.
# Any other value raises ArgumentError when the parser is built.
define_combinator :hex_digit do |lettercase = :insensitive|
  letters = case lettercase
  when :insensitive then char_matching(/[a-fA-F]/)
  when :upper then char_matching(/[A-F]/)
  when :lower then char_matching(/[a-f]/)
  else
    raise ArgumentError,
      "#{lettercase.inspect}: unrecognized; argument should be one of " \
      ":insensitive, :upper, or :lower"
  end

  decimal_digit | letters
end
|
164
|
+
|
165
|
+
# A parser that raises ExpectationFailed right away, without reading any
# input. It's used at least by <tt>choice</tt> as the starting value, which
# is what makes an empty list of choices always fail. It corresponds with
# mzero in Haskell's Parsec.
define_combinator :unparseable, wrap: false do
  Parsby.new do |ctx|
    raise ExpectationFailed.new(ctx)
  end
end
|
171
|
+
|
172
|
+
# Tries each of the given parsers until one succeeds. Parsers may be
# passed spliced or as (nested) arrays. With no parsers at all the result
# always fails, like how [].any? is false.
define_combinator :choice, wrap: false do |*ps|
  alternatives = ps.flatten

  splicer.start do |m|
    alternatives.reduce(unparseable) { |acc, alt| acc | m.end(alt) }
  end
end
|
183
|
+
|
184
|
+
# Shorthand for Parsby::Splicer, as used by the combinators in this
# module.
def splicer
  Parsby::Splicer
end
|
187
|
+
|
188
|
+
# Parses one char and requires it to be one of the chars of the string
# +s+.
define_combinator :char_in do |s|
  ~splicer.start do
    Parsby.new do |ctx|
      candidate = any_char.parse ctx
      raise ExpectationFailed.new(ctx) unless s.chars.include?(candidate)
      candidate
    end
  end
end
|
200
|
+
|
201
|
+
# Parses a run of zero or more whitespace characters (" ", "\t", "\n",
# "\r"). Results in "" when there is none.
define_combinator :whitespace do
  whitespace_1 | pure("")
end
|
206
|
+
|
207
|
+
alias_method :ws, :whitespace
|
208
|
+
|
209
|
+
# Parses a run of one or more whitespace characters (" ", "\t", "\n",
# "\r"), joined into a single string.
define_combinator :whitespace_1 do
  ~splicer.start do
    join(many_1(char_in(" \t\n\r")))
  end
end
|
214
|
+
|
215
|
+
alias_method :ws_1, :whitespace_1
|
216
|
+
|
217
|
+
# Parses +p+ with optional whitespace allowed on both sides of it.
define_combinator :spaced do |p|
  ~splicer.start do |m|
    ws > m.end(p) < ws
  end
end
|
221
|
+
|
222
|
+
# Convenient substitute for <tt>left > p < right</tt>, for when <tt>p</tt>
# is large to write.
define_combinator :between do |left, right, p|
  (left > p) < right
end
|
227
|
+
|
228
|
+
# Makes a parser that runs <tt>p.peek</tt> instead of <tt>p.parse</tt>,
# i.e. one that doesn't consume input.
define_combinator :peek, wrap: false do |p|
  Parsby.new do |ctx|
    p.peek ctx
  end
end
|
232
|
+
|
233
|
+
# Makes a parser that results in +x+ and never touches the input.
define_combinator :pure, wrap: false do |x|
  Parsby.new do
    x
  end
end
|
237
|
+
|
238
|
+
# Puts off building the parser until parsing-time: the block is called on
# every parse and the parser it returns is used right away. This is what
# allows recursive parsers, which would otherwise overflow the stack at
# construction-time.
define_combinator :lazy, wrap: false do |&b|
  # No better label is possible here, since the parser isn't known until
  # parsing time.
  Parsby.new do |ctx|
    b.call.parse ctx
  end
end
|
246
|
+
|
247
|
+
# Builds a recursive parser. The block takes one argument and returns a
# parser; the argument it receives stands for that very parser.
#
# Example:
#
#   recursive {|p|
#     single(lit("(") > optional(p) < lit(")"))
#   }.parse "((()))"
#   #=> [[[nil]]]
#
# This is analogous to Haskell's fix function.
define_combinator :recursive, wrap: false do |&b|
  # The lazy block closes over the variable being assigned, so by the
  # time it runs it sees the lazy parser itself.
  this = lazy { b.call(this) }
end
|
261
|
+
|
262
|
+
# Similar to Enumerable's #reduce. Parses with +init+, hands the result to
# the block, parses with the parser the block returns, hands that result
# to the block again, and so on. It stops once a parser returned by the
# block fails, and results in the last value obtained before the failure.
#
# So the only way for this parser to fail is for +init+ itself to fail.
#
# This combinator is meant to make shift-reduce parsers for LR grammars.
define_combinator :reduce, wrap: false do |init, &b|
  init.then do |accum|
    Parsby.new do |ctx|
      begin
        while true
          accum = b.call(accum).parse(ctx)
        end
      rescue ExpectationFailed
        # The failed step is dropped; keep what we had so far.
        accum
      end
    end
  end
end
|
283
|
+
|
284
|
+
# Combinator form of <tt>p.fmap</tt>: forwards the block to +p+'s #fmap.
define_combinator :fmap do |p, &b|
  p.fmap(&b)
end
|
287
|
+
|
288
|
+
# Results in an empty array and consumes no input. Meant as the starting
# point for chains of <<.
#
# Example:
#
#   (empty << string("foo") << string("bar")).parse "foobar"
#   => ["foo", "bar"]
define_combinator :empty do
  pure([])
end
|
298
|
+
|
299
|
+
# Runs the given parsers in order and collects their results into an
# array. Parsers may be passed spliced or as (nested) arrays.
define_combinator :group do |*ps|
  members = ps.flatten
  ~splicer.start do |m|
    members.reduce(empty) { |acc, member| acc << m.end(member) }
  end
end
|
308
|
+
|
309
|
+
# Puts the result of +p+ in a one-element array. This is to be able to do
#
#   single(...) + many(...)
define_combinator :single do |p|
  p.fmap { |result| [result] }
end
|
315
|
+
|
316
|
+
# Applies +p+ over and over, collecting the results in an array, until
# +p+ fails or the input hits EOF. Since the array may be empty, this
# parser never fails.
define_combinator :many, wrap: false do |p|
  Parsby.new do |ctx|
    results = []
    until ctx.bio.eof?
      begin
        results << p.parse(ctx)
      rescue Error
        break
      end
    end
    results
  end
end
|
332
|
+
|
333
|
+
# Like many, but +p+ must match at least once or the parser fails.
define_combinator :many_1 do |p|
  first = single(p)
  rest = many(p)
  first + rest
end
|
337
|
+
|
338
|
+
# Like many, but with a separator parser +s+ expected between matches of
# +p+. Results in the list of +p+'s results. When +p+ doesn't match even
# once the result is an empty list, so it never fails.
define_combinator :sep_by do |s, p|
  sep_by_1(s, p) | empty
end
|
344
|
+
|
345
|
+
# Like sep_by, but +p+ must match at least once or the parser fails.
define_combinator :sep_by_1 do |s, p|
  first = single(p)
  rest = many(s > p)
  first + rest
end
|
349
|
+
|
350
|
+
# Calls #join on the (Array) result of +p+.
define_combinator :join do |p|
  p.fmap { |parts| parts.join }
end
|
354
|
+
|
355
|
+
# Tries +p+; if it fails, results in nil instead.
define_combinator :optional do |p|
  p | pure(nil)
end
|
359
|
+
|
360
|
+
# Reads a single char, whatever it is. EOF is the only thing that makes
# it fail.
define_combinator :any_char, wrap: false do
  Parsby.new do |ctx|
    raise ExpectationFailed.new(ctx) if ctx.bio.eof?
    ctx.bio.read 1
  end
end
|
369
|
+
|
370
|
+
# Succeeds only at EOF, resulting in nil. Fails anywhere else.
define_combinator :eof, wrap: false do
  Parsby.new :eof do |ctx|
    raise ExpectationFailed.new(ctx) unless ctx.bio.eof?
  end
end
|
378
|
+
|
379
|
+
# Builds a Parsby::Token with the given name.
def token(name)
  Parsby::Token.new(name)
end
|
383
|
+
end
|
384
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require "parsby"
|
2
|
+
|
3
|
+
module Parsby::Example
|
4
|
+
module ArithmeticParser
|
5
|
+
include Parsby::Combinators
|
6
|
+
extend self
|
7
|
+
|
8
|
+
# Parses +io+ with the expr parser and returns its result.
def parse(io)
  expr.parse(io)
end
|
11
|
+
|
12
|
+
# Defines combinator +name+ taking a left and a right subexpression
# parser. It groups the left operand, the operator +op+ (with optional
# whitespace around it), and the right operand.
def self.define_binary_op(name, op)
  define_combinator(name) do |left_subexpr, right_subexpr|
    group(
      left_subexpr,
      spaced(ilit(op)),
      right_subexpr
    )
  end
end
|
17
|
+
|
18
|
+
define_binary_op :add_op, "+"
|
19
|
+
define_binary_op :sub_op, "-"
|
20
|
+
define_binary_op :mul_op, "*"
|
21
|
+
define_binary_op :div_op, "/"
|
22
|
+
define_binary_op :exp_op, "^"
|
23
|
+
|
24
|
+
# Defines combinator +name+ taking a subexpression parser. It groups the
# operator +op+ and the operand, with optional whitespace between them.
def self.define_unary_op(name, op)
  define_combinator(name) do |subexpr|
    group(
      ilit(op),
      ws > subexpr
    )
  end
end
|
29
|
+
|
30
|
+
define_unary_op :neg_op, "-"
|
31
|
+
define_unary_op :pos_op, "+"
|
32
|
+
|
33
|
+
# hpe - higher precedence level
|
34
|
+
# spe - same precedence level
|
35
|
+
|
36
|
+
# Builds the parser for a level of right-associative binary operators.
# +operators+ is a list of combinator names; each is called with +hpe+ as
# the left operand and this same level as the right operand. Falls back to
# plain +hpe+ when no operator applies.
def right_associative_binary_precedence_level(hpe, operators)
  recursive do |spe|
    operations = operators.map { |op| send(op, hpe, spe) }
    choice(*operations, hpe)
  end
end
|
46
|
+
|
47
|
+
# Builds the parser for a level of left-associative binary operators
# using reduce. It starts from +hpe+; each step calls the combinators
# named in +operators+ with the result so far (via pure) as the left
# operand and +hpe+ as the right operand.
def left_associative_binary_precedence_level(hpe, operators)
  reduce hpe do |left_expr|
    operations = operators.map { |op| send(op, pure(left_expr), hpe) }
    choice(*operations)
  end
end
|
56
|
+
|
57
|
+
# Builds the parser for a level of unary operators. +operators+ is a list
# of combinator names; each is called with this same level as its operand.
# Falls back to plain +hpe+ when no operator applies.
def unary_precedence_level(hpe, operators)
  recursive do |spe|
    operations = operators.map { |op| send(op, spe) }
    choice(*operations, hpe)
  end
end
|
67
|
+
|
68
|
+
# Parser for a whole arithmetic expression. It is built lazily because
# the parenthesized case refers back to expr itself. The levels are
# stacked from a number or parenthesized expression at the bottom, through
# ^, then unary - and +, then * and /, up to binary + and -.
define_combinator :expr do
  lazy do
    atom = choice(
      decimal_fraction,
      between(lit("("), lit(")"), expr),
    )

    power = right_associative_binary_precedence_level(atom, [:exp_op])
    signed = unary_precedence_level(power, [:neg_op, :pos_op])
    product = left_associative_binary_precedence_level(signed, [:mul_op, :div_op])

    left_associative_binary_precedence_level(product, [:add_op, :sub_op])
  end
end
|
95
|
+
end
|
96
|
+
end
|