parsby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,384 @@
1
class Parsby
  # Library of parser combinators. Because of <tt>extend self</tt>, every
  # combinator can be called directly on the module
  # (<tt>Parsby::Combinators.lit("a")</tt>) or after including the module.
  module Combinators
    extend self

    # Module-level helpers. Combinators extends itself with these, and the
    # +included+ hook extends every including module with them as well, so
    # that includers can also use define_combinator.
    module ModuleMethods
      # The only reason to use this over regular def syntax is to get
      # automatic labels. For combinators defined with this, you'll get
      # labels that resemble the corresponding ruby expression.
      #
      # name:: name of the combinator method to define.
      # wrap:: when true (default) the resulting parser is wrapped in a new
      #        Parsby carrying the automatic label; when false the label is
      #        applied to the returned parser itself with <tt>%</tt>.
      # b::    body of the combinator; must return a parser.
      #
      # Returns whatever define_method returns for the final definition.
      def define_combinator(name, wrap: true, &b)
        # Convert block to method. This is necessary not only to convert
        # the proc to something that'll verify arity, but also to get
        # super() in b to work.
        define_method(name, &b)
        m = if defined? instance_method
          instance_method name
        else
          # self is probably main
          method(name).unbind
        end

        # Lambda used to access private module method from instance method.
        inspectable_labels_lambda = lambda {|x| inspectable_labels(x) }

        defined = define_method(name) do |*args, &b2|
          inspected_args = inspectable_labels_lambda.call(args).map(&:inspect)
          label = name.to_s
          label += "(#{inspected_args.join(", ")})" unless inspected_args.empty?
          # Wrap in new parser so we don't overwrite another automatic
          # label.
          p = m.bind(self).call(*args, &b2)
          if wrap
            Parsby.new(label) {|c| p.parse c }
          else
            p % label
          end
        end

        # The wrapper forwards its arguments through a plain splat. Since
        # Ruby 3.0 a trailing keyword hash collected by *args is passed on
        # as a positional argument unless the method is flagged with
        # ruby2_keywords, which would break combinators that take keyword
        # arguments (e.g. lit's case_sensitive:). The guard keeps this
        # working on rubies that predate ruby2_keywords.
        ruby2_keywords(name) if respond_to?(:ruby2_keywords, true)

        defined
      end

      private

      # Returns an object whose #inspect representation is exactly as given
      # in the argument string.
      def inspectable_as(s)
        Object.new.tap do |obj|
          obj.define_singleton_method :inspect do
            s
          end
        end
      end

      # Deeply traverses arrays and hashes changing each Parsby object to
      # another object that returns their label on #inspect. The point of
      # this is to be able to inspect the result and get something
      # resembling the original combinator expression. Instead of writing
      # this method, I could also just have redefined #inspect on Parsby to
      # return the label, but I like ruby's default #inspect in general.
      def inspectable_labels(arg)
        case arg
        when Parsby
          inspectable_as arg.label
        when Array # for methods like group() that accept arguments spliced or not
          arg.map(&method(:inspectable_labels))
        when Hash # for key arguments
          arg.map {|k, v| [k, inspectable_labels(v)] }.to_h
        else
          arg
        end
      end

      # Hook run when a module includes Combinators: gives the includer
      # define_combinator and the other ModuleMethods.
      def included(base)
        base.extend ModuleMethods
      end
    end

    extend ModuleMethods

    # Parses the string as literally provided. With
    # <tt>case_sensitive: false</tt> the comparison is done on downcased
    # strings; the text actually read from the input is what gets returned.
    define_combinator :lit, wrap: false do |e, case_sensitive: true|
      Parsby.new(e.inspect) { |c|
        a = c.bio.read e.length
        # a.to_s guards against a nil result from read.
        if case_sensitive ? a == e : a.to_s.downcase == e.downcase
          a
        else
          raise ExpectationFailed.new c
        end
      }
    end

    # Case-insensitive version of lit.
    define_combinator :ilit do |e|
      lit e, case_sensitive: false
    end

    # Same as <tt>p * n</tt>
    define_combinator :count do |n, p|
      p * n % "count(#{n}, #{p.label})"
    end

    # Uses =~ for matching. Only compares one char.
    define_combinator :char_matching, wrap: false do |r|
      Parsby.new r.inspect do |c|
        char = any_char.parse c
        unless char =~ r
          raise ExpectationFailed.new c
        end
        char
      end
    end

    # Parses a decimal number as matched by \d+. Results in an Integer.
    define_combinator :decimal do
      many_1(decimal_digit).fmap {|ds| ds.join.to_i } % token("number")
    end

    # This is taken from the Json subparser for numbers.
    #
    # Parses an optional sign, a whole part, an optional fractional part
    # and an optional exponent. The result is the Integer whole part when
    # there is neither fraction nor exponent, and a Float when there is a
    # fraction. Note that raising 10 to a negative Integer exponent yields
    # a Rational in ruby, so an integer mantissa with a negative exponent
    # results in a Rational.
    define_combinator :decimal_fraction do
      sign_parser = lit("-") | lit("+")
      # The fractional digits are kept as a string. Converting them to an
      # Integer first (as decimal does) would drop leading zeros and make
      # "1.05" evaluate to 1.5.
      fraction_digits = join(many_1(decimal_digit))
      group(
        optional(sign_parser),
        decimal,
        optional(group(
          lit("."),
          fraction_digits,
        )),
        optional(group(
          ilit("e"),
          optional(sign_parser),
          decimal,
        )),
      ).fmap do |(sign, whole, (_, fractional), (_, exponent_sign, exponent))|
        n = whole
        # Scale by the number of digits written, not by the magnitude of
        # their integer value.
        n += fractional.to_f / 10 ** fractional.length if fractional
        n *= -1 if sign == "-"
        if exponent
          e = exponent
          e *= -1 if exponent_sign == "-"
          n *= 10 ** e
        end
        n
      end
    end

    # Parses single digit in range 0-9. Returns string, not number.
    define_combinator :decimal_digit do
      char_matching(/[0-9]/)
    end

    # Parses single hex digit. Optional argument lettercase can be one of
    # :insensitive, :upper, or :lower. Raises ArgumentError for any other
    # value.
    define_combinator :hex_digit do |lettercase = :insensitive|
      decimal_digit | case lettercase
        when :insensitive
          char_matching(/[a-fA-F]/)
        when :upper
          char_matching(/[A-F]/)
        when :lower
          char_matching(/[a-f]/)
        else
          raise ArgumentError.new(
            "#{lettercase.inspect}: unrecognized; argument should be one of " \
            ":insensitive, :upper, or :lower"
          )
        end
    end

    # Parser that always fails without consuming input. We use it for at
    # least <tt>choice</tt>, for when it's supplied an empty list. It
    # corresponds with mzero in Haskell's Parsec.
    define_combinator :unparseable, wrap: false do
      Parsby.new {|c| raise ExpectationFailed.new c }
    end

    # Tries each provided parser until one succeeds. Providing an empty
    # list causes parser to always fail, like how [].any? is false.
    # Accepts the parsers either spliced or as (nested) arrays.
    define_combinator :choice, wrap: false do |*ps|
      ps = ps.flatten

      splicer.start do |m|
        ps.reduce(unparseable) do |a, p|
          a | m.end(p)
        end
      end
    end

    # Shorthand for the Parsby::Splicer constant.
    def splicer
      Parsby::Splicer
    end

    # Parses a single char from those contained in the string argument.
    define_combinator :char_in do |s|
      ~splicer.start do
        Parsby.new do |c|
          char = any_char.parse c
          unless s.chars.include? char
            raise ExpectationFailed.new c
          end
          char
        end
      end
    end

    # Parses string of 0 or more continuous whitespace characters (" ",
    # "\t", "\n", "\r")
    define_combinator :whitespace do
      whitespace_1 | pure("")
    end

    alias_method :ws, :whitespace

    # Parses string of 1 or more continuous whitespace characters (" ",
    # "\t", "\n", "\r")
    define_combinator :whitespace_1 do
      ~splicer.start { join(many_1(char_in(" \t\n\r"))) }
    end

    alias_method :ws_1, :whitespace_1

    # Expects p to be surrounded by optional whitespace.
    define_combinator :spaced do |p|
      ~splicer.start {|m| ws > m.end(p) < ws }
    end

    # Convenient substitute of <tt>left > p < right</tt> for when
    # <tt>p</tt> is large to write.
    define_combinator :between do |left, right, p|
      left > p < right
    end

    # Turns parser into one that doesn't consume input.
    define_combinator :peek, wrap: false do |p|
      Parsby.new {|c| p.peek c }
    end

    # Parser that returns provided value without consuming any input.
    define_combinator :pure, wrap: false do |x|
      Parsby.new { x }
    end

    # Delays construction of parser until parsing-time. This allows one to
    # construct recursive parsers, which would otherwise result in a
    # stack-overflow in construction-time.
    define_combinator :lazy, wrap: false do |&b|
      # Can't have a better label, because we can't know what the parser is
      # until parsing time.
      Parsby.new {|c| b.call.parse c }
    end

    # Make a recursive parser. Block shall take an argument and return a
    # parser. The block's argument is the parser it returns.
    #
    # Example:
    #
    #   recursive {|p|
    #     single(lit("(") > optional(p) < lit(")"))
    #   }.parse "((()))"
    #   #=> [[[nil]]]
    #
    # This is analogous to Haskell's fix function.
    define_combinator :recursive, wrap: false do |&b|
      # p is only dereferenced when the lazy block runs, by which time the
      # assignment has completed.
      p = lazy { b.call p }
    end

    # Similar to Enumerable's #reduce. Takes parser as argument, passes the
    # parsing result to the block, parses using result of block, the result
    # of the parse is passed again to the block, and so on until the
    # returned parser fails. Returns the last result before failure.
    #
    # The only way for this parser to fail is if the initial parser passed
    # as argument fails.
    #
    # This combinator is meant to make shift-reduce parsers for LR
    # grammars.
    define_combinator :reduce, wrap: false do |init, &b|
      init.then do |accum|
        Parsby.new do |c|
          begin
            # Loops until a step raises ExpectationFailed; accum then still
            # holds the last successful result.
            accum = b.call(accum).parse(c) while true
          rescue ExpectationFailed
            accum
          end
        end
      end
    end

    # Combinator form of <tt>p.fmap(&b)</tt>.
    define_combinator :fmap do |p, &b|
      p.fmap(&b)
    end

    # Results in empty array without consuming input. This is meant to be
    # used to start off use of <<.
    #
    # Example:
    #
    #   (empty << lit("foo") << lit("bar")).parse "foobar"
    #   => ["foo", "bar"]
    define_combinator :empty do
      pure []
    end

    # Groups results into an array. Accepts the parsers either spliced or
    # as (nested) arrays.
    define_combinator :group do |*ps|
      ps = ps.flatten
      ~splicer.start do |m|
        ps.reduce(empty) do |a, p|
          a << m.end(p)
        end
      end
    end

    # Wraps result in a list. This is to be able to do
    #
    #   single(...) + many(...)
    define_combinator :single do |p|
      p.fmap {|x| [x]}
    end

    # Runs parser until it fails and returns an array of the results. Because
    # it can return an empty array, this parser can never fail.
    define_combinator :many, wrap: false do |p|
      Parsby.new do |c|
        rs = []
        while true
          # Stop at end of input instead of attempting another parse.
          break if c.bio.eof?
          begin
            rs << p.parse(c)
          rescue Error
            break
          end
        end
        rs
      end
    end

    # Same as many, but fails if it can't match even once.
    define_combinator :many_1 do |p|
      single(p) + many(p)
    end

    # Like many, but accepts another parser for separators. It returns a list
    # of the results of the first argument. Returns an empty list if it
    # didn't match even once, so it never fails.
    define_combinator :sep_by do |s, p|
      sep_by_1(s, p) | empty
    end

    # Like sep_by, but fails if it can't match even once.
    define_combinator :sep_by_1 do |s, p|
      single(p) + many(s > p)
    end

    # Join the Array result of p.
    define_combinator :join do |p|
      p.fmap(&:join)
    end

    # Tries the given parser and returns nil if it fails.
    define_combinator :optional do |p|
      p | pure(nil)
    end

    # Parses any char. Only fails on EOF.
    define_combinator :any_char, wrap: false do
      Parsby.new do |c|
        if c.bio.eof?
          raise ExpectationFailed.new c
        end
        c.bio.read 1
      end
    end

    # Matches EOF, fails otherwise. Returns nil.
    define_combinator :eof, wrap: false do
      Parsby.new :eof do |c|
        unless c.bio.eof?
          raise ExpectationFailed.new c
        end
      end
    end

    # Makes a token with the given name.
    def token(name)
      Parsby::Token.new name
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require "parsby"
2
+
3
module Parsby::Example
  # Example parser for arithmetic expressions, built from Parsby
  # combinators. Results are nested arrays produced by group.
  module ArithmeticParser
    include Parsby::Combinators
    extend self

    # Parses io with the expr combinator and returns its result.
    def parse(io)
      expr.parse(io)
    end

    # Defines combinator +name+ taking a left and a right sub-expression
    # parser. It groups left operand, the operator +op+ (surrounded by
    # optional whitespace) and right operand.
    def self.define_binary_op(name, op)
      define_combinator(name) do |lhs, rhs|
        group(lhs, spaced(ilit(op)), rhs)
      end
    end

    {
      add_op: "+",
      sub_op: "-",
      mul_op: "*",
      div_op: "/",
      exp_op: "^",
    }.each { |op_name, symbol| define_binary_op(op_name, symbol) }

    # Defines combinator +name+ taking an operand parser. It groups the
    # operator +op+ and the operand, allowing whitespace between them.
    def self.define_unary_op(name, op)
      define_combinator(name) do |operand|
        group(ilit(op), ws > operand)
      end
    end

    {
      neg_op: "-",
      pos_op: "+",
    }.each { |op_name, symbol| define_unary_op(op_name, symbol) }

    # In the methods below:
    #   hpe - parser for the higher precedence level
    #   spe - parser for the same precedence level

    # Tries each operator with hpe on the left and the level itself on the
    # right, falling back to hpe alone.
    def right_associative_binary_precedence_level(hpe, operators)
      recursive do |spe|
        alternatives = operators.map { |op| send(op, hpe, spe) }
        choice(*alternatives, hpe)
      end
    end

    # Starts from hpe and keeps extending the accumulated result with any
    # of the operators followed by another hpe, via reduce.
    def left_associative_binary_precedence_level(hpe, operators)
      reduce(hpe) do |left_expr|
        alternatives = operators.map { |op| send(op, pure(left_expr), hpe) }
        choice(*alternatives)
      end
    end

    # Tries each unary operator applied to the level itself, falling back
    # to hpe.
    def unary_precedence_level(hpe, operators)
      recursive do |spe|
        alternatives = operators.map { |op| send(op, spe) }
        choice(*alternatives, hpe)
      end
    end

    # Full expression parser. Levels are stacked from highest to lowest
    # precedence: atoms, exponentiation, unary sign, multiplication and
    # division, addition and subtraction. Built lazily because the
    # parenthesized atom refers back to expr.
    define_combinator :expr do
      lazy do
        atom = choice(
          decimal_fraction,
          between(lit("("), lit(")"), expr),
        )

        exponentiation = right_associative_binary_precedence_level(atom, %i[exp_op])
        signed = unary_precedence_level(exponentiation, %i[neg_op pos_op])
        product = left_associative_binary_precedence_level(signed, %i[mul_op div_op])

        left_associative_binary_precedence_level(product, %i[add_op sub_op])
      end
    end
  end
end