parsby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,384 @@
1
+ class Parsby
2
+ module Combinators
3
+ extend self
4
+
5
+ module ModuleMethods
6
+ # Defines combinator +name+ from the block. The only reason to use this over regular def syntax is to get
7
+ # automatic labels: the label is the name, followed by the inspected arguments in parentheses when there are any,
8
+ # so it resembles the corresponding ruby expression. wrap: true wraps the result in a new Parsby carrying the label; wrap: false labels the block's own parser with <tt>%</tt>.
9
+ def define_combinator(name, wrap: true, &b)
10
+ # Convert block to method. This is necessary not only to convert
11
+ # the proc to something that'll verify arity, but also to get
12
+ # super() in b to work.
13
+ define_method(name, &b)
14
+ m = if defined? instance_method # hold on to the block's implementation before name is redefined below
15
+ instance_method name
16
+ else
17
+ # self is probably main
18
+ method(name).unbind
19
+ end
20
+
21
+ # Lambda used to access private module method from instance method.
22
+ inspectable_labels_lambda = lambda {|x| inspectable_labels(x) }
23
+
24
+ define_method name do |*args, &b2| # second definition under the same name; this is the one callers reach
25
+ inspected_args = inspectable_labels_lambda.call(args).map(&:inspect)
26
+ label = name.to_s
27
+ label += "(#{inspected_args.join(", ")})" unless inspected_args.empty?
28
+ # Wrap in new parser so we don't overwrite another automatic
29
+ # label.
30
+ p = m.bind(self).call(*args, &b2) # run the saved block implementation with the caller as self
31
+ if wrap
32
+ Parsby.new(label) {|c| p.parse c }
33
+ else
34
+ p % label
35
+ end
36
+ end
37
+ end
38
+
39
+ private
40
+
41
+ # Returns an object whose #inspect representation is exactly as given
42
+ # in the argument string.
43
+ def inspectable_as(s)
44
+ Object.new.tap do |obj|
45
+ obj.define_singleton_method :inspect do
46
+ s
47
+ end
48
+ end
49
+ end
50
+
51
+ # Deeply traverses arrays and hashes changing each Parsby object to
52
+ # another object that returns their label on #inspect. The point of
53
+ # this is to be able to inspect the result and get something
54
+ # resembling the original combinator expression. Instead of writing
55
+ # this method, I could also just have redefined #inspect on Parsby to
56
+ # return the label, but I like ruby's default #inspect in general.
57
+ def inspectable_labels(arg)
58
+ case arg
59
+ when Parsby
60
+ inspectable_as arg.label
61
+ when Array # for methods like group() that accept arguments spliced or not
62
+ arg.map(&method(:inspectable_labels))
63
+ when Hash # for key arguments
64
+ arg.map {|k, v| [k, inspectable_labels(v)] }.to_h
65
+ else
66
+ arg
67
+ end
68
+ end
69
+
70
+ def included(base) # serves as the Module#included hook of whatever module ModuleMethods has been extended onto
71
+ base.extend ModuleMethods # hands define_combinator on to the including module/class as well
72
+ end
73
+ end
74
+
75
+ extend ModuleMethods
76
+
77
+ # Parses the string as literally provided.
78
+ define_combinator :lit, wrap: false do |e, case_sensitive: true|
79
+ Parsby.new(e.inspect) { |c|
80
+ a = c.bio.read e.length
81
+ if case_sensitive ? a == e : a.to_s.downcase == e.downcase
82
+ a
83
+ else
84
+ raise ExpectationFailed.new c
85
+ end
86
+ }
87
+ end
88
+
89
+ define_combinator :ilit do |e|
90
+ lit e, case_sensitive: false
91
+ end
92
+
93
+ # Same as <tt>p * n</tt>
94
+ define_combinator :count do |n, p|
95
+ p * n % "count(#{n}, #{p.label})"
96
+ end
97
+
98
+ # Uses =~ for matching. Only compares one char.
99
+ define_combinator :char_matching, wrap: false do |r|
100
+ Parsby.new r.inspect do |c|
101
+ char = any_char.parse c
102
+ unless char =~ r
103
+ raise ExpectationFailed.new c
104
+ end
105
+ char
106
+ end
107
+ end
108
+
109
+ # Parses a decimal number as matched by \d+.
110
+ define_combinator :decimal do
111
+ many_1(decimal_digit).fmap {|ds| ds.join.to_i } % token("number")
112
+ end
113
+
114
+ # This is taken from the Json subparser for numbers.
115
+ define_combinator :decimal_fraction do
116
+ sign = lit("-") | lit("+")
117
+ group(
118
+ optional(sign),
119
+ decimal,
120
+ optional(group(
121
+ lit("."),
122
+ decimal,
123
+ )),
124
+ optional(group(
125
+ ilit("e"),
126
+ optional(sign),
127
+ decimal,
128
+ )),
129
+ ).fmap do |(sign, whole, (_, fractional), (_, exponent_sign, exponent))|
130
+ n = whole
131
+ n += fractional.to_f / 10 ** fractional.to_s.length if fractional
132
+ n *= -1 if sign == "-"
133
+ if exponent
134
+ e = exponent
135
+ e *= -1 if exponent_sign == "-"
136
+ n *= 10 ** e
137
+ end
138
+ n
139
+ end
140
+ end
141
+
142
+ # Parses single digit in range 0-9. Returns string, not number.
143
+ define_combinator :decimal_digit do
144
+ char_matching /[0-9]/
145
+ end
146
+
147
+ # Parses single hex digit. Optional argument lettercase can be one of
148
+ # :insensitive, :upper, or :lower.
149
+ define_combinator :hex_digit do |lettercase = :insensitive|
150
+ decimal_digit | case lettercase
151
+ when :insensitive
152
+ char_matching /[a-fA-F]/
153
+ when :upper
154
+ char_matching /[A-F]/
155
+ when :lower
156
+ char_matching /[a-f]/
157
+ else
158
+ raise ArgumentError.new(
159
+ "#{lettercase.inspect}: unrecognized; argument should be one of " \
160
+ ":insensitive, :upper, or :lower"
161
+ )
162
+ end
163
+ end
164
+
165
+ # Parser that always fails without consuming input. We use it for at
166
+ # least <tt>choice</tt>, for when it's supplied an empty list. It
167
+ # corresponds with mzero in Haskell's Parsec.
168
+ define_combinator :unparseable, wrap: false do
169
+ Parsby.new {|c| raise ExpectationFailed.new c }
170
+ end
171
+
172
+ # Tries each provided parser until one succeeds. Providing an empty
173
+ # list causes parser to always fail, like how [].any? is false.
174
+ define_combinator :choice, wrap: false do |*ps|
175
+ ps = ps.flatten
176
+
177
+ splicer.start do |m|
178
+ ps.reduce(unparseable) do |a, p|
179
+ a | m.end(p)
180
+ end
181
+ end
182
+ end
183
+
184
+ def splicer # shorthand so combinators can write splicer.start instead of Parsby::Splicer.start
185
+ Parsby::Splicer
186
+ end
187
+
188
+ # Parses a single char from those contained in the string argument.
189
+ define_combinator :char_in do |s|
190
+ ~splicer.start do
191
+ Parsby.new do |c|
192
+ char = any_char.parse c
193
+ unless s.chars.include? char
194
+ raise ExpectationFailed.new c
195
+ end
196
+ char
197
+ end
198
+ end
199
+ end
200
+
201
+ # Parses string of 0 or more continuous whitespace characters (" ", "\t",
202
+ # "\n", "\r"). Falls back to pure("") when whitespace_1 does not match.
203
+ define_combinator :whitespace do
204
+ whitespace_1 | pure("")
205
+ end
206
+
207
+ alias_method :ws, :whitespace
208
+
209
+ # Parses string of 1 or more continuous whitespace characters (" ", "\t",
210
+ # "\n", "\r"). The chars matched by char_in are joined into one String.
211
+ define_combinator :whitespace_1 do
212
+ ~splicer.start { join(many_1(char_in(" \t\n\r"))) }
213
+ end
214
+
215
+ alias_method :ws_1, :whitespace_1
216
+
217
+ # Expects p to be surrounded by optional whitespace: ws is run before and after p (ws itself accepts an empty match).
218
+ define_combinator :spaced do |p|
219
+ ~splicer.start {|m| ws > m.end(p) < ws }
220
+ end
221
+
222
+ # Convinient substitute of <tt>left > p < right</tt> for when
223
+ # <tt>p</tt> is large to write.
224
+ define_combinator :between do |left, right, p|
225
+ left > p < right
226
+ end
227
+
228
+ # Turns parser into one that doesn't consume input.
229
+ define_combinator :peek, wrap: false do |p|
230
+ Parsby.new {|c| p.peek c }
231
+ end
232
+
233
+ # Parser that returns provided value without consuming any input; its block never looks at the parsing context.
234
+ define_combinator :pure, wrap: false do |x|
235
+ Parsby.new { x }
236
+ end
237
+
238
+ # Delays construction of parser until parsing-time. This allows one to
239
+ # construct recursive parsers, which would otherwise result in a
240
+ # stack-overflow in construction-time.
241
+ define_combinator :lazy, wrap: false do |&b|
242
+ # Can't have a better label, because we can't know what the parser is
243
+ # until parsing time.
244
+ Parsby.new {|c| b.call.parse c }
245
+ end
246
+
247
+ # Make a recursive parser. Block shall take an argument and return a
248
+ # parser. The block's argument is the parser it returns.
249
+ #
250
+ # Example:
251
+ #
252
+ # recursive {|p|
253
+ # single(lit("(") > optional(p) < lit(")"))
254
+ # }.parse "((()))"
255
+ # #=> [[[nil]]]
256
+ #
257
+ # This is analogous to Haskell's fix function.
258
+ define_combinator :recursive, wrap: false do |&b|
259
+ p = lazy { b.call p }
260
+ end
261
+
262
+ # Similar to Enumerable's #reduce. Takes parser as argument, passes the
263
+ # parsing result to the block, parses using result of block, the result
264
+ # of the parse is passed again to the block, and so on until the
265
+ # returned parser fails. Returns the last result before failure.
266
+ #
267
+ # The only way for this parser to fail is if the initial parser passed
268
+ # as argument fails.
269
+ #
270
+ # This combinator is meant to make shift-reduce parsers for LR
271
+ # grammars.
272
+ define_combinator :reduce, wrap: false do |init, &b|
273
+ init.then do |accum| # accum presumably starts out as the parsing result of init
274
+ Parsby.new do |c|
275
+ begin
276
+ accum = b.call(accum).parse(c) while true # keep feeding the latest result back into the block
277
+ rescue ExpectationFailed # only this failure ends the loop; anything else propagates
278
+ accum # the last result obtained before the failure
279
+ end
280
+ end
281
+ end
282
+ end
283
+
284
+ define_combinator :fmap do |p, &b| # prefix form of p.fmap, so the call gets an automatic label
285
+ p.fmap(&b)
286
+ end
287
+
288
+ # Results in empty array without consuming input. This is meant to be
289
+ # used to start off use of <<.
290
+ #
291
+ # Example:
292
+ #
293
+ # (empty << string("foo") << string("bar")).parse "foobar"
294
+ # => ["foo", "bar"]
295
+ define_combinator :empty do
296
+ pure []
297
+ end
298
+
299
+ # Groups results into an array.
300
+ define_combinator :group do |*ps|
301
+ ps = ps.flatten
302
+ ~splicer.start do |m|
303
+ ps.reduce(empty) do |a, p|
304
+ a << m.end(p)
305
+ end
306
+ end
307
+ end
308
+
309
+ # Wraps result in a list. This is to be able to do
310
+ #
311
+ # single(...) + many(...)
312
+ define_combinator :single do |p|
313
+ p.fmap {|x| [x]}
314
+ end
315
+
316
+ # Runs parser until it fails and returns an array of the results. Because
317
+ # it can return an empty array, this parser can never fail.
318
+ define_combinator :many, wrap: false do |p|
319
+ Parsby.new do |c|
320
+ rs = [] # results collected so far
321
+ while true
322
+ break if c.bio.eof? # stop at end of input without trying p again
323
+ begin
324
+ rs << p.parse(c)
325
+ rescue Error # any Error raised by p ends the repetition; it is not re-raised
326
+ break
327
+ end
328
+ end # NOTE(review): the loop exits only on EOF or an Error, so a p that keeps succeeding before EOF is reached would presumably never terminate -- confirm
329
+ rs
330
+ end
331
+ end
332
+
333
+ # Same as many, but fails if it can't match even once: the first match is mandatory (single(p)), the rest come from many(p).
334
+ define_combinator :many_1 do |p|
335
+ single(p) + many(p)
336
+ end
337
+
338
+ # Like many, but accepts another parser for separators. Note the argument
339
+ # order: +s+ is the separator and +p+ the element parser. It returns a list of the results of +p+, or an
340
+ # empty list (via the empty fallback) if it didn't match even once.
341
+ define_combinator :sep_by do |s, p|
342
+ sep_by_1(s, p) | empty
343
+ end
344
+
345
+ # Like sep_by, but fails if it can't match even once: one mandatory p, then any number of "s > p" repetitions (presumably keeping only p's result).
346
+ define_combinator :sep_by_1 do |s, p|
347
+ single(p) + many(s > p)
348
+ end
349
+
350
+ # Join the Array result of p.
351
+ define_combinator :join do |p|
352
+ p.fmap(&:join)
353
+ end
354
+
355
+ # Tries the given parser and returns nil if it fails, through the pure(nil) alternative.
356
+ define_combinator :optional do |p|
357
+ p | pure(nil)
358
+ end
359
+
360
+ # Parses any char. Only fails on EOF.
361
+ define_combinator :any_char, wrap: false do
362
+ Parsby.new do |c|
363
+ if c.bio.eof?
364
+ raise ExpectationFailed.new c
365
+ end
366
+ c.bio.read 1
367
+ end
368
+ end
369
+
370
+ # Matches EOF, fails otherwise. Returns nil.
371
+ define_combinator :eof, wrap: false do
372
+ Parsby.new :eof do |c|
373
+ unless c.bio.eof?
374
+ raise ExpectationFailed.new c
375
+ end
376
+ end
377
+ end
378
+
379
+ # Makes a token with the given name.
380
+ def token(name)
381
+ Parsby::Token.new name
382
+ end
383
+ end
384
+ end
@@ -0,0 +1,96 @@
1
+ require "parsby"
2
+
3
+ module Parsby::Example
4
+ module ArithmeticParser
5
+ include Parsby::Combinators
6
+ extend self
7
+
8
+ def parse(io)
9
+ expr.parse io
10
+ end
11
+
12
+ def self.define_binary_op(name, op)
13
+ define_combinator name do |left_subexpr, right_subexpr|
14
+ group(left_subexpr, spaced(ilit(op)), right_subexpr)
15
+ end
16
+ end
17
+
18
+ define_binary_op :add_op, "+"
19
+ define_binary_op :sub_op, "-"
20
+ define_binary_op :mul_op, "*"
21
+ define_binary_op :div_op, "/"
22
+ define_binary_op :exp_op, "^"
23
+
24
+ def self.define_unary_op(name, op)
25
+ define_combinator name do |subexpr|
26
+ group(ilit(op), ws > subexpr)
27
+ end
28
+ end
29
+
30
+ define_unary_op :neg_op, "-"
31
+ define_unary_op :pos_op, "+"
32
+
33
+ # hpe - higher precedence level
34
+ # spe - same precedence level
35
+
36
+ def right_associative_binary_precedence_level(hpe, operators)
37
+ recursive do |spe|
38
+ choice(
39
+ *operators.map do |op|
40
+ send(op, hpe, spe)
41
+ end,
42
+ hpe,
43
+ )
44
+ end
45
+ end
46
+
47
+ def left_associative_binary_precedence_level(hpe, operators)
48
+ reduce hpe do |left_expr|
49
+ choice(
50
+ *operators.map do |op|
51
+ send(op, pure(left_expr), hpe)
52
+ end
53
+ )
54
+ end
55
+ end
56
+
57
+ def unary_precedence_level(hpe, operators)
58
+ recursive do |spe|
59
+ choice(
60
+ *operators.map do |op|
61
+ send(op, spe)
62
+ end,
63
+ hpe,
64
+ )
65
+ end
66
+ end
67
+
68
+ define_combinator :expr do # builds the expression parser level by level; each step wraps the previous e
69
+ lazy do # deferred because the parenthesized alternative below refers back to expr
70
+ e = choice( # innermost level: a number or a parenthesized expression
71
+ decimal_fraction,
72
+ between(lit("("), lit(")"), expr),
73
+ )
74
+
75
+ e = right_associative_binary_precedence_level(e, [ # exponentiation wraps the innermost level
76
+ :exp_op,
77
+ ])
78
+
79
+ e = unary_precedence_level(e, [ # unary minus/plus wrap the exponentiation level
80
+ :neg_op,
81
+ :pos_op,
82
+ ])
83
+
84
+ e = left_associative_binary_precedence_level(e, [ # multiplication and division wrap the unary level
85
+ :mul_op,
86
+ :div_op,
87
+ ])
88
+
89
+ e = left_associative_binary_precedence_level(e, [ # addition and subtraction form the outermost level; this is the block's value
90
+ :add_op,
91
+ :sub_op,
92
+ ])
93
+ end
94
+ end
95
+ end
96
+ end