parsby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
# Rake entry point: loads Bundler's gem tasks and makes the RSpec suite the
# default task.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Running `rake` without arguments runs the specs.
task default: :spec
@@ -0,0 +1,35 @@
1
#!/bin/bash

# This lists all instance and class methods defined in the project. It's
# used along with the script tested-methods in a test in projects_spec.rb
# to ensure test-coverage.
#
# Output is one sorted line per method, "Outer::Inner#instance_method" or
# "Outer::Inner.class_method". Nesting is inferred purely from indentation,
# so the sources under lib/ must be consistently indented.
#
# NOTE: the awk program uses GNU awk extensions (\s, \> and the
# three-argument form of match()).

grep -REh '^\s*(class|module|def|define_combinator|end)\b' lib | awk '
  # Number of blanks before the first non-blank character of the line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # A class or module line: remember its name at this indentation and drop
  # any deeper contexts left over from scopes that are already closed.
  /^\s*(module|class)\>/ {
    match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Array subscripts are strings, so add 0 to force a numeric comparison
      # (otherwise level 10 would sort before level 2).
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # A method definition: print the enclosing namespace followed by the
  # method name, with "." for self. methods and "#" for everything else.
  /^\s*(def|define_combinator)\>/ {
    indent_level = get_indent_level()
    first = 1
    # Visit levels in ascending order. A for-in loop over context would
    # visit subscripts in an unspecified order and could scramble the
    # namespace.
    for (level = 0; level < indent_level; level++) {
      if (level in context) {
        printf(first ? "%s" : "::%s", context[level])
        first = 0
      }
    }
    match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^,([:blank:]]+)/, matches)
    print (matches[2] == "self." ? "." : "#") matches[3]
  }
' | grep -v '[#.]included$' | sort
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "parsby"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+
12
# Converts a path relative to lib/ (e.g. "parsby/combinators.rb") into the
# name of the constant that file is expected to define (e.g.
# "Parsby::Combinators").
def to_constant_representation(path)
  namespaced = path.gsub("/", "::")
  # Upcase the first character of the string, the first character after each
  # "::", and every character that follows an underscore (dropping the
  # underscore itself).
  camelized = namespaced.gsub(/(\A|(?<=::)|_)./) { |match| match[-1].upcase }
  camelized.sub(/\.rb\z/, "")
end
18
+
19
# Reloads the library from lib/ in the running console: removes the
# top-level constants defined by the source files, loads every file again,
# and re-mixes the combinators into the console's main object. Returns nil.
def reload!
  lib_root = Pathname.new("lib/")
  sources = Dir["lib/**/*"]
    .map { |file| Pathname.new(file).relative_path_from(lib_root) }
    .select { |file| file.to_s =~ /\.rb\z/ && file.to_s != "parsby/version.rb" }

  sources.each do |file|
    const = to_constant_representation(file.to_s)
    # Nested constants go away together with their parents, so only
    # top-level ones need removing.
    next if const =~ /::/
    next unless Object.const_defined?(const)
    Object.send(:remove_const, const)
  end

  # Load everything only after having removed everything.
  sources.each { |file| load file }

  include Parsby::Combinators
  extend Parsby::Combinators::ModuleMethods
  nil
end
37
+
38
+ reload!
39
+
40
+ Pry.start
@@ -0,0 +1,49 @@
1
#!/bin/bash

# This script lists the methods that aren't immediately preceded by a
# comment. The output is empty and the exit status is successful if they
# all have a comment.
#
# This is used in a test in project_spec.rb to ensure documentation
# coverage.
#
# The awk program is deliberately the last command of the pipeline, so the
# script's exit status is the one awk computes. (Filtering the output
# through a trailing "grep -v" would report failure exactly when the output
# is empty.)
#
# NOTE: the awk program uses GNU awk extensions (\s, \> and the
# three-argument form of match()).

find lib -type f -name \*.rb ! -path lib/parsby/example/\* \
  | xargs grep -REh '^\s*(#|(class|module|def|define_combinator|end)\b)' \
  | awk '
  # Number of blanks before the first non-blank character of the line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # A class or module line: remember its name at this indentation and drop
  # any deeper contexts left over from scopes that are already closed.
  /^\s*(module|class)\>/ {
    match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Array subscripts are strings, so add 0 to force a numeric comparison.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # A method definition whose previous line is not a comment.
  /^\s*(def|define_combinator)\>/ && !prev_line_is_comment {
    indent_level = get_indent_level()
    name = ""
    first = 1
    # Visit levels in ascending order. A for-in loop over context would
    # visit subscripts in an unspecified order.
    for (level = 0; level < indent_level; level++) {
      if (level in context) {
        name = name (first ? "" : "::") context[level]
        first = 0
      }
    }
    match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^([:blank:]]+)/, matches)
    name = name (matches[2] == "self." ? "." : "#") matches[3]
    # The included hook is exempt from the documentation requirement.
    if (name !~ /[#.]included$/) {
      print name
      at_least_one_missing = 1
    }
  }

  # Runs for every line, after the rule above, so that the next line can
  # tell whether it directly follows a comment.
  {
    prev_line_is_comment = /^\s*#/
  }

  END {
    exit (at_least_one_missing ? 1 : 0)
  }
'
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development bootstrap: installs the gem's dependencies with Bundler.

# Abort on errors, on unset variables, and on a failure anywhere in a pipeline.
set -e -u -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Echo every line as it is read (-v) and every command as it runs (-x).
set -v -x

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,47 @@
1
#!/bin/bash

# This lists all tested instance and class methods defined in the project.
# It's used along with the script all-methods in a test in projects_spec.rb
# to ensure test-coverage.
#
# It depends on rspec contexts being done in a specific format, and
# properly indented. The format can be exemplified by:
#
#   RSpec.describe Foo do
#     describe Foo::Bar do
#       describe "#foo" do
#         ...
#       end
#       describe ".bar" do
#         ...
#       end
#     end
#   end
#
# Method descriptions may use either double or single quotes.
#
# NOTE: the awk program uses GNU awk extensions (\s and the three-argument
# form of match()).

grep -REh '^\s*(RSpec\.)?describe\(?\s*([A-Z]|["'\''][#.])' spec | awk '
  # Number of blanks before the first non-blank character of the line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # describe SomeConstant: remember the constant at this indentation and
  # drop any deeper contexts left over from blocks that are already closed.
  /^\s*(RSpec\.)?describe\(?\s*[A-Z]/ {
    match($0, /^\s*(RSpec\.)?describe\(?\s*([A-Za-z0-9:_]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Array subscripts are strings, so add 0 to force a numeric comparison.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # describe "#method" or describe ".method": print it prefixed with the
  # innermost enclosing constant.
  /^\s*(RSpec\.)?describe\(?\s*["'\''][#.]/ {
    indent_level = get_indent_level()
    # Pick the deepest context that is shallower than this line. The order
    # in which a for-in loop visits subscripts is unspecified, so track the
    # maximum explicitly instead of relying on the last one visited.
    nearest = -1
    for (i in context) {
      if (i + 0 < indent_level && i + 0 > nearest) {
        nearest = i + 0
      }
    }
    ctx = (nearest >= 0 ? context[nearest] : "")
    # Capture up to the first closing quote, so that further quoted
    # arguments on the same line are not swallowed.
    match($0, /^\s*(RSpec\.)?describe\(?\s*(["'\''])([#.][^"'\'']+)/, matches)
    print ctx matches[3]
  }
' | sort
@@ -0,0 +1,30 @@
1
#!/bin/bash

# Vestigial methods are those that are defined, but not used anywhere else
# in lib/. This is to help us detect methods that we no longer use to
# determine whether to remove them. If a vestigial method is still useful
# for users of this library or otherwise, add it to the whitelist below.
#
# For every method name defined under lib/, count the lines that mention it
# outside of comments, definitions and requires, then print the names whose
# count is zero and that are not whitelisted.

grep -Phro '^[^#]*(def (self\.)?|define_combinator :)\K\w+' lib | sort -u | while read -r n; do
  uses="$(grep -Fwrn "$n" lib | grep -Ev '#[^{]|\b(def|define_combinator|require)\b' | wc -l)"
  printf "%s\t%s\n" "$uses" "$n"
done | awk '
  # Methods that Ruby itself calls, so they never show up as explicit uses.
  function is_ruby_hook() {
    return $2 ~ /\<(included|initialize|message|method_missing)\>/
  }

  # Methods kept on purpose even though nothing in lib/ calls them.
  function whitelisted() {
    if (is_ruby_hook()) return 1
    if ($2 == "define_combinator") return 1
    if ($2 == "parsby") return 1
    if ($2 == "ilit") return 1
    if ($2 == "recursive") return 1
    if ($2 == "decimal_fraction") return 1
    if ($2 == "left_tree_slice") return 1
    if ($2 == "all") return 1
    return 0
  }

  # $1 is the use count and $2 the method name.
  !$1 && !whitelisted() { print $2 }
'
@@ -0,0 +1,804 @@
1
+ require "parsby/version"
2
+ require "parsby/combinators"
3
+
4
+ class Parsby
5
+ include Combinators
6
+
7
+ class Error < StandardError; end
8
+
9
+ class PosRange
10
+ attr_accessor :start, :end
11
+
12
+ # PosRanges are constructed with a starting and ending position. We
13
+ # consider the starting position to be inside the range, and the ending
14
+ # position to be outside the range. So, if start is 1 and end is 2,
15
+ # then only position 1 is inside the range. If start is 1 and end is 1,
16
+ # then there is no position inside the range.
17
+ def initialize(pos_start, pos_end)
18
+ @start = pos_start
19
+ @end = pos_end
20
+ end
21
+
22
+ # Length of range.
23
+ def length
24
+ @end - @start
25
+ end
26
+
27
+ # Length of overlap. 0 for non-overlapping ranges.
28
+ def length_in(range)
29
+ (self & range)&.length || 0
30
+ end
31
+
32
+ # Intersection of two ranges. Touching ranges result in a range of
33
+ # length 0.
34
+ def &(range)
35
+ return nil unless overlaps?(range) || touching?(range)
36
+ PosRange.new [@start, range.start].max, [@end, range.end].min
37
+ end
38
+
39
+ # True when the end of one is the beginning of the other.
40
+ def touching?(range)
41
+ range.end == self.start || self.end == range.start
42
+ end
43
+
44
+ # True when one is not completely left of or right of the other.
45
+ # Touching ranges do not overlap, even though they have an intersection
46
+ # range of length 0.
47
+ def overlaps?(range)
48
+ !(completely_left_of?(range) || completely_right_of?(range))
49
+ end
50
+
51
+ def completely_left_of?(range)
52
+ @end <= range.start
53
+ end
54
+
55
+ def completely_right_of?(range)
56
+ range.end <= @start
57
+ end
58
+
59
+ def contains?(pos)
60
+ @start <= pos && pos < @end
61
+ end
62
+
63
+ def starts_inside_of?(range)
64
+ range.contains? @start
65
+ end
66
+
67
+ def ends_inside_of?(range)
68
+ range.contains?(@end) || range.end == @end
69
+ end
70
+
71
+ def completely_inside_of?(range)
72
+ starts_inside_of?(range) && ends_inside_of?(range)
73
+ end
74
+
75
+ def render_in(line_range)
76
+ return "<-" if completely_left_of?(line_range) && !starts_inside_of?(line_range)
77
+ return "->" if completely_right_of? line_range
78
+ indentation = " " * [0, start - line_range.start].max
79
+ r = "-" * length_in(line_range)
80
+ r[0] = "\\" if starts_inside_of? line_range
81
+ r[-1] = "/" if ends_inside_of? line_range
82
+ r[0] = "|" if length_in(line_range) == 0
83
+ r[0] = "V" if length_in(line_range) == 1 && completely_inside_of?(line_range)
84
+ indentation + r
85
+ end
86
+ end
87
+
88
+ class Splicer
89
+ def self.start(label = nil, &b)
90
+ m = new
91
+ p = b.call m
92
+ p % label if label
93
+ m.start p
94
+ end
95
+
96
+ def start(p)
97
+ Parsby.new("splicer.start(#{p.label})") { |c|
98
+ begin
99
+ p.parse c
100
+ ensure
101
+ c.parsed_ranges.splice_to! self
102
+ end
103
+ }
104
+ end
105
+
106
+ def end(p)
107
+ Parsby.new("splicer.end(#{p.label})") { |c|
108
+ begin
109
+ p.parse c
110
+ ensure
111
+ c.parsed_ranges.children[0].markers << self
112
+ end
113
+ }
114
+ end
115
+ end
116
+
117
+ module Tree
118
+ attr_accessor :parent
119
+ attr_reader :markers
120
+ attr_writer :children
121
+
122
+ def markers
123
+ @markers ||= []
124
+ end
125
+
126
+ def splice_to!(marker)
127
+ splice!(*select_paths {|n| n.markers.include? marker })
128
+ end
129
+
130
+ def children
131
+ @children ||= []
132
+ end
133
+
134
+ def <<(*ts)
135
+ ts.each do |t|
136
+ t.parent = self
137
+ children << t
138
+ end
139
+ end
140
+
141
+ def root
142
+ if parent == nil
143
+ self
144
+ else
145
+ parent.root
146
+ end
147
+ end
148
+
149
+ def sibling_reverse_index
150
+ parent&.children&.reverse&.index self
151
+ end
152
+
153
+ def sibling_index
154
+ parent&.children&.index self
155
+ end
156
+
157
+ def flatten
158
+ [self, *children.map(&:flatten).flatten]
159
+ end
160
+
161
+ alias_method :self_and_descendants, :flatten
162
+
163
+ def path
164
+ [*parent&.path, *sibling_index]
165
+ end
166
+
167
+ def each(&b)
168
+ b.call self
169
+ children.each {|c| c.each(&b) }
170
+ self
171
+ end
172
+
173
+ def right_uncles
174
+ if parent
175
+ sibling_reverse_index + parent.right_uncles
176
+ else
177
+ 0
178
+ end
179
+ end
180
+
181
+ def right_tree_slice
182
+ "*" + "|" * right_uncles
183
+ end
184
+
185
+ def dup(currently_descending: false)
186
+ self_path = path
187
+ if parent && !currently_descending
188
+ root.dup.get self_path
189
+ else
190
+ super().tap do |d|
191
+ d.children = d.children.map do |c|
192
+ c.dup(currently_descending: true).tap do |dc|
193
+ dc.parent = d
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
199
+
200
+ def splice_self!
201
+ idx = sibling_index
202
+ parent.children.delete_at(idx)
203
+ parent.children.insert(idx, *children.each {|c| c.parent = parent })
204
+ parent
205
+ end
206
+
207
+ def splice!(*paths)
208
+ self.children = paths
209
+ .map {|p| get(p)&.tap {|d| d.parent = self } }
210
+ .reject(&:nil?)
211
+ self
212
+ end
213
+
214
+ def splice(*paths)
215
+ dup.splice!(*paths)
216
+ end
217
+
218
+ def trim_to_just!(*paths, &rejecting)
219
+ max_sibling = paths.map(&:first).reject(&:nil?).max
220
+ self.children = if max_sibling.nil?
221
+ []
222
+ else
223
+ children[0..max_sibling]
224
+ .map.with_index {|c, i| [c, i] }
225
+ .reject {|(c, i)| rejecting.call c, i, max_sibling if rejecting }
226
+ .each do |(child, i)|
227
+ subpaths = paths
228
+ .select {|p| p.first == i}
229
+ .map {|p| p.drop 1 }
230
+ child.trim_to_just!(*subpaths, &rejecting)
231
+ end
232
+ .map(&:first)
233
+ end
234
+ self
235
+ end
236
+
237
+ def select(&b)
238
+ r = []
239
+ each do |n|
240
+ if b.call n
241
+ r << n
242
+ end
243
+ end
244
+ r
245
+ end
246
+
247
+ def select_paths(&b)
248
+ root_path = path
249
+ select(&b).map do |n|
250
+ n.path.drop root_path.length
251
+ end
252
+ end
253
+
254
+ def get(path)
255
+ return self if path.empty?
256
+ idx, *sub_path = path
257
+ child = children[idx]
258
+ child&.get sub_path
259
+ end
260
+
261
+ def self_and_ancestors
262
+ [self, *parent&.self_and_ancestors]
263
+ end
264
+ end
265
+
266
+ class ParsedRange < PosRange
267
+ attr_reader :label
268
+ attr_accessor :failed
269
+
270
+ include Tree
271
+
272
+ # Initialize failure with starting position, ending position, and
273
+ # label of what was expected.
274
+ def initialize(pos_start, pos_end, label)
275
+ @label = label
276
+ super(pos_start, pos_end)
277
+ end
278
+
279
+ alias_method :underline, :render_in
280
+ end
281
+
282
+ class ExpectationFailed < Error
283
+ attr_reader :ctx
284
+
285
+ # Initializes an ExpectationFailed from a backed_io and an optional
286
+ # expectation with which to start the list of expectations that lead to
287
+ # this failure.
288
+ def initialize(ctx)
289
+ @ctx = ctx
290
+ end
291
+
292
+ INDENTATION = 2
293
+
294
+ def message_hunk(failure_tree)
295
+ end
296
+
297
+ def failure_tree
298
+ @failure_tree ||= begin
299
+ other_ranges = ctx.parsed_ranges.flatten.select do |range|
300
+ range.start == parsed_range.start && range != parsed_range
301
+ end
302
+ relevant_paths = [parsed_range, *other_ranges].map(&:path)
303
+ parsed_range.dup.root.trim_to_just!(*relevant_paths) do |c, i, max_sibling|
304
+ c.failed && i != max_sibling && c.start != parsed_range.start
305
+ end
306
+ end
307
+ end
308
+
309
+ def parsed_range
310
+ @parsed_range ||= ctx.furthest_parsed_range
311
+ end
312
+
313
+ def hunk_prelude
314
+ <<~EOF
315
+ line #{ctx.bio.line_number}:
316
+ #{" " * INDENTATION}#{ctx.bio.current_line}
317
+ EOF
318
+ end
319
+
320
+ def hunk_graph
321
+ line_range = ctx.bio.current_line_range
322
+ line_length = ctx.bio.current_line.length
323
+ tree_lines = []
324
+ max_tree_slice_length = failure_tree.flatten.map {|t| t.right_tree_slice.length }.max
325
+ prev_slice_length = nil
326
+ failure_tree.each do |range|
327
+ line = ""
328
+ line << " " * INDENTATION
329
+ line << range.underline(line_range)
330
+ line << " " * (line_length + INDENTATION - line.length)
331
+ this_slice_length = range.right_tree_slice.length
332
+ # If previous slice was a parent with multiple children (current
333
+ # slice being the first child), we'll want to draw the forking
334
+ # line.
335
+ if prev_slice_length && this_slice_length > prev_slice_length
336
+ # Current line already has the correct width to start drawing the
337
+ # tree. Copy it and substitute the rendered range with spaces.
338
+ fork_line = line.gsub(/./, " ")
339
+ fork_line << " "
340
+ i = 0
341
+ fork_line << range.right_tree_slice.rjust(max_tree_slice_length).gsub(/[*|]/) do |c|
342
+ i += 1
343
+ if i <= this_slice_length - prev_slice_length
344
+ "\\"
345
+ else
346
+ c
347
+ end
348
+ end
349
+ fork_line << "\n"
350
+ else
351
+ fork_line = ""
352
+ end
353
+ prev_slice_length = this_slice_length
354
+ line << " #{range.right_tree_slice.rjust(max_tree_slice_length)}"
355
+ line << " #{range.failed ? "failure" : "success"}: #{range.label}"
356
+ line << "\n"
357
+ tree_lines << fork_line << line
358
+ end
359
+ tree_lines.reverse.join
360
+ end
361
+
362
+ def hunk_at(pos)
363
+ ctx.bio.with_saved_pos do
364
+ ctx.bio.seek pos
365
+ hunk_prelude + hunk_graph
366
+ end
367
+ end
368
+
369
+ # The message of the exception. It's the current line, with a kind-of
370
+ # backtrace showing the failed expectations with a visualization of
371
+ # their range in the current line.
372
+ def message
373
+ hunk_at parsed_range.start
374
+ end
375
+ end
376
+
377
+ class Token
378
+ attr_reader :name
379
+
380
+ # Makes a token with the given name.
381
+ def initialize(name)
382
+ @name = name
383
+ end
384
+
385
+ # Renders token name by surrounding it in angle brackets.
386
+ def to_s
387
+ "<#{name}>"
388
+ end
389
+
390
+ # Compare tokens
391
+ def ==(t)
392
+ t.is_a?(self.class) && t.name == name
393
+ end
394
+
395
+ # Flipped version of Parsby#%, so you can specify the token of a parser
396
+ # at the beginning of a parser expression.
397
+ def %(p)
398
+ p % self
399
+ end
400
+ end
401
+
402
+ class Backup < StringIO
403
+ def with_saved_pos(&b)
404
+ saved = pos
405
+ b.call saved
406
+ ensure
407
+ seek saved
408
+ end
409
+
410
+ def all
411
+ with_saved_pos do
412
+ seek 0
413
+ read
414
+ end
415
+ end
416
+
417
+ alias_method :back_size, :pos
418
+
419
+ def back(n = back_size)
420
+ with_saved_pos do |saved|
421
+ seek -n, IO::SEEK_CUR
422
+ read n
423
+ end
424
+ end
425
+
426
+ def rest_of_line
427
+ with_saved_pos { readline }
428
+ rescue EOFError
429
+ ""
430
+ end
431
+
432
+ def back_lines
433
+ (back + rest_of_line).lines
434
+ end
435
+
436
+ def col
437
+ back[/(?<=\A|\n).*\z/].length
438
+ end
439
+
440
+ def current_line
441
+ with_saved_pos do
442
+ seek(-col, IO::SEEK_CUR)
443
+ readline.chomp
444
+ end
445
+ end
446
+ end
447
+
448
+ class BackedIO
449
+ # Initializes a BackedIO out of the provided IO object or String. The
450
+ # String will be turned into an IO using StringIO.
451
+ def initialize(io)
452
+ io = StringIO.new io if io.is_a? String
453
+ @io = io
454
+ @backup = Backup.new
455
+ end
456
+
457
+ # Makes a new BackedIO out of the provided IO, calls the provided
458
+ # blocked and restores the IO on an exception.
459
+ def self.for(io, &b)
460
+ bio = new io
461
+ begin
462
+ b.call bio
463
+ rescue
464
+ bio.restore
465
+ raise
466
+ end
467
+ end
468
+
469
+ # Similar to BackedIO.for, but it always restores the IO, even when
470
+ # there's no exception.
471
+ def self.peek(io, &b)
472
+ self.for io do |bio|
473
+ begin
474
+ b.call bio
475
+ ensure
476
+ bio.restore
477
+ end
478
+ end
479
+ end
480
+
481
+ def with_saved_pos(&b)
482
+ saved = pos
483
+ begin
484
+ b.call saved
485
+ ensure
486
+ restore_to saved
487
+ end
488
+ end
489
+
490
+ # Like #read, but without consuming.
491
+ def peek(*args)
492
+ with_saved_pos { read(*args) }
493
+ end
494
+
495
+ # Delegates pos to inner io, and works around pipes' inability to
496
+ # return pos by getting the length of the innermost BackedIO.
497
+ def pos
498
+ @io.pos
499
+ rescue Errno::ESPIPE
500
+ backup.pos
501
+ end
502
+
503
+ # Returns line number of current line. This is 1-indexed.
504
+ def line_number
505
+ lines_read.length
506
+ end
507
+
508
+ def seek(amount, whence = IO::SEEK_SET)
509
+ if whence == IO::SEEK_END
510
+ read
511
+ restore(-amount)
512
+ return
513
+ end
514
+ new_pos = case whence
515
+ when IO::SEEK_SET
516
+ amount
517
+ when IO::SEEK_CUR
518
+ pos + amount
519
+ end
520
+ if new_pos > pos
521
+ read new_pos - pos
522
+ else
523
+ restore_to new_pos
524
+ end
525
+ end
526
+
527
+ # pos == current_line_pos + col. This is needed to convert a pos to a
528
+ # col.
529
+ def current_line_pos
530
+ pos - col
531
+ end
532
+
533
+ def col
534
+ backup.col
535
+ end
536
+
537
+ def current_line_range
538
+ start = current_line_pos
539
+ PosRange.new start, start + current_line.length
540
+ end
541
+
542
+ def load_rest_of_line
543
+ with_saved_pos { readline }
544
+ end
545
+
546
+ def lines_read
547
+ load_rest_of_line
548
+ backup.back_lines.map(&:chomp)
549
+ end
550
+
551
+ # Returns current line, including what's to come from #read, without
552
+ # consuming input.
553
+ def current_line
554
+ load_rest_of_line
555
+ backup.current_line
556
+ end
557
+
558
+ # Restore n chars from the backup.
559
+ def restore(n = backup.back_size)
560
+ # Handle negatives in consideration of #with_saved_pos.
561
+ if n < 0
562
+ read(-n)
563
+ else
564
+ backup.back(n).chars.reverse.each {|c| ungetc c}
565
+ end
566
+ nil
567
+ end
568
+
569
+ def restore_to(prev_pos)
570
+ restore(pos - prev_pos)
571
+ end
572
+
573
+ # This is to provide transparent delegation to methods of underlying
574
+ # IO.
575
+ def method_missing(m, *args, &b)
576
+ @io.send(m, *args, &b)
577
+ end
578
+
579
+ def readline(*args)
580
+ @io.readline(*args).tap {|r| backup.write r unless r.nil? }
581
+ end
582
+
583
+ # Reads from underlying IO and backs it up.
584
+ def read(*args)
585
+ @io.read(*args).tap {|r| backup.write r unless r.nil? }
586
+ end
587
+
588
+ # Pass to underlying IO's ungetc and discard a part of the same length
589
+ # from the backup. As specified with different IO classes, the argument
590
+ # should be a single character. To restore from the backup, use
591
+ # #restore.
592
+ def ungetc(c)
593
+ # Though c is supposed to be a single character, as specified by the
594
+ # ungetc of different IO objects, let's not assume that when
595
+ # adjusting the backup.
596
+ backup.seek(-c.length, IO::SEEK_CUR)
597
+ @io.ungetc(c)
598
+ end
599
+
600
+ private
601
+
602
+ def backup
603
+ @backup
604
+ end
605
+ end
606
+
607
+ class Context
608
+ attr_reader :bio
609
+ attr_accessor :parsed_ranges
610
+
611
+ def initialize(io)
612
+ @bio = BackedIO.new io
613
+ @failures = []
614
+ end
615
+
616
+ def furthest_parsed_range
617
+ parsed_ranges.flatten.max_by(&:start)
618
+ end
619
+ end
620
+
621
+ # The parser's label. It's an "unknown" token by default.
622
+ def label
623
+ @label || Token.new("unknown")
624
+ end
625
+
626
+ # Assign label to parser. If given a symbol, it'll be turned into a
627
+ # Parsby::Token.
628
+ def label=(name)
629
+ @label = name.is_a?(Symbol) ? Token.new(name) : name
630
+ end
631
+
632
+ # Initialize parser with optional label argument, and parsing block. The
633
+ # parsing block is given an IO as argument, and its result is the result
634
+ # when parsing.
635
+ def initialize(label = nil, &b)
636
+ self.label = label if label
637
+ @parser = b
638
+ end
639
+
640
+ # Parse a String or IO object.
641
+ def parse(src)
642
+ ctx = src.is_a?(Context) ? src : Context.new(src)
643
+ parsed_range = ParsedRange.new(ctx.bio.pos, ctx.bio.pos, label)
644
+ ctx.parsed_ranges << parsed_range if ctx.parsed_ranges
645
+ parent_parsed_range = ctx.parsed_ranges
646
+ ctx.parsed_ranges = parsed_range
647
+ begin
648
+ r = @parser.call ctx
649
+ rescue ExpectationFailed => e
650
+ ctx.parsed_ranges.end = ctx.bio.pos
651
+ ctx.parsed_ranges.failed = true
652
+ ctx.bio.restore_to ctx.parsed_ranges.start
653
+ raise
654
+ else
655
+ ctx.parsed_ranges.end = ctx.bio.pos
656
+ r
657
+ ensure
658
+ # Keep the root one for use in ExceptionFailed#message
659
+ if parent_parsed_range
660
+ ctx.parsed_ranges = parent_parsed_range
661
+ end
662
+ end
663
+ end
664
+
665
+ # Parses without consuming input.
666
+ def peek(src)
667
+ ctx = src.is_a?(Context) ? src : Context.new(src)
668
+ starting_pos = ctx.bio.pos
669
+ begin
670
+ parse ctx
671
+ ensure
672
+ ctx.bio.restore_to starting_pos
673
+ end
674
+ end
675
+
676
+ # <tt>x | y</tt> tries y if x fails.
677
+ def |(p)
678
+ Parsby.new "(#{self.label} | #{p.label})" do |c|
679
+ begin
680
+ parse c
681
+ rescue Error
682
+ p.parse c
683
+ end
684
+ end
685
+ end
686
+
687
+ # x < y runs parser x then y and returns x.
688
+ def <(p)
689
+ self.then {|r| p.then { pure r } } % "(#{label} < #{p.label})"
690
+ end
691
+
692
+ # x > y runs parser x then y and returns y.
693
+ def >(p)
694
+ self.then { p } % "(#{label} > #{p.label})"
695
+ end
696
+
697
+ def ~
698
+ Parsby.new "(~ #{label})" do |c|
699
+ begin
700
+ parse c
701
+ ensure
702
+ c.parsed_ranges.children[0].splice_self!
703
+ if c.parsed_ranges.parent
704
+ c.parsed_ranges.splice_self!
705
+ end
706
+ end
707
+ end
708
+ end
709
+
710
+ # p * n, runs parser p n times, grouping results in an array.
711
+ def *(n)
712
+ Parsby.new "(#{label} * #{n})" do |c|
713
+ n.times.map { parse c }
714
+ end
715
+ end
716
+
717
+ # x + y does + on the results of x and y. This is mostly meant to be used
718
+ # with arrays, but it would work with numbers and strings too.
719
+ def +(p)
720
+ group(self, p)
721
+ .fmap {|(x, y)| x + y }
722
+ .tap {|r| r.label = "(#{label} + #{p.label})" }
723
+ end
724
+
725
+ # xs << x appends result of parser x to list result of parser xs.
726
+ def <<(p)
727
+ Parsby.new "(#{label} << #{p.label})" do |c|
728
+ x = parse c
729
+ y = p.parse c
730
+ # like x << y, but without modifying x.
731
+ x + [y]
732
+ end
733
+ end
734
+
735
+ # Set the label and return self.
736
+ def %(name)
737
+ self.label = name
738
+ self
739
+ end
740
+
741
+ # Like map for arrays, this lets you work with the value "inside" the
742
+ # parser, i.e. the result.
743
+ #
744
+ # Example:
745
+ #
746
+ # decimal.fmap {|x| x + 1}.parse("2")
747
+ # => 3
748
+ def fmap(&b)
749
+ Parsby.new "#{label}.fmap" do |c|
750
+ b.call parse c
751
+ end
752
+ end
753
+
754
+ # Pass result of self parser to block to construct the next parser.
755
+ #
756
+ # For example, instead of writing:
757
+ #
758
+ # Parsby.new do |c|
759
+ # x = foo.parse c
760
+ # bar(x).parse c
761
+ # end
762
+ #
763
+ # you can write:
764
+ #
765
+ # foo.then {|x| bar x }
766
+ #
767
+ # This is analogous to Parsec's >>= operator in Haskell, where you could
768
+ # write:
769
+ #
770
+ # foo >>= bar
771
+ def then(&b)
772
+ Parsby.new "#{label}.then" do |c|
773
+ b.call(parse(c)).parse(c)
774
+ end
775
+ end
776
+
777
+ # <tt>x.that_fails(y)</tt> will try <tt>y</tt>, fail if <tt>y</tt>
778
+ # succeeds, or parse with <tt>x</tt> if <tt>y</tt>
779
+ # fails.
780
+ #
781
+ # Example:
782
+ #
783
+ # decimal.that_fails(string("10")).parse "3"
784
+ # => 3
785
+ # decimal.that_fails(string("10")).parse "10"
786
+ # Parsby::ExpectationFailed: line 1:
787
+ # 10
788
+ # \/ expected: (not "10")
789
+ def that_fails(p)
790
+ Parsby.new "#{label}.that_fails(#{p.label})" do |c|
791
+ orig_pos = c.bio.pos
792
+ begin
793
+ r = p.parse c.bio
794
+ rescue Error
795
+ c.bio.restore_to orig_pos
796
+ parse c.bio
797
+ else
798
+ raise ExpectationFailed.new c
799
+ end
800
+ end
801
+ end
802
+
803
+ alias_method :that_fail, :that_fails
804
+ end