parsby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
# Rake entry point: loads Bundler's gem tasks and the RSpec rake task.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the `spec` task.
task default: :spec
@@ -0,0 +1,35 @@
1
#!/bin/bash

# Lists all instance and class methods defined under lib/, one per line and
# sorted, in the form Outer::Inner#instance_method or Outer::Inner.class_method.
# It's used along with the script tested-methods in a test in
# projects_spec.rb to ensure test-coverage.
#
# The enclosing class/module of a definition is inferred from indentation,
# so the sources must be consistently indented. Methods named "included"
# are left out of the listing.
#
# Requires gawk: it uses the three-argument match() and PROCINFO["sorted_in"].

grep -REh '^\s*(class|module|def|define_combinator|end)\b' lib | awk '
  BEGIN {
    # "for (i in array)" visits subscripts in an unspecified order unless
    # told otherwise. Visit them in ascending numeric order so the context
    # is always printed outermost first.
    PROCINFO["sorted_in"] = "@ind_num_asc"
  }

  # Column (0-based) of the first non-blank character of the current line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # class/module line: remember its name at its indentation level and
  # forget every context that was nested deeper than it.
  /^\s*(module|class)\>/ {
    match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Subscripts are strings; "i + 0" forces a numeric comparison so that
      # an indentation of 10 is not considered smaller than one of 2.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # def/define_combinator line: print the enclosing contexts joined by "::"
  # followed by "." for "def self.name" and "#" for everything else.
  /^\s*(def|define_combinator)\>/ {
    indent_level = get_indent_level()
    first = 1
    for (i in context) {
      if (i + 0 < indent_level) {
        printf(first ? "%s" : "::%s", context[i])
        first = 0
      }
    }
    match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^,([:blank:]]+)/, matches)
    print (matches[2] == "self." ? "." : "#") matches[3]
  }
' | grep -v '[#.]included$' | sort
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "parsby"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+
12
# Turns a path relative to lib/ into the name of the constant it is
# expected to define, e.g. "parsby/foo_bar.rb" becomes "Parsby::FooBar".
def to_constant_representation(path)
  namespaced = path.gsub("/", "::")
  # Upcase the first character of every namespace segment and the character
  # following each underscore (dropping the underscore itself).
  camelized = namespaced.gsub(/(\A|(?<=::)|_)./) { |match| match[-1].upcase }
  camelized.sub(/\.rb\z/, "")
end
18
+
19
# Reloads the library from lib/ (paths are relative to the current
# directory): removes the top-level constants that the source files define,
# loads every file again, and mixes the combinators back into the console.
# lib/parsby/version.rb is left alone. Returns nil.
def reload!
  lib_root = Pathname.new("lib/")
  sources = Dir["lib/**/*"]
    .map { |file| Pathname.new(file).relative_path_from(lib_root) }
    .select { |rel| rel.to_s =~ /\.rb\z/ && rel.to_s != "parsby/version.rb" }

  sources.each do |rel|
    const = to_constant_representation(rel.to_s)
    # Nested constants go away together with their parents, so only
    # top-level ones are removed explicitly.
    next if const =~ /::/
    next unless Object.const_defined?(const)
    Object.send(:remove_const, const)
  end

  # Load everything only after having removed everything.
  sources.each { |rel| load rel }

  include Parsby::Combinators
  extend Parsby::Combinators::ModuleMethods
  nil
end
37
+
38
+ reload!
39
+
40
+ Pry.start
@@ -0,0 +1,49 @@
1
#!/bin/bash

# This script lists the methods that aren't immediately preceded by a
# comment. The output is empty and the exit status is successful if they
# all have a comment; otherwise the offending methods are printed and the
# exit status is 1.
#
# Files under lib/parsby/example/ are skipped, and methods named "included"
# are exempt.
#
# This is used in a test in project_spec.rb to ensure documentation
# coverage.
#
# Requires gawk: it uses the three-argument match() and PROCINFO["sorted_in"].
#
# awk is deliberately the last command of the pipeline so that its exit
# status is the exit status of the script. grep is given the files found by
# find and is not run recursively, so an empty file list does not make it
# scan the current directory.

find lib -type f -name \*.rb ! -path lib/parsby/example/\* \
  | xargs grep -Eh '^\s*(#|(class|module|def|define_combinator|end)\b)' \
  | awk '
  BEGIN {
    # Visit the context outermost first; the default for-in order is
    # unspecified.
    PROCINFO["sorted_in"] = "@ind_num_asc"
  }

  # Column (0-based) of the first non-blank character of the current line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # class/module line: remember its name at its indentation level and
  # forget every context that was nested deeper than it.
  /^\s*(module|class)\>/ {
    match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Subscripts are strings; "i + 0" forces a numeric comparison.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # def/define_combinator line that does not directly follow a comment.
  /^\s*(def|define_combinator)\>/ && !prev_line_is_comment {
    indent_level = get_indent_level()
    qualified = ""
    first = 1
    for (i in context) {
      if (i + 0 < indent_level) {
        qualified = qualified (first ? "" : "::") context[i]
        first = 0
      }
    }
    match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^([:blank:]]+)/, matches)
    qualified = qualified (matches[2] == "self." ? "." : "#") matches[3]
    # Methods named "included" neither get reported nor fail the run.
    if (qualified !~ /[#.]included$/) {
      print qualified
      at_least_one_missing = 1
    }
  }

  # Runs for every line, after the rules above, so that the next line knows
  # whether it directly follows a comment.
  {
    prev_line_is_comment = /^\s*#/
  }

  END {
    exit (at_least_one_missing ? 1 : 0)
  }
'
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,47 @@
1
#!/bin/bash

# This lists all tested instance and class methods defined in the project.
# It's used along with the script all-methods in a test in projects_spec.rb
# to ensure test-coverage.
#
# It depends on rspec contexts being done in a specific format, and
# properly indented. The format can be exemplified by:
#
#   RSpec.describe Foo do
#     describe Foo::Bar do
#       describe "#foo" do
#         ...
#       end
#       describe ".bar" do
#         ...
#       end
#     end
#   end
#
# For the example above the output is Foo::Bar#foo and Foo::Bar.bar.
#
# Requires gawk: it uses the three-argument match().

grep -REh '^\s*(RSpec\.)?describe\(?\s*([A-Z]|"[#.])' spec | awk '
  # Column (0-based) of the first non-blank character of the current line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # describe of a constant: remember it at its indentation level and forget
  # every context that was nested deeper than it.
  /^\s*(RSpec\.)?describe\(?\s*[A-Z]/ {
    match($0, /^\s*(RSpec\.)?describe\(?\s*([A-Za-z0-9:_]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Subscripts are strings; "i + 0" forces a numeric comparison.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # describe of a "#method" or ".method": prefix it with the nearest
  # enclosing constant, i.e. the one with the greatest indentation that is
  # still smaller than the indentation of this line. The for-in order is
  # unspecified, so the nearest one is tracked explicitly, and ctx is reset
  # so that a stale value is never reused when nothing encloses this line.
  /^\s*(RSpec\.)?describe\(?\s*["'\''][#.]/ {
    indent_level = get_indent_level()
    ctx = ""
    nearest = -1
    for (i in context) {
      if (i + 0 < indent_level && i + 0 > nearest) {
        nearest = i + 0
        ctx = context[i]
      }
    }
    match($0, /^\s*(RSpec\.)?describe\(?\s*(["'\''])([#.].+)["'\'']/, matches)
    print ctx matches[3]
  }
' | sort
@@ -0,0 +1,30 @@
1
#!/bin/bash

# Vestigial methods are those that are defined, but not used anywhere else
# in lib/. This is to help us detect methods that we no longer use to
# determine whether to remove them. If a vestigial method is still useful
# for users of this library or otherwise, add it to the whitelist below.
#
# How it works: the first grep collects the name of everything defined via
# "def", "def self." or "define_combinator :" (\K drops the part of the
# match before the name). For each name, the loop counts the lines in lib/
# that mention it as a whole word, ignoring lines that contain a "#" not
# followed by "{" (comments, but not string interpolation) and lines that
# contain def, define_combinator or require. awk then prints the names whose
# count is 0 and that aren't whitelisted.

for n in $(grep -Phro '^[^#]*(def (self\.)?|define_combinator :)\K\w+' lib | sort -u); do
  printf "%s\t%s\n" \
    "$(grep -Fwrn "$n" lib | grep -Ev '#[^{]|\b(def|define_combinator|require)\b' | wc -l)" \
    "$n"
done | awk '
  # Methods that Ruby itself calls; they are never referenced by name.
  function is_ruby_hook() {
    return $2 ~ /\<(included|initialize|message|method_missing)\>/
  }

  # Names that are allowed to have no uses inside lib/.
  function whitelisted() {
    return is_ruby_hook() \
      || $2 == "define_combinator" \
      || $2 == "parsby" \
      || $2 == "ilit" \
      || $2 == "recursive" \
      || $2 == "decimal_fraction" \
      || $2 == "left_tree_slice" \
      || $2 == "all" \
      ;
  }

  # $1 is the number of uses, $2 the method name.
  !$1 && !whitelisted() { print $2 }
'
@@ -0,0 +1,804 @@
1
+ require "parsby/version"
2
+ require "parsby/combinators"
3
+
4
+ class Parsby
5
+ include Combinators
6
+
7
+ class Error < StandardError; end
8
+
9
# A span of positions. The starting position is part of the span, the
# ending position is the first one past it.
class PosRange
  attr_accessor :start, :end

  # Builds a range from pos_start (inclusive) to pos_end (exclusive). With
  # start 1 and end 2 only position 1 is in the range; with start 1 and
  # end 1 the range contains no position at all.
  def initialize(pos_start, pos_end)
    @start, @end = pos_start, pos_end
  end

  # Number of positions in the range.
  def length
    @end - @start
  end

  # Number of positions shared with range; 0 when they don't overlap.
  def length_in(range)
    shared = self & range
    shared ? shared.length : 0
  end

  # Intersection with range, or nil when the ranges neither overlap nor
  # touch. Ranges that merely touch yield a range of length 0.
  def &(range)
    if overlaps?(range) || touching?(range)
      PosRange.new [@start, range.start].max, [@end, range.end].min
    end
  end

  # True when one range ends exactly where the other begins.
  def touching?(range)
    range.end == self.start || self.end == range.start
  end

  # True when neither range lies completely to one side of the other.
  # Touching ranges don't count as overlapping, even though their
  # intersection is a range of length 0.
  def overlaps?(range)
    !completely_left_of?(range) && !completely_right_of?(range)
  end

  # True when this range ends at or before the start of range.
  def completely_left_of?(range)
    @end <= range.start
  end

  # True when this range starts at or after the end of range.
  def completely_right_of?(range)
    range.end <= @start
  end

  # True when pos is at or after the start and before the end.
  def contains?(pos)
    @start <= pos && pos < @end
  end

  # True when the start of this range is a position of range.
  def starts_inside_of?(range)
    range.contains? @start
  end

  # True when the end of this range is a position of range or coincides
  # with the end of range.
  def ends_inside_of?(range)
    range.contains?(@end) || range.end == @end
  end

  # True when this range both starts and ends inside of range.
  def completely_inside_of?(range)
    starts_inside_of?(range) && ends_inside_of?(range)
  end

  # Draws this range as an underline relative to line_range. "<-" and "->"
  # are returned for ranges lying before or after the line. Otherwise the
  # result is indented up to where the range starts in the line, followed
  # by dashes, with "\" at a start and "/" at an end that fall inside the
  # line. A range of length 0 is drawn as "|", and one of length 1
  # completely inside the line as "V".
  def render_in(line_range)
    return "<-" if completely_left_of?(line_range) && !starts_inside_of?(line_range)
    return "->" if completely_right_of?(line_range)

    visible = length_in(line_range)
    padding = " " * [0, start - line_range.start].max
    bar = "-" * visible
    bar[0] = "\\" if starts_inside_of?(line_range)
    bar[-1] = "/" if ends_inside_of?(line_range)
    bar[0] = "|" if visible == 0
    bar[0] = "V" if visible == 1 && completely_inside_of?(line_range)
    padding + bar
  end
end
87
+
88
# Marks nodes of the tree of parsed ranges and later reduces a range's
# children to just the marked nodes.
class Splicer
  # Yields a new splicer to the block, which must return a parser. The
  # parser is given label, if provided, and is then wrapped with #start.
  def self.start(label = nil, &b)
    splicer = new
    parser = b.call(splicer)
    parser % label if label
    splicer.start(parser)
  end

  # Wraps p in a parser that, once p has run (whether or not it failed),
  # replaces the children of the current parsed range with the nodes that
  # were marked by this splicer.
  def start(p)
    Parsby.new("splicer.start(#{p.label})") do |c|
      begin
        p.parse c
      ensure
        c.parsed_ranges.splice_to! self
      end
    end
  end

  # Wraps p in a parser that, once p has run (whether or not it failed),
  # marks the first child of the current parsed range with this splicer.
  def end(p)
    Parsby.new("splicer.end(#{p.label})") do |c|
      begin
        p.parse c
      ensure
        c.parsed_ranges.children[0].markers << self
      end
    end
  end
end
116
+
117
# Mixin that gives an object a parent, an ordered list of children and a
# list of markers, along with helpers to navigate, copy and prune the
# resulting tree. Nodes are addressed with "paths": arrays of child indexes
# that lead from a node down to one of its descendants.
module Tree
  attr_accessor :parent
  attr_writer :children

  # Markers attached to this node. Starts out as an empty array.
  def markers
    @markers ||= []
  end

  # Replaces the children of this node with the nodes of its subtree that
  # carry marker. Returns self.
  def splice_to!(marker)
    splice!(*select_paths { |n| n.markers.include? marker })
  end

  # Children of this node. Starts out as an empty array.
  def children
    @children ||= []
  end

  # Appends the given nodes as children, setting their parent to this node.
  def <<(*ts)
    ts.each do |t|
      t.parent = self
      children << t
    end
  end

  # The top-most ancestor; self when this node has no parent.
  def root
    if parent == nil
      self
    else
      parent.root
    end
  end

  # Index of this node among its siblings counting from the right, or nil
  # when it has no parent. The node is looked up by identity, so siblings
  # that merely compare equal with == are not confused with it.
  def sibling_reverse_index
    parent && parent.children.reverse.index { |sibling| sibling.equal? self }
  end

  # Index of this node among its siblings, or nil when it has no parent.
  # The node is looked up by identity, so siblings that merely compare
  # equal with == are not confused with it.
  def sibling_index
    parent && parent.children.index { |sibling| sibling.equal? self }
  end

  # This node followed by all of its descendants, depth first.
  def flatten
    [self, *children.flat_map(&:flatten)]
  end

  alias_method :self_and_descendants, :flatten

  # Child indexes leading from the root down to this node. Empty for the
  # root.
  def path
    [*parent&.path, *sibling_index]
  end

  # Calls the block with this node and then with each of its descendants,
  # depth first. Returns self.
  def each(&b)
    b.call self
    children.each { |c| c.each(&b) }
    self
  end

  # Sum, over this node and its ancestors, of the number of siblings to the
  # right of each.
  def right_uncles
    if parent
      sibling_reverse_index + parent.right_uncles
    else
      0
    end
  end

  # "*" followed by one "|" per right uncle.
  def right_tree_slice
    "*" + "|" * right_uncles
  end

  # Copies the whole tree this node belongs to and returns the copy's
  # counterpart of this node. currently_descending is used by the recursion
  # to copy just the subtree under the node it's called on.
  def dup(currently_descending: false)
    if parent && !currently_descending
      self_path = path
      root.dup.get self_path
    else
      super().tap do |d|
        d.children = d.children.map do |c|
          c.dup(currently_descending: true).tap do |dc|
            dc.parent = d
          end
        end
      end
    end
  end

  # Removes this node from its parent, putting this node's children in its
  # place. Returns the parent.
  def splice_self!
    idx = sibling_index
    parent.children.delete_at(idx)
    parent.children.insert(idx, *children.each { |c| c.parent = parent })
    parent
  end

  # Replaces the children of this node with the nodes found at the given
  # paths, skipping paths that lead nowhere. Returns self.
  def splice!(*paths)
    self.children = paths
      .map { |p| get(p)&.tap { |d| d.parent = self } }
      .reject(&:nil?)
    self
  end

  # Like #splice!, but works on a copy and leaves this tree untouched.
  def splice(*paths)
    dup.splice!(*paths)
  end

  # Prunes the tree in place. At every level, the children to the right of
  # the right-most child mentioned by paths are dropped, and so are all the
  # children of a node that no path goes through. The optional block is
  # called with each remaining child, its index and the index of the
  # right-most child kept; the child is dropped too when the block returns
  # a truthy value. Returns self.
  def trim_to_just!(*paths, &rejecting)
    max_sibling = paths.map(&:first).compact.max
    self.children = if max_sibling.nil?
      []
    else
      children[0..max_sibling]
        .each_with_index
        .reject { |(c, i)| rejecting.call c, i, max_sibling if rejecting }
        .each do |(child, i)|
          subpaths = paths
            .select { |p| p.first == i }
            .map { |p| p.drop 1 }
          child.trim_to_just!(*subpaths, &rejecting)
        end
        .map(&:first)
    end
    self
  end

  # Nodes of this subtree, self included, for which the block is truthy.
  def select(&b)
    r = []
    each { |n| r << n if b.call n }
    r
  end

  # Like #select, but returns the paths of the nodes relative to this node.
  def select_paths(&b)
    root_path = path
    select(&b).map do |n|
      n.path.drop root_path.length
    end
  end

  # Node found by following path from this node, or nil if there's none.
  def get(path)
    return self if path.empty?
    idx, *sub_path = path
    child = children[idx]
    child&.get sub_path
  end

  # This node followed by its parent, its parent's parent, and so on.
  def self_and_ancestors
    [self, *parent&.self_and_ancestors]
  end
end
265
+
266
# A PosRange that is also a Tree node and carries a label and a failed
# flag.
class ParsedRange < PosRange
  include Tree

  attr_reader :label
  attr_accessor :failed

  # Initializes the range with its starting position, ending position, and
  # label.
  def initialize(pos_start, pos_end, label)
    super(pos_start, pos_end)
    @label = label
  end

  # Same as #render_in.
  alias_method :underline, :render_in
end
281
+
282
# Error whose message shows the line of input where parsing got the
# furthest, together with a tree of the parsed ranges that start at that
# point and their ancestors, each underlined in the line.
class ExpectationFailed < Error
  attr_reader :ctx

  # Number of columns by which the source line and the underlines are
  # indented in the message.
  INDENTATION = 2

  # Initializes the failure with the parsing context, from which the
  # message is built.
  def initialize(ctx)
    @ctx = ctx
  end

  # Has an empty body and returns nil.
  def message_hunk(failure_tree)
  end

  # Copy of the tree of parsed ranges, trimmed down to the furthest parsed
  # range, the other ranges that start at the same position, and their
  # ancestors. Failed ranges that start elsewhere are dropped unless
  # they're the right-most child kept at their level. Memoized.
  def failure_tree
    @failure_tree ||= begin
      other_ranges = ctx.parsed_ranges.flatten.select do |range|
        range.start == parsed_range.start && range != parsed_range
      end
      relevant_paths = [parsed_range, *other_ranges].map(&:path)
      parsed_range.dup.root.trim_to_just!(*relevant_paths) do |c, i, max_sibling|
        c.failed && i != max_sibling && c.start != parsed_range.start
      end
    end
  end

  # The parsed range that starts the furthest into the input. Memoized.
  def parsed_range
    @parsed_range ||= ctx.furthest_parsed_range
  end

  # Line number followed by the indented current line.
  def hunk_prelude
    <<~EOF
      line #{ctx.bio.line_number}:
      #{" " * INDENTATION}#{ctx.bio.current_line}
    EOF
  end

  # One line per range of the failure tree: the range underlined relative
  # to the current line, its slice of the tree drawing, and whether it
  # failed or succeeded along with its label. Lines come out in reverse
  # order of traversal, so the root ends up last.
  def hunk_graph
    line_range = ctx.bio.current_line_range
    line_length = ctx.bio.current_line.length
    tree_lines = []
    max_tree_slice_length = failure_tree.flatten.map { |t| t.right_tree_slice.length }.max
    prev_slice_length = nil
    failure_tree.each do |range|
      line = " " * INDENTATION
      line << range.underline(line_range)
      # Pad up to the width of the indented source line. The "<-" and "->"
      # underlines can be wider than a short or empty line, in which case
      # there's nothing to pad; ljust leaves the line as it is instead of
      # failing on a negative width.
      line = line.ljust(line_length + INDENTATION)
      this_slice_length = range.right_tree_slice.length
      # If previous slice was a parent with multiple children (current
      # slice being the first child), we'll want to draw the forking
      # line.
      if prev_slice_length && this_slice_length > prev_slice_length
        # Current line already has the correct width to start drawing the
        # tree. Copy it and substitute the rendered range with spaces.
        fork_line = line.gsub(/./, " ")
        fork_line << " "
        i = 0
        slice = range.right_tree_slice.rjust(max_tree_slice_length)
        fork_line << slice.gsub(/[*|]/) { |c|
          i += 1
          i <= this_slice_length - prev_slice_length ? "\\" : c
        }
        fork_line << "\n"
      else
        fork_line = ""
      end
      prev_slice_length = this_slice_length
      line << " #{range.right_tree_slice.rjust(max_tree_slice_length)}"
      line << " #{range.failed ? "failure" : "success"}: #{range.label}"
      line << "\n"
      tree_lines << fork_line << line
    end
    tree_lines.reverse.join
  end

  # Prelude and graph for the line containing pos. The position of the
  # input is restored afterwards.
  def hunk_at(pos)
    ctx.bio.with_saved_pos do
      ctx.bio.seek pos
      hunk_prelude + hunk_graph
    end
  end

  # The message of the exception. It's the current line, with a kind-of
  # backtrace showing the failed expectations with a visualization of
  # their range in the current line.
  def message
    hunk_at parsed_range.start
  end
end
376
+
377
# A named label for a parser.
class Token
  attr_reader :name

  # Makes a token with the given name.
  def initialize(name)
    @name = name
  end

  # The name of the token between angle brackets.
  def to_s
    format("<%s>", name)
  end

  # Tokens are equal when the other is a token of this class (or of a
  # subclass) with the same name.
  def ==(t)
    return false unless t.is_a?(self.class)
    t.name == name
  end

  # Flipped version of Parsby#%, so you can specify the token of a parser
  # at the beginning of a parser expression.
  def %(p)
    p.%(self)
  end
end
401
+
402
# StringIO with helpers to look at what's around the current position
# without moving it.
class Backup < StringIO
  # Calls the block with the current position and moves back to that
  # position afterwards, even if the block raises.
  def with_saved_pos(&b)
    saved = pos
    b.call saved
  ensure
    seek saved
  end

  # The whole content, regardless of the current position.
  def all
    with_saved_pos do
      seek 0
      read
    end
  end

  alias_method :back_size, :pos

  # The n characters right before the current position; everything before
  # it by default.
  def back(n = back_size)
    with_saved_pos do
      seek(-n, IO::SEEK_CUR)
      read n
    end
  end

  # From the current position through the end of the line, newline
  # included. Empty at the end of the content.
  def rest_of_line
    with_saved_pos { readline }
  rescue EOFError
    ""
  end

  # Lines from the start of the content through the end of the current
  # line.
  def back_lines
    (back + rest_of_line).lines
  end

  # Number of characters between the start of the current line and the
  # current position.
  def col
    back[/(?<=\A|\n).*\z/].length
  end

  # The whole current line, without its newline. When the current line is
  # empty and there's nothing after it, gets returns nil and the line is
  # reported as "", in line with #rest_of_line, instead of raising
  # EOFError.
  def current_line
    with_saved_pos do
      seek(-col, IO::SEEK_CUR)
      (gets || "").chomp
    end
  end
end
447
+
448
# Wraps an IO and keeps a backup of everything read through it, so that
# input can be given back to the IO and inspected again.
class BackedIO
  # Initializes a BackedIO out of the provided IO object or String. The
  # String will be turned into an IO using StringIO.
  def initialize(io)
    io = StringIO.new io if io.is_a? String
    @io = io
    @backup = Backup.new
  end

  # Makes a new BackedIO out of the provided IO, calls the provided
  # block and restores the IO on an exception.
  def self.for(io, &b)
    bio = new io
    begin
      b.call bio
    rescue
      bio.restore
      raise
    end
  end

  # Similar to BackedIO.for, but it always restores the IO, even when
  # there's no exception.
  def self.peek(io, &b)
    self.for io do |bio|
      begin
        b.call bio
      ensure
        bio.restore
      end
    end
  end

  # Calls the block with the current position and goes back to that
  # position afterwards, even if the block raises.
  def with_saved_pos(&b)
    saved = pos
    begin
      b.call saved
    ensure
      restore_to saved
    end
  end

  # Like #read, but without consuming.
  def peek(*args)
    with_saved_pos { read(*args) }
  end

  # Delegates pos to inner io, falling back to the position of the backup
  # when the inner io raises Errno::ESPIPE, as pipes do.
  def pos
    @io.pos
  rescue Errno::ESPIPE
    backup.pos
  end

  # Returns line number of current line. This is 1-indexed.
  def line_number
    lines_read.length
  end

  # Moves to a position by reading forward or restoring from the backup.
  # whence is one of IO::SEEK_SET (default), IO::SEEK_CUR and
  # IO::SEEK_END; the latter reads everything first.
  def seek(amount, whence = IO::SEEK_SET)
    if whence == IO::SEEK_END
      read
      restore(-amount)
      return
    end
    new_pos = case whence
    when IO::SEEK_SET
      amount
    when IO::SEEK_CUR
      pos + amount
    end
    if new_pos > pos
      read new_pos - pos
    else
      restore_to new_pos
    end
  end

  # pos == current_line_pos + col. This is needed to convert a pos to a
  # col.
  def current_line_pos
    pos - col
  end

  # Number of characters read so far in the current line.
  def col
    backup.col
  end

  # Range going from the start to the end of the current line, newline
  # excluded.
  def current_line_range
    start = current_line_pos
    PosRange.new start, start + current_line.length
  end

  # Reads the rest of the current line so that it's in the backup, then
  # gives it back to the IO. Does nothing at the end of the input, where
  # readline raises EOFError because there's no rest of line to load.
  def load_rest_of_line
    with_saved_pos { readline }
  rescue EOFError
    nil
  end

  # Lines from the start of the input through the current line, without
  # their newlines.
  def lines_read
    load_rest_of_line
    backup.back_lines.map(&:chomp)
  end

  # Returns current line, including what's to come from #read, without
  # consuming input.
  def current_line
    load_rest_of_line
    backup.current_line
  end

  # Restore n chars from the backup.
  def restore(n = backup.back_size)
    # Handle negatives in consideration of #with_saved_pos.
    if n < 0
      read(-n)
    else
      backup.back(n).chars.reverse.each { |c| ungetc c }
    end
    nil
  end

  # Goes back to prev_pos; reads forward if prev_pos is ahead.
  def restore_to(prev_pos)
    restore(pos - prev_pos)
  end

  # This is to provide transparent delegation to methods of underlying
  # IO.
  def method_missing(m, *args, &b)
    @io.send(m, *args, &b)
  end

  # Reads a line from underlying IO and backs it up.
  def readline(*args)
    @io.readline(*args).tap { |r| backup.write r unless r.nil? }
  end

  # Reads from underlying IO and backs it up.
  def read(*args)
    @io.read(*args).tap { |r| backup.write r unless r.nil? }
  end

  # Pass to underlying IO's ungetc and discard a part of the same length
  # from the backup. As specified with different IO classes, the argument
  # should be a single character. To restore from the backup, use
  # #restore.
  def ungetc(c)
    # Though c is supposed to be a single character, as specified by the
    # ungetc of different IO objects, let's not assume that when
    # adjusting the backup.
    backup.seek(-c.length, IO::SEEK_CUR)
    @io.ungetc(c)
  end

  private

  def backup
    @backup
  end
end
606
+
607
# State of a parsing run: the input, wrapped in a BackedIO, and the tree of
# parsed ranges.
class Context
  attr_reader :bio
  attr_accessor :parsed_ranges

  # Wraps io in a BackedIO. parsed_ranges starts out unset.
  def initialize(io)
    @failures = []
    @bio = BackedIO.new(io)
  end

  # Among the parsed ranges and all their descendants, the one with the
  # greatest starting position.
  def furthest_parsed_range
    parsed_ranges.flatten.max_by { |range| range.start }
  end
end
620
+
621
# Label of the parser. When none has been assigned, it's a token named
# "unknown".
def label
  return @label if @label
  Token.new("unknown")
end
625
+
626
# Sets the label of the parser. A symbol is wrapped in a Parsby::Token;
# anything else is stored as given.
def label=(name)
  @label =
    case name
    when Symbol then Token.new(name)
    else name
    end
end
631
+
632
# Makes a parser out of a parsing block and an optional label. The block is
# what #parse calls, and its result is the result of parsing.
def initialize(label = nil, &b)
  @parser = b
  self.label = label if label
end
639
+
640
# Parse a String or IO object. A Context is also accepted, and is used as
# is. Returns the result of the parsing block.
#
# A ParsedRange labeled after this parser is added to the context's tree
# of parsed ranges, and is the current range while the parsing block runs.
# When the block raises ExpectationFailed, the range is flagged as failed,
# the input consumed since the start of the range is restored, and the
# exception is re-raised.
def parse(src)
  ctx = src.is_a?(Context) ? src : Context.new(src)
  parsed_range = ParsedRange.new(ctx.bio.pos, ctx.bio.pos, label)
  # Hang the new range from the current one, if any, and make it current.
  ctx.parsed_ranges << parsed_range if ctx.parsed_ranges
  parent_parsed_range = ctx.parsed_ranges
  ctx.parsed_ranges = parsed_range
  begin
    r = @parser.call ctx
  rescue ExpectationFailed => e
    # Record how far we got before giving the input back.
    ctx.parsed_ranges.end = ctx.bio.pos
    ctx.parsed_ranges.failed = true
    ctx.bio.restore_to ctx.parsed_ranges.start
    raise
  else
    ctx.parsed_ranges.end = ctx.bio.pos
    r
  ensure
    # Keep the root one for use in ExpectationFailed#message
    if parent_parsed_range
      ctx.parsed_ranges = parent_parsed_range
    end
  end
end
664
+
665
# Parses like #parse, but puts the input back where it was afterwards,
# whether parsing succeeded or not.
def peek(src)
  ctx = if src.is_a?(Context) then src else Context.new(src) end
  rewind_to = ctx.bio.pos
  begin
    parse(ctx)
  ensure
    ctx.bio.restore_to(rewind_to)
  end
end
675
+
676
# <tt>x | y</tt> parses with x and, if that raises a Parsby::Error, parses
# with y instead.
def |(p)
  alternation = "(#{self.label} | #{p.label})"
  Parsby.new(alternation) { |c|
    begin
      parse c
    rescue Error
      p.parse c
    end
  }
end
686
+
687
# x < y parses with x and then with y, and results in what x resulted in.
def <(p)
  sequenced = self.then { |r| p.then { pure r } }
  sequenced % "(#{label} < #{p.label})"
end
691
+
692
# x > y parses with x and then with y, and results in what y resulted in.
def >(p)
  sequenced = self.then { p }
  sequenced % "(#{label} > #{p.label})"
end
696
+
697
# ~x parses like x. Afterwards, whether or not parsing failed, the first
# child of the current parsed range is replaced by that child's own
# children, and the same is done to the current parsed range itself when
# it has a parent.
def ~
  Parsby.new("(~ #{label})") { |c|
    begin
      parse c
    ensure
      c.parsed_ranges.children[0].splice_self!
      c.parsed_ranges.splice_self! if c.parsed_ranges.parent
    end
  }
end
709
+
710
# p * n parses with p n times in a row and results in an array of the n
# results.
def *(n)
  Parsby.new("(#{label} * #{n})") { |c|
    results = []
    n.times { results << parse(c) }
    results
  }
end
716
+
717
# x + y parses with x and then y, and results in the result of x plus the
# result of y. Mostly meant for parsers resulting in arrays, but it works
# with numbers and strings too.
def +(p)
  summed = group(self, p).fmap { |(x, y)| x + y }
  summed.label = "(#{label} + #{p.label})"
  summed
end
724
+
725
# xs << x parses with xs and then x, and results in the list xs resulted
# in with the result of x appended. The list that xs resulted in is not
# modified.
def <<(p)
  Parsby.new("(#{label} << #{p.label})") { |c|
    parse(c) + [p.parse(c)]
  }
end
734
+
735
# Assigns name as the label of the parser and returns the parser itself.
def %(name)
  tap { self.label = name }
end
740
+
741
# Makes a parser that parses like this one and passes the result through
# the block, much like map does with the elements of an array.
#
# Example:
#
#   decimal.fmap {|x| x + 1}.parse("2")
#   => 3
def fmap(&b)
  Parsby.new("#{label}.fmap") { |c|
    result = parse(c)
    b.call(result)
  }
end
753
+
754
# Parses with this parser, passes the result to the block, and continues
# parsing with the parser that the block returns.
#
# For example, instead of writing:
#
#   Parsby.new do |c|
#     x = foo.parse c
#     bar(x).parse c
#   end
#
# you can write:
#
#   foo.then {|x| bar x }
#
# This is analogous to Parsec's >>= operator in Haskell, where you could
# write:
#
#   foo >>= bar
def then(&b)
  Parsby.new("#{label}.then") { |c|
    first_result = parse(c)
    next_parser = b.call(first_result)
    next_parser.parse(c)
  }
end
776
+
777
# <tt>x.that_fails(y)</tt> first tries <tt>y</tt>. If <tt>y</tt> succeeds,
# it fails; if <tt>y</tt> fails, the input is restored and parsed with
# <tt>x</tt>.
#
# Example:
#
#   decimal.that_fails(string("10")).parse "3"
#   => 3
#   decimal.that_fails(string("10")).parse "10"
#   Parsby::ExpectationFailed: line 1:
#     10
#     \/ expected: (not "10")
def that_fails(p)
  Parsby.new("#{label}.that_fails(#{p.label})") { |c|
    bio = c.bio
    start = bio.pos
    begin
      p.parse bio
    rescue Error
      bio.restore_to start
      parse bio
    else
      raise ExpectationFailed.new(c)
    end
  }
end
802
+
803
+ alias_method :that_fail, :that_fails
804
+ end