parsby 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +41 -0
- data/README.md +607 -0
- data/Rakefile +6 -0
- data/bin/all-methods +35 -0
- data/bin/console +40 -0
- data/bin/methods-with-pending-documentation +49 -0
- data/bin/setup +8 -0
- data/bin/tested-methods +47 -0
- data/bin/vestigial-methods +30 -0
- data/lib/parsby.rb +804 -0
- data/lib/parsby/combinators.rb +384 -0
- data/lib/parsby/example/arithmetic_parser.rb +96 -0
- data/lib/parsby/example/csv_parser.rb +41 -0
- data/lib/parsby/example/json_parser.rb +92 -0
- data/lib/parsby/example/lisp_parser.rb +135 -0
- data/lib/parsby/version.rb +3 -0
- data/parsby.gemspec +42 -0
- metadata +121 -0
data/Rakefile
ADDED
data/bin/all-methods
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/bin/bash

# This lists all instance and class methods defined in the project. It's
# used along with the script tested-methods in a test in projects_spec.rb
# to ensure test-coverage.
#
# Each output line is the enclosing class/module names joined with "::",
# followed by "#name" for "def name" and "define_combinator :name" entries,
# or ".name" for "def self.name". Nesting is inferred purely from the
# indentation of the matched lines, so lib/ must be consistently indented.
#
# The awk program relies on gawk extensions (three-argument match, \s, \>
# and PROCINFO["sorted_in"]).

grep -REh '^\s*(class|module|def|define_combinator|end)\b' lib | awk '
  BEGIN {
    # Traverse context[] by ascending numeric indent so that namespaces are
    # printed outermost first; plain for-in order is unspecified.
    PROCINFO["sorted_in"] = "@ind_num_asc"
  }

  # Zero-based column of the first non-blank character of the current line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # Entering a class or module: remember its name at its indent level and
  # forget every more deeply indented name seen so far.
  /^\s*(module|class)\>/ {
    match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Array indices are strings; add 0 to force a numeric comparison.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # A method definition: print the enclosing names, then the method.
  /^\s*(def|define_combinator)\>/ {
    indent_level = get_indent_level()
    first = 1
    for (i in context) {
      if (i + 0 < indent_level) {
        printf(first ? "%s" : "::%s", context[i])
        first = 0
      }
    }
    match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^,([:blank:]]+)/, matches)
    print (matches[2] == "self." ? "." : "#") matches[3]
  }
' | grep -v '[#.]included$' | sort
|
data/bin/console
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "parsby"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
require "pry"
|
11
|
+
|
12
|
+
# Converts a path relative to lib/ into the constant name that file is
# expected to define, e.g. "parsby/example/json_parser.rb" becomes
# "Parsby::Example::JsonParser".
#
# Directory separators become "::", the first character of every segment is
# upcased, "_x" collapses to "X", and a trailing ".rb" is dropped.
def to_constant_representation(path)
  namespaced = path.gsub("/", "::")
  camelized = namespaced.gsub(/(\A|(?<=::)|_)./) { |match| match[-1].upcase }
  camelized.sub(/\.rb\z/, "")
end
|
18
|
+
|
19
|
+
# Reloads every Ruby file under lib/ (except parsby/version.rb) into the
# running console, then mixes the combinators back into the console's main
# object. Returns nil so the console prints nothing noisy.
#
# Paths are made relative to lib/ with a plain prefix strip rather than
# Pathname, which this script never requires explicitly.
def reload!
  lib_files = Dir["lib/**/*"]
    .map { |file| file.sub(%r{\Alib/}, "") }
    .select { |file| file.end_with?(".rb") && file != "parsby/version.rb" }

  lib_files.each do |file|
    const = to_constant_representation(file)
    # Don't bother removing children constants, since we'll remove the
    # parents.
    next if const.include?("::") || !Object.const_defined?(const)

    Object.send(:remove_const, const)
  end

  # Load everything only after having removed everything.
  lib_files.each { |file| load file }

  include Parsby::Combinators
  extend Parsby::Combinators::ModuleMethods
  nil
end
|
37
|
+
|
38
|
+
reload!
|
39
|
+
|
40
|
+
Pry.start
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/bin/bash

# This script lists the methods that aren't immediately preceded by a
# comment. The output is empty and the exit status is successful if they
# all have a comment.
#
# This is used in a test in project_spec.rb to ensure documentation
# coverage.
#
# The awk program is the last command of the pipeline so that its exit
# status (1 when at least one method lacks a comment) is the status of the
# script. The "included" hook is exempt and is skipped inside awk rather
# than by a trailing grep, which would otherwise decide the exit status.
#
# find -exec is used instead of xargs so that unusual file names are safe
# and nothing is run when no file matches. The awk program relies on gawk
# extensions (three-argument match, \s, \> and PROCINFO["sorted_in"]).

find lib -type f -name \*.rb ! -path lib/parsby/example/\* \
  -exec grep -Eh '^\s*(#|(class|module|def|define_combinator|end)\b)' {} + \
  | awk '
    BEGIN {
      # Traverse context[] by ascending numeric indent so that namespaces
      # are printed outermost first; plain for-in order is unspecified.
      PROCINFO["sorted_in"] = "@ind_num_asc"
    }

    # Zero-based column of the first non-blank character of the line.
    function get_indent_level() {
      return match($0, "[^[:blank:]]") - 1
    }

    # Entering a class or module: remember its name at its indent level
    # and forget every more deeply indented name seen so far.
    /^\s*(module|class)\>/ {
      match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
      indent_level = get_indent_level()
      context[indent_level] = matches[2]
      for (i in context) {
        # Array indices are strings; add 0 to force a numeric comparison.
        if (i + 0 > indent_level) {
          delete context[i]
        }
      }
    }

    # A definition whose previous matched line was not a comment.
    /^\s*(def|define_combinator)\>/ && !prev_line_is_comment {
      match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^,([:blank:]]+)/, matches)
      if (matches[3] != "included") {
        indent_level = get_indent_level()
        first = 1
        for (i in context) {
          if (i + 0 < indent_level) {
            printf(first ? "%s" : "::%s", context[i])
            first = 0
          }
        }
        print (matches[2] == "self." ? "." : "#") matches[3]
        at_least_one_missing = 1
      }
    }

    # Runs for every line, after the rules above have looked at the flag.
    {
      prev_line_is_comment = /^\s*#/
    }

    END {
      exit at_least_one_missing
    }
  '
|
data/bin/setup
ADDED
data/bin/tested-methods
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/bin/bash

# This lists all tested instance and class methods defined in the project.
# It's used along with the script all-methods in a test in projects_spec.rb
# to ensure test-coverage.
#
# It depends on rspec contexts being done in a specific format, and
# properly indented. The format can be exemplified by:
#
#   RSpec.describe Foo do
#     describe Foo::Bar do
#       describe "#foo" do
#         ...
#       end
#       describe ".bar" do
#         ...
#       end
#     end
#   end
#
# Both single- and double-quoted method descriptions are recognised. Each
# method is reported under the nearest enclosing constant description. The
# awk program relies on gawk extensions (three-argument match, \s and
# PROCINFO["sorted_in"]).

grep -REh '^\s*(RSpec\.)?describe\(?\s*([A-Z]|["'\''][#.])' spec | awk '
  BEGIN {
    # Traverse context[] by ascending numeric indent, so the last entry
    # visited below a given indent is the nearest enclosing one.
    PROCINFO["sorted_in"] = "@ind_num_asc"
  }

  # Zero-based column of the first non-blank character of the line.
  function get_indent_level() {
    return match($0, "[^[:blank:]]") - 1
  }

  # describe SomeConstant: remember it at its indent level and forget every
  # more deeply indented constant seen so far.
  /^\s*(RSpec\.)?describe\(?\s*[A-Z]/ {
    match($0, /^\s*(RSpec\.)?describe\(?\s*([A-Za-z0-9:_]+)/, matches)
    indent_level = get_indent_level()
    context[indent_level] = matches[2]
    for (i in context) {
      # Array indices are strings; add 0 to force a numeric comparison.
      if (i + 0 > indent_level) {
        delete context[i]
      }
    }
  }

  # describe "#method" or describe ".method": print it prefixed with the
  # nearest enclosing constant.
  /^\s*(RSpec\.)?describe\(?\s*["'\''][#.]/ {
    indent_level = get_indent_level()
    ctx = ""
    for (i in context) {
      if (i + 0 < indent_level) {
        ctx = context[i]
      }
    }
    match($0, /^\s*(RSpec\.)?describe\(?\s*(["'\''])([#.].+)["'\'']/, matches)
    print ctx matches[3]
  }
' | sort
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/bin/bash

# Vestigial methods are those that are defined, but not used anywhere else
# in lib/. This is to help us detect methods that we no longer use, to
# determine whether to remove them. If a vestigial method is still useful
# for users of this library or otherwise, add it to the whitelist below.
#
# For every method name defined in lib/, count the lines in lib/ that
# mention it as a whole word, ignoring comment lines and lines that define
# or require something. Names with a count of zero that are not whitelisted
# are printed.

grep -Phro '^[^#]*(def (self\.)?|define_combinator :)\K\w+' lib \
  | sort -u \
  | while IFS= read -r name; do
      uses=$(grep -Fwrn -- "$name" lib \
        | grep -Evc '#[^{]|\b(def|define_combinator|require)\b')
      printf '%s\t%s\n' "$uses" "$name"
    done \
  | awk '
    # Methods that Ruby itself calls, so they never appear as call sites.
    function is_ruby_hook() {
      return $2 ~ /\<(included|initialize|message|method_missing)\>/
    }

    # Methods deliberately kept even though lib/ does not use them.
    function whitelisted() {
      return is_ruby_hook() \
        || $2 == "define_combinator" \
        || $2 == "parsby" \
        || $2 == "ilit" \
        || $2 == "recursive" \
        || $2 == "decimal_fraction" \
        || $2 == "left_tree_slice" \
        || $2 == "all" \
        ;
    }

    # $1 is the use count, $2 the method name.
    !$1 && !whitelisted() { print $2 }
  '
|
data/lib/parsby.rb
ADDED
@@ -0,0 +1,804 @@
|
|
1
|
+
require "stringio"

require "parsby/version"
require "parsby/combinators"
|
3
|
+
|
4
|
+
class Parsby
|
5
|
+
include Combinators
|
6
|
+
|
7
|
+
class Error < StandardError; end
|
8
|
+
|
9
|
+
# A half-open range of positions: +start+ is inside the range, +end+ is the
# first position past it.
class PosRange
  attr_accessor :start, :end

  # PosRanges are constructed with a starting and ending position. We
  # consider the starting position to be inside the range, and the ending
  # position to be outside the range. So, if start is 1 and end is 2,
  # then only position 1 is inside the range. If start is 1 and end is 1,
  # then there is no position inside the range.
  def initialize(pos_start, pos_end)
    @start = pos_start
    @end = pos_end
  end

  # Length of range.
  def length
    @end - @start
  end

  # Length of overlap. 0 for non-overlapping ranges.
  def length_in(range)
    overlap = self & range
    overlap ? overlap.length : 0
  end

  # Intersection of two ranges. Touching ranges result in a range of
  # length 0. Returns nil when the ranges neither overlap nor touch.
  def &(range)
    return nil unless overlaps?(range) || touching?(range)

    PosRange.new [@start, range.start].max, [@end, range.end].min
  end

  # True when the end of one is the beginning of the other.
  def touching?(range)
    range.end == start || self.end == range.start
  end

  # True when one is not completely left of or right of the other.
  # Touching ranges do not overlap, even though they have an intersection
  # range of length 0.
  def overlaps?(range)
    !(completely_left_of?(range) || completely_right_of?(range))
  end

  # True when this range ends at or before the start of the given one.
  def completely_left_of?(range)
    @end <= range.start
  end

  # True when this range starts at or after the end of the given one.
  def completely_right_of?(range)
    range.end <= @start
  end

  # True when pos is inside the range (start inclusive, end exclusive).
  def contains?(pos)
    @start <= pos && pos < @end
  end

  # True when this range's start is a position inside the given range.
  def starts_inside_of?(range)
    range.contains? @start
  end

  # True when this range's end is inside the given range or coincides with
  # the given range's end.
  def ends_inside_of?(range)
    range.contains?(@end) || range.end == @end
  end

  # True when this range both starts and ends inside the given range.
  def completely_inside_of?(range)
    starts_inside_of?(range) && ends_inside_of?(range)
  end

  # Renders this range as an underline relative to line_range.
  #
  # Returns "<-" or "->" when the range lies entirely before or after the
  # line. Otherwise returns leading spaces up to the range's start followed
  # by dashes covering the visible part, with "\" marking a start that is
  # inside the line and "/" an end that is inside it. A zero-length range is
  # drawn as "|", and a one-position range fully inside the line as "V".
  def render_in(line_range)
    return "<-" if completely_left_of?(line_range) && !starts_inside_of?(line_range)
    return "->" if completely_right_of?(line_range)

    visible = length_in(line_range)
    bar = "-" * visible
    # The assignments below are order-sensitive: later markers deliberately
    # overwrite earlier ones.
    bar[0] = "\\" if starts_inside_of?(line_range)
    bar[-1] = "/" if ends_inside_of?(line_range)
    bar[0] = "|" if visible.zero?
    bar[0] = "V" if visible == 1 && completely_inside_of?(line_range)
    (" " * [0, start - line_range.start].max) + bar
  end
end
|
87
|
+
|
88
|
+
# Marks two points in a parser expression so that, in the tree of parsed
# ranges, everything between them is replaced by just the ranges marked with
# #end. A Splicer instance is the marker identity shared by its #start and
# #end parsers.
class Splicer
  # Creates a splicer, yields it to the block so the block can build a
  # parser that uses splicer.end, optionally labels that parser, and
  # returns it wrapped with #start.
  def self.start(label = nil, &b)
    splicer = new
    parser = b.call splicer
    parser % label if label
    splicer.start parser
  end

  # Wraps p in a parser that, once p has run (whether it succeeded or
  # raised), replaces the children of the current parsed range with the
  # descendants marked by this splicer.
  def start(p)
    Parsby.new("splicer.start(#{p.label})") do |c|
      begin
        p.parse c
      ensure
        c.parsed_ranges.splice_to! self
      end
    end
  end

  # Wraps p in a parser that, once p has run (whether it succeeded or
  # raised), marks the first child of the current parsed range with this
  # splicer.
  def end(p)
    Parsby.new("splicer.end(#{p.label})") do |c|
      begin
        p.parse c
      ensure
        c.parsed_ranges.children[0].markers << self
      end
    end
  end
end
|
116
|
+
|
117
|
+
# Mixin that makes the including object a node of a mutable tree: each node
# has a parent, an ordered list of children and a list of markers. A path is
# an array of child indices leading from a node to one of its descendants.
module Tree
  attr_accessor :parent
  attr_writer :children

  # Markers attached to this node; created on first access.
  def markers
    @markers ||= []
  end

  # Replaces this node's children with the descendants (or self) carrying
  # the given marker. Returns self.
  def splice_to!(marker)
    splice!(*select_paths { |n| n.markers.include? marker })
  end

  # Children of this node; created on first access.
  def children
    @children ||= []
  end

  # Appends the given node as the last child and sets its parent to self.
  def <<(*ts)
    ts.each do |t|
      t.parent = self
      children << t
    end
  end

  # The topmost ancestor, or self when there is no parent.
  def root
    parent.nil? ? self : parent.root
  end

  # Index of this node among its siblings counted from the right, or nil
  # when there is no parent.
  def sibling_reverse_index
    parent&.children&.reverse&.index self
  end

  # Index of this node among its siblings, or nil when there is no parent.
  def sibling_index
    parent&.children&.index self
  end

  # This node followed by all its descendants, depth-first, parents before
  # children.
  def flatten
    [self, *children.flat_map(&:flatten)]
  end

  alias_method :self_and_descendants, :flatten

  # Child indices leading from the root to this node; empty for the root.
  def path
    [*parent&.path, *sibling_index]
  end

  # Calls the block with this node and then every descendant, depth-first.
  # Returns self.
  def each(&b)
    b.call self
    children.each { |c| c.each(&b) }
    self
  end

  # Number of nodes to the right of this node and of each of its
  # ancestors, summed up to the root.
  def right_uncles
    return 0 unless parent

    sibling_reverse_index + parent.right_uncles
  end

  # "*" for this node followed by one "|" per right uncle.
  def right_tree_slice
    "*" + "|" * right_uncles
  end

  # Deep copy. Duplicating a non-root node duplicates the whole tree from
  # its root and returns the copy of this node inside it, so the copy keeps
  # its ancestry. currently_descending is used internally while recursing
  # into children. Instance variables other than children are shared with
  # the original, as with Object#dup.
  def dup(currently_descending: false)
    return root.dup.get(path) if parent && !currently_descending

    super().tap do |copy|
      copy.children = copy.children.map do |child|
        child.dup(currently_descending: true).tap { |child_copy| child_copy.parent = copy }
      end
    end
  end

  # Removes this node from its parent, putting its children in its place.
  # Returns the parent.
  def splice_self!
    idx = sibling_index
    parent.children.delete_at(idx)
    parent.children.insert(idx, *children.each { |c| c.parent = parent })
    parent
  end

  # Replaces this node's children with the nodes found at the given paths
  # (paths that lead nowhere are ignored). Returns self.
  def splice!(*paths)
    self.children = paths
      .map { |p| get(p)&.tap { |d| d.parent = self } }
      .compact
    self
  end

  # Like #splice!, but operates on a copy.
  def splice(*paths)
    dup.splice!(*paths)
  end

  # Drops every child to the right of the rightmost child mentioned in
  # paths, and does the same recursively in the remaining children using the
  # remainder of each path. When a block is given it is called with each
  # remaining child, its index and the largest kept index; children for
  # which it returns a truthy value are dropped too. Returns self.
  def trim_to_just!(*paths, &rejecting)
    max_sibling = paths.map(&:first).compact.max
    self.children =
      if max_sibling.nil?
        []
      else
        kept = children[0..max_sibling]
          .each_with_index
          .reject { |c, i| rejecting.call c, i, max_sibling if rejecting }
        kept.each do |child, i|
          subpaths = paths.select { |p| p.first == i }.map { |p| p.drop 1 }
          child.trim_to_just!(*subpaths, &rejecting)
        end
        kept.map(&:first)
      end
    self
  end

  # Nodes among self and its descendants for which the block is truthy, in
  # depth-first order.
  def select(&b)
    [].tap do |picked|
      each { |n| picked << n if b.call n }
    end
  end

  # Like #select, but returns the paths of the selected nodes relative to
  # this node.
  def select_paths(&b)
    prefix_length = path.length
    select(&b).map { |n| n.path.drop prefix_length }
  end

  # Node found by following the given path of child indices from this
  # node, or nil when the path leads nowhere.
  def get(path)
    return self if path.empty?

    idx, *sub_path = path
    children[idx]&.get sub_path
  end

  # This node followed by its parent, grandparent, and so on up to the
  # root.
  def self_and_ancestors
    [self, *parent&.self_and_ancestors]
  end
end
|
265
|
+
|
266
|
+
# A PosRange that is also a Tree node, carrying the label of the parser that
# covered it and whether that parser failed.
class ParsedRange < PosRange
  include Tree

  attr_reader :label
  attr_accessor :failed

  # Initializes the range with starting position, ending position, and the
  # label of the parser being run over it.
  def initialize(pos_start, pos_end, label)
    @label = label
    super(pos_start, pos_end)
  end

  alias_method :underline, :render_in
end
|
281
|
+
|
282
|
+
# Raised when a parser's expectation is not met. Its message shows the line
# where parsing got furthest together with a tree of the parse attempts that
# are relevant to that position.
class ExpectationFailed < Error
  # Number of spaces the source line and its underlines are indented by.
  INDENTATION = 2

  attr_reader :ctx

  # Initializes an ExpectationFailed from the parsing context, whose
  # parsed ranges and input are used to render the message.
  def initialize(ctx)
    @ctx = ctx
  end

  # NOTE(review): empty in the original and not called from this file;
  # kept only so the method still exists.
  def message_hunk(failure_tree)
  end

  # A trimmed copy of the whole tree of parsed ranges: it keeps the paths
  # leading to the furthest parsed range and to every other range starting
  # at the same position, and drops failed children that are neither the
  # rightmost kept child nor start at that position.
  def failure_tree
    @failure_tree ||= begin
      other_ranges = ctx.parsed_ranges.flatten.select do |range|
        range.start == parsed_range.start && range != parsed_range
      end
      relevant_paths = [parsed_range, *other_ranges].map(&:path)
      parsed_range.dup.root.trim_to_just!(*relevant_paths) do |c, i, max_sibling|
        c.failed && i != max_sibling && c.start != parsed_range.start
      end
    end
  end

  # The parsed range that starts furthest into the input.
  def parsed_range
    @parsed_range ||= ctx.furthest_parsed_range
  end

  # The line number and the indented text of the current line.
  def hunk_prelude
    <<~EOF
      line #{ctx.bio.line_number}:
      #{" " * INDENTATION}#{ctx.bio.current_line}
    EOF
  end

  # One text line per node of the failure tree: the node's underline
  # relative to the current line, its tree slice, and "failure" or
  # "success" with its label. When a node's slice is longer than that of the
  # previously rendered node, an extra line of "\" is emitted to draw the
  # fork. The lines are returned in reverse of traversal order.
  def hunk_graph
    line_range = ctx.bio.current_line_range
    line_length = ctx.bio.current_line.length
    max_tree_slice_length = failure_tree.flatten.map { |t| t.right_tree_slice.length }.max
    prev_slice_length = nil
    tree_lines = []

    failure_tree.each do |range|
      slice = range.right_tree_slice
      this_slice_length = slice.length

      line = " " * INDENTATION
      line << range.underline(line_range)
      # Pad to the width of the source line. The underline can be wider
      # than a very short line (e.g. "<-" on an empty line), so never ask
      # for a negative number of spaces.
      line << " " * [line_length + INDENTATION - line.length, 0].max

      # If previous slice was a parent with multiple children (current
      # slice being the first child), we'll want to draw the forking
      # line.
      fork_line =
        if prev_slice_length && this_slice_length > prev_slice_length
          replaced = 0
          branches = slice.rjust(max_tree_slice_length).gsub(/[*|]/) do |c|
            replaced += 1
            replaced <= this_slice_length - prev_slice_length ? "\\" : c
          end
          # Current line already has the correct width to start drawing
          # the tree. Copy it with the rendered range blanked out.
          "#{line.gsub(/./, " ")} #{branches}\n"
        else
          ""
        end
      prev_slice_length = this_slice_length

      line << " #{slice.rjust(max_tree_slice_length)}"
      line << " #{range.failed ? "failure" : "success"}: #{range.label}"
      line << "\n"
      tree_lines << fork_line << line
    end

    tree_lines.reverse.join
  end

  # Renders the prelude and graph for the line containing pos, leaving the
  # input position as it was.
  def hunk_at(pos)
    ctx.bio.with_saved_pos do
      ctx.bio.seek pos
      hunk_prelude + hunk_graph
    end
  end

  # The message of the exception. It's the current line, with a kind-of
  # backtrace showing the failed expectations with a visualization of
  # their range in the current line.
  def message
    hunk_at parsed_range.start
  end
end
|
376
|
+
|
377
|
+
# A named label for a parser, rendered as "<name>".
class Token
  attr_reader :name

  # Makes a token with the given name.
  def initialize(name)
    @name = name
  end

  # Renders token name by surrounding it in angle brackets.
  def to_s
    "<#{name}>"
  end

  # Compare tokens: equal when t is a token of this class (or a subclass)
  # with the same name.
  def ==(t)
    t.is_a?(self.class) && t.name == name
  end

  # Hash-key equality, kept consistent with #hash: same class and same
  # name.
  def eql?(t)
    t.class == self.class && t.name.eql?(name)
  end

  # Hash code derived from the class and the name, so that equal tokens can
  # be used interchangeably as Hash keys and with Array#uniq.
  def hash
    [self.class, name].hash
  end

  # Flipped version of Parsby#%, so you can specify the token of a parser
  # at the beginning of a parser expression.
  def %(p)
    p % self
  end
end
|
401
|
+
|
402
|
+
# A StringIO holding what has been read so far, with helpers to look
# backwards from the current position without moving it.
class Backup < StringIO
  # Calls the block with the current position and seeks back to that
  # position afterwards, even if the block raises.
  def with_saved_pos(&b)
    saved = pos
    b.call saved
  ensure
    seek saved
  end

  # The whole contents, regardless of the current position.
  def all
    with_saved_pos do
      seek 0
      read
    end
  end

  # How much can be read backwards: everything before the current position.
  alias_method :back_size, :pos

  # The n units (as counted by #pos) preceding the current position; by
  # default, everything before it. The position is left unchanged.
  def back(n = back_size)
    with_saved_pos do
      seek(-n, IO::SEEK_CUR)
      read n
    end
  end

  # From the current position to the end of the line, including the line
  # terminator if present. Empty at the end of the contents.
  def rest_of_line
    with_saved_pos { readline }
  rescue EOFError
    ""
  end

  # The lines up to and including the current one, each with its
  # terminator if present.
  def back_lines
    (back + rest_of_line).lines
  end

  # Distance from the start of the current line to the current position.
  def col
    back[/(?<=\A|\n).*\z/].length
  end

  # The whole current line without its terminator. Returns an empty string
  # when the current line is empty and there is nothing left to read,
  # instead of letting readline raise EOFError.
  def current_line
    with_saved_pos do
      seek(-col, IO::SEEK_CUR)
      readline.chomp
    end
  rescue EOFError
    ""
  end
end
|
447
|
+
|
448
|
+
# Wraps an IO and keeps a Backup of everything read from it, so that input
# can be pushed back (restored) even on IOs that cannot seek.
class BackedIO
  # Initializes a BackedIO out of the provided IO object or String. The
  # String will be turned into an IO using StringIO.
  def initialize(io)
    io = StringIO.new io if io.is_a? String
    @io = io
    @backup = Backup.new
  end

  # Makes a new BackedIO out of the provided IO, calls the provided
  # block and restores the IO on an exception (which is then re-raised).
  def self.for(io, &b)
    bio = new io
    begin
      b.call bio
    rescue
      bio.restore
      raise
    end
  end

  # Similar to BackedIO.for, but it always restores the IO, even when
  # there's no exception.
  def self.peek(io, &b)
    self.for io do |bio|
      begin
        b.call bio
      ensure
        bio.restore
      end
    end
  end

  # Calls the block with the current position and restores the IO to that
  # position afterwards, even if the block raises.
  def with_saved_pos(&b)
    saved = pos
    begin
      b.call saved
    ensure
      restore_to saved
    end
  end

  # Like #read, but without consuming.
  def peek(*args)
    with_saved_pos { read(*args) }
  end

  # Delegates pos to inner io, and works around pipes' inability to
  # return pos by falling back to the position of the backup.
  def pos
    @io.pos
  rescue Errno::ESPIPE
    backup.pos
  end

  # Returns the number of lines read so far, counting the current line.
  def line_number
    lines_read.length
  end

  # Moves to the position described by amount and whence (IO::SEEK_SET,
  # IO::SEEK_CUR or IO::SEEK_END) by reading forward or restoring from the
  # backup. Raises ArgumentError for any other whence.
  def seek(amount, whence = IO::SEEK_SET)
    if whence == IO::SEEK_END
      read
      restore(-amount)
      return
    end

    new_pos =
      case whence
      when IO::SEEK_SET then amount
      when IO::SEEK_CUR then pos + amount
      else raise ArgumentError, "unsupported whence: #{whence.inspect}"
      end

    if new_pos > pos
      read new_pos - pos
    else
      restore_to new_pos
    end
  end

  # pos == current_line_pos + col. This is needed to convert a pos to a
  # col.
  def current_line_pos
    pos - col
  end

  # Distance from the start of the current line, as tracked by the backup.
  def col
    backup.col
  end

  # The PosRange spanned by the current line, without its terminator.
  def current_line_range
    start = current_line_pos
    PosRange.new start, start + current_line.length
  end

  # Reads the rest of the current line into the backup without consuming
  # it. Does nothing when already at the end of the input, so that the
  # line-inspection methods also work there.
  def load_rest_of_line
    with_saved_pos { readline }
  rescue EOFError
    nil
  end

  # The lines up to and including the current one, without terminators.
  def lines_read
    load_rest_of_line
    backup.back_lines.map(&:chomp)
  end

  # Returns current line, including what's to come from #read, without
  # consuming input.
  def current_line
    load_rest_of_line
    backup.current_line
  end

  # Restore n chars from the backup. By default, everything in it.
  def restore(n = backup.back_size)
    # Handle negatives in consideration of #with_saved_pos.
    if n < 0
      read(-n)
    else
      backup.back(n).chars.reverse.each { |c| ungetc c }
    end
    nil
  end

  # Restores (or reads forward) so that the position becomes prev_pos.
  def restore_to(prev_pos)
    restore(pos - prev_pos)
  end

  # This is to provide transparent delegation to methods of underlying
  # IO.
  def method_missing(m, *args, &b)
    @io.send(m, *args, &b)
  end

  # Keeps respond_to? truthful about the methods delegated by
  # method_missing.
  def respond_to_missing?(m, include_private = false)
    @io.respond_to?(m, include_private) || super
  end

  # Reads a line from the underlying IO and backs it up.
  def readline(*args)
    @io.readline(*args).tap { |r| backup.write r unless r.nil? }
  end

  # Reads from underlying IO and backs it up.
  def read(*args)
    @io.read(*args).tap { |r| backup.write r unless r.nil? }
  end

  # Pass to underlying IO's ungetc and discard a part of the same length
  # from the backup. As specified with different IO classes, the argument
  # should be a single character. To restore from the backup, use
  # #restore.
  def ungetc(c)
    # Though c is supposed to be a single character, as specified by the
    # ungetc of different IO objects, let's not assume that when
    # adjusting the backup.
    backup.seek(-c.length, IO::SEEK_CUR)
    @io.ungetc(c)
  end

  private

  # The Backup holding what has been read so far.
  def backup
    @backup
  end
end
|
606
|
+
|
607
|
+
# State shared by the parsers taking part in one parse: the input, wrapped
# in a BackedIO, and the tree of ranges parsed so far.
class Context
  attr_reader :bio
  attr_accessor :parsed_ranges

  # Wraps the given IO or String in a BackedIO.
  def initialize(io)
    @bio = BackedIO.new io
    # NOTE(review): nothing in this file reads @failures; kept as is.
    @failures = []
  end

  # The parsed range with the greatest starting position.
  def furthest_parsed_range
    parsed_ranges.flatten.max_by(&:start)
  end
end
|
620
|
+
|
621
|
+
# The parser's label. It's an "unknown" token by default.
def label
  @label || Token.new("unknown")
end
|
625
|
+
|
626
|
+
# Assign label to parser. If given a symbol, it'll be turned into a
# Parsby::Token; anything else is stored as is.
def label=(name)
  @label = name.is_a?(Symbol) ? Token.new(name) : name
end
|
631
|
+
|
632
|
+
# Initialize parser with optional label argument, and parsing block. The
# parsing block is called by #parse with the parsing Context as argument,
# and its result is the result when parsing.
def initialize(label = nil, &b)
  self.label = label if label
  @parser = b
end
|
639
|
+
|
640
|
+
# Parse a String or IO object. An existing Context may also be given, which
# is how parsers run other parsers on the same input.
#
# A ParsedRange labelled with this parser is recorded as a child of the
# context's current range for the duration of the call. When the parsing
# block raises ExpectationFailed, the range is flagged as failed, the input
# consumed by this parser is restored, and the exception is re-raised.
def parse(src)
  ctx = src.is_a?(Context) ? src : Context.new(src)
  parsed_range = ParsedRange.new(ctx.bio.pos, ctx.bio.pos, label)
  parent_parsed_range = ctx.parsed_ranges
  parent_parsed_range << parsed_range if parent_parsed_range
  ctx.parsed_ranges = parsed_range
  begin
    result = @parser.call ctx
  rescue ExpectationFailed
    ctx.parsed_ranges.end = ctx.bio.pos
    ctx.parsed_ranges.failed = true
    ctx.bio.restore_to ctx.parsed_ranges.start
    raise
  else
    ctx.parsed_ranges.end = ctx.bio.pos
    result
  ensure
    # Keep the root one for use in ExpectationFailed#message
    ctx.parsed_ranges = parent_parsed_range if parent_parsed_range
  end
end
|
664
|
+
|
665
|
+
# Parses without consuming input: whatever the outcome, the input is
# restored to the position it had before parsing.
def peek(src)
  ctx = src.is_a?(Context) ? src : Context.new(src)
  starting_pos = ctx.bio.pos
  begin
    parse ctx
  ensure
    ctx.bio.restore_to starting_pos
  end
end
|
675
|
+
|
676
|
+
# <tt>x | y</tt> tries y if x fails, i.e. if x raises a Parsby::Error.
def |(p)
  Parsby.new "(#{label} | #{p.label})" do |c|
    begin
      parse c
    rescue Error
      p.parse c
    end
  end
end
|
686
|
+
|
687
|
+
# x < y runs parser x then y and returns the result of x.
def <(p)
  sequenced = self.then { |r| p.then { pure r } }
  sequenced % "(#{label} < #{p.label})"
end
|
691
|
+
|
692
|
+
# x > y runs parser x then y and returns the result of y.
def >(p)
  sequenced = self.then { p }
  sequenced % "(#{label} > #{p.label})"
end
|
696
|
+
|
697
|
+
# ~x parses like x, but afterwards (whether x succeeded or raised) removes
# from the tree of parsed ranges both the range recorded for x and, when it
# has a parent, the range recorded for this wrapper. Their children take
# their place.
def ~
  Parsby.new "(~ #{label})" do |c|
    begin
      parse c
    ensure
      c.parsed_ranges.children[0].splice_self!
      c.parsed_ranges.splice_self! if c.parsed_ranges.parent
    end
  end
end
|
709
|
+
|
710
|
+
# p * n, runs parser p n times, grouping results in an array.
def *(n)
  Parsby.new "(#{label} * #{n})" do |c|
    Array.new(n) { parse c }
  end
end
|
716
|
+
|
717
|
+
# x + y does + on the results of x and y. This is mostly meant to be used
# with arrays, but it would work with numbers and strings too.
def +(p)
  summed = group(self, p).fmap { |(x, y)| x + y }
  summed.label = "(#{label} + #{p.label})"
  summed
end
|
724
|
+
|
725
|
+
# xs << x appends result of parser x to list result of parser xs.
def <<(p)
  Parsby.new "(#{label} << #{p.label})" do |c|
    list = parse c
    item = p.parse c
    # like list << item, but without modifying list.
    list + [item]
  end
end
|
734
|
+
|
735
|
+
# Set the label and return self.
def %(name)
  self.label = name
  self
end
|
740
|
+
|
741
|
+
# Like map for arrays, this lets you work with the value "inside" the
# parser, i.e. the result.
#
# Example:
#
#   decimal.fmap {|x| x + 1}.parse("2")
#   => 3
def fmap(&b)
  Parsby.new "#{label}.fmap" do |c|
    b.call parse(c)
  end
end
|
753
|
+
|
754
|
+
# Pass result of self parser to block to construct the next parser.
#
# For example, instead of writing:
#
#   Parsby.new do |c|
#     x = foo.parse c
#     bar(x).parse c
#   end
#
# you can write:
#
#   foo.then {|x| bar x }
#
# This is analogous to Parsec's >>= operator in Haskell, where you could
# write:
#
#   foo >>= bar
def then(&b)
  Parsby.new "#{label}.then" do |c|
    next_parser = b.call(parse(c))
    next_parser.parse(c)
  end
end
|
776
|
+
|
777
|
+
# <tt>x.that_fails(y)</tt> will try <tt>y</tt>, fail if <tt>y</tt>
# succeeds, or parse with <tt>x</tt> if <tt>y</tt>
# fails.
#
# Both <tt>y</tt> and <tt>x</tt> are run on the context's input rather
# than on the context itself, so each gets a fresh Context.
#
# Example:
#
#   decimal.that_fails(string("10")).parse "3"
#   => 3
#   decimal.that_fails(string("10")).parse "10"
#   # raises Parsby::ExpectationFailed
#
# NOTE(review): the original comment showed a sample error message
# ("\/ expected: (not "10")"); that layout does not match what
# ExpectationFailed#message renders in this file, so it was dropped.
def that_fails(p)
  Parsby.new "#{label}.that_fails(#{p.label})" do |c|
    orig_pos = c.bio.pos
    begin
      p.parse c.bio
    rescue Error
      c.bio.restore_to orig_pos
      parse c.bio
    else
      raise ExpectationFailed.new c
    end
  end
end

alias_method :that_fail, :that_fails
|
804
|
+
end
|