parsby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +41 -0
- data/README.md +607 -0
- data/Rakefile +6 -0
- data/bin/all-methods +35 -0
- data/bin/console +40 -0
- data/bin/methods-with-pending-documentation +49 -0
- data/bin/setup +8 -0
- data/bin/tested-methods +47 -0
- data/bin/vestigial-methods +30 -0
- data/lib/parsby.rb +804 -0
- data/lib/parsby/combinators.rb +384 -0
- data/lib/parsby/example/arithmetic_parser.rb +96 -0
- data/lib/parsby/example/csv_parser.rb +41 -0
- data/lib/parsby/example/json_parser.rb +92 -0
- data/lib/parsby/example/lisp_parser.rb +135 -0
- data/lib/parsby/version.rb +3 -0
- data/parsby.gemspec +42 -0
- metadata +121 -0
data/Rakefile
ADDED
data/bin/all-methods
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# This lists all instance and class methods defined in the project. It's
|
4
|
+
# used along with the script tested-methods in a test in projects_spec.rb
|
5
|
+
# to ensure test-coverage.
|
6
|
+
|
7
|
+
grep -REh '^\s*(class|module|def|define_combinator|end)\b' lib | awk '
|
8
|
+
function get_indent_level() {
|
9
|
+
return match($0, "[^[:blank:]]") - 1
|
10
|
+
}
|
11
|
+
|
12
|
+
/^\s*(module|class)\>/ {
|
13
|
+
match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
|
14
|
+
indent_level = get_indent_level()
|
15
|
+
context[indent_level] = matches[2]
|
16
|
+
for (i in context) {
|
17
|
+
if (i > indent_level) {
|
18
|
+
delete context[i]
|
19
|
+
}
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
/^\s*(def|define_combinator)\>/ {
|
24
|
+
indent_level = get_indent_level()
|
25
|
+
first = 1
|
26
|
+
for (i in context) {
|
27
|
+
if (i < indent_level) {
|
28
|
+
printf(first ? "%s" : "::%s", context[i])
|
29
|
+
first = 0
|
30
|
+
}
|
31
|
+
}
|
32
|
+
match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^,([:blank:]]+)/, matches)
|
33
|
+
print (matches[2] == "self." ? "." : "#") matches[3]
|
34
|
+
}
|
35
|
+
' | grep -v '[#.]included$' | sort
|
data/bin/console
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "parsby"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
require "pry"
|
11
|
+
|
12
|
+
# Converts a path relative to lib/ (e.g. "parsby/combinators.rb") into the
# name of the constant that file is expected to define
# (e.g. "Parsby::Combinators").
#
# Directory separators become "::", the first character of every segment
# and every character following an underscore is upcased (the underscore
# itself is dropped), and a trailing ".rb" is removed.
def to_constant_representation(path)
  namespaced = path.gsub("/", "::")
  camelized = namespaced.gsub(/(\A|(?<=::)|_)./) { |match| match[-1].upcase }
  camelized.sub(/\.rb\z/, "")
end
|
18
|
+
|
19
|
+
# Reloads the library from lib/ inside the console session.
#
# Every top-level constant that corresponds to a Ruby file under lib/
# (except parsby/version.rb) is removed first, and only then is every file
# loaded again. Nested constants are not removed individually because they
# go away together with their parent. Finally the combinators are mixed
# into the console's main object. Always returns nil.
def reload!
  lib_root = Pathname.new("lib/")
  sources = Dir["lib/**/*"]
    .map { |file| Pathname.new(file).relative_path_from(lib_root) }
    .select { |rel| rel.to_s =~ /\.rb\z/ && rel.to_s != "parsby/version.rb" }

  sources.each do |rel|
    const = to_constant_representation(rel.to_s)
    # Skip children constants and anything that isn't currently defined.
    next if const =~ /::/ || !Object.const_defined?(const)
    Object.send(:remove_const, const)
  end

  # Load everything only after having removed everything.
  sources.each { |rel| load rel }

  include Parsby::Combinators
  extend Parsby::Combinators::ModuleMethods
  nil
end
|
37
|
+
|
38
|
+
reload!
|
39
|
+
|
40
|
+
Pry.start
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# This script lists the methods that aren't immediately preceded by a
|
4
|
+
# comment. The output is empty and the exit status is successful if they
|
5
|
+
# all have a comment.
|
6
|
+
#
|
7
|
+
# This is used in a test in project_spec.rb to ensure documentation
|
8
|
+
# coverage.
|
9
|
+
|
10
|
+
find lib -type f -name \*.rb ! -path lib/parsby/example/\* \
|
11
|
+
| xargs grep -REh '^\s*(#|(class|module|def|define_combinator|end)\b)' \
|
12
|
+
| awk '
|
13
|
+
function get_indent_level() {
|
14
|
+
return match($0, "[^[:blank:]]") - 1
|
15
|
+
}
|
16
|
+
|
17
|
+
/^\s*(module|class)\>/ {
|
18
|
+
match($0, /^\s*(class|module)\s*([^[:blank:]]+)/, matches)
|
19
|
+
indent_level = get_indent_level()
|
20
|
+
context[indent_level] = matches[2]
|
21
|
+
for (i in context) {
|
22
|
+
if (i > indent_level) {
|
23
|
+
delete context[i]
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
27
|
+
|
28
|
+
/^\s*(def|define_combinator)\>/ && !prev_line_is_comment {
|
29
|
+
indent_level = get_indent_level()
|
30
|
+
first = 1
|
31
|
+
for (i in context) {
|
32
|
+
if (i < indent_level) {
|
33
|
+
printf(first ? "%s" : "::%s", context[i])
|
34
|
+
first = 0
|
35
|
+
}
|
36
|
+
}
|
37
|
+
match($0, /^\s*(def|define_combinator)\s*(self\.|:)?([^([:blank:]]+)/, matches)
|
38
|
+
print (matches[2] == "self." ? "." : "#") matches[3]
|
39
|
+
at_least_one_missing = 1
|
40
|
+
}
|
41
|
+
|
42
|
+
{
|
43
|
+
prev_line_is_comment = /^\s*#/
|
44
|
+
}
|
45
|
+
|
46
|
+
END {
|
47
|
+
exit at_least_one_missing
|
48
|
+
}
|
49
|
+
' | grep -v '[#.]included$'
|
data/bin/setup
ADDED
data/bin/tested-methods
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# This lists all tested instance and class methods defined in the project.
|
4
|
+
# It's used along with the script all-methods in a test in projects_spec.rb
|
5
|
+
# to ensure test-coverage.
|
6
|
+
#
|
7
|
+
# It depends on rspec contexts being done in a specific format, and
|
8
|
+
# properly indented. The format can be exemplified by:
|
9
|
+
#
|
10
|
+
# RSpec.describe Foo do
|
11
|
+
# describe Foo::Bar do
|
12
|
+
# describe "#foo" do
|
13
|
+
# ...
|
14
|
+
# end
|
15
|
+
# describe ".bar" do
|
16
|
+
# ...
|
17
|
+
# end
|
18
|
+
# end
|
19
|
+
# end
|
20
|
+
|
21
|
+
grep -REh '^\s*(RSpec\.)?describe\(?\s*([A-Z]|"[#.])' spec | awk '
|
22
|
+
function get_indent_level() {
|
23
|
+
return match($0, "[^[:blank:]]") - 1
|
24
|
+
}
|
25
|
+
|
26
|
+
/^\s*(RSpec\.)?describe\(?\s*[A-Z]/ {
|
27
|
+
match($0, /^\s*(RSpec\.)?describe\(?\s*([A-Za-z0-9:_]+)/, matches)
|
28
|
+
indent_level = get_indent_level()
|
29
|
+
context[indent_level] = matches[2]
|
30
|
+
for (i in context) {
|
31
|
+
if (i > indent_level) {
|
32
|
+
delete context[i]
|
33
|
+
}
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
/^\s*(RSpec\.)?describe\(?\s*["'\''][#.]/ {
|
38
|
+
indent_level = get_indent_level()
|
39
|
+
for (i in context) {
|
40
|
+
if (i < indent_level) {
|
41
|
+
ctx = context[i]
|
42
|
+
}
|
43
|
+
}
|
44
|
+
match($0, /^\s*(RSpec\.)?describe\(?\s*(["'\''])([#.].+)["'\'']/, matches)
|
45
|
+
print ctx matches[3]
|
46
|
+
}
|
47
|
+
' | sort
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
# Vestigial methods are those that are defined, but not used anywhere else
|
4
|
+
# in lib/. This is to help us detect methods that we no longer use to
|
5
|
+
# determine whether to remove them. If a vestigial method is still useful
|
6
|
+
# for users of this library or otherwise, add it to the whitelist below.
|
7
|
+
|
8
|
+
for n in $(grep -Phro '^[^#]*(def (self\.)?|define_combinator :)\K\w+' lib | sort -u); do
|
9
|
+
printf "%s\t%s\n" \
|
10
|
+
"$(grep -Fwrn "$n" lib | grep -Ev '#[^{]|\b(def|define_combinator|require)\b' | wc -l)" \
|
11
|
+
"$n"
|
12
|
+
done | awk '
|
13
|
+
function is_ruby_hook() {
|
14
|
+
return $2 ~ /\<(included|initialize|message|method_missing)\>/
|
15
|
+
}
|
16
|
+
|
17
|
+
function whitelisted() {
|
18
|
+
return is_ruby_hook() \
|
19
|
+
|| $2 == "define_combinator" \
|
20
|
+
|| $2 == "parsby" \
|
21
|
+
|| $2 == "ilit" \
|
22
|
+
|| $2 == "recursive" \
|
23
|
+
|| $2 == "decimal_fraction" \
|
24
|
+
|| $2 == "left_tree_slice" \
|
25
|
+
|| $2 == "all" \
|
26
|
+
;
|
27
|
+
}
|
28
|
+
|
29
|
+
!$1 && !whitelisted() { print $2 }
|
30
|
+
'
|
data/lib/parsby.rb
ADDED
@@ -0,0 +1,804 @@
|
|
1
|
+
require "parsby/version"
|
2
|
+
require "parsby/combinators"
|
3
|
+
|
4
|
+
class Parsby
|
5
|
+
include Combinators
|
6
|
+
|
7
|
+
class Error < StandardError; end
|
8
|
+
|
9
|
+
class PosRange
  attr_accessor :start, :end

  # Builds a range from a starting and an ending position. The start is
  # inside the range and the end is outside of it: with start 1 and end 2
  # only position 1 is in the range, and with start 1 and end 1 the range
  # contains no position at all.
  def initialize(pos_start, pos_end)
    @start, @end = pos_start, pos_end
  end

  # Number of positions covered by the range.
  def length
    @end - @start
  end

  # Length of the part shared with +range+; 0 when nothing is shared.
  def length_in(range)
    shared = self & range
    shared ? shared.length : 0
  end

  # Intersection with +range+ as a new PosRange, or nil when the ranges
  # neither overlap nor touch. Touching ranges give a range of length 0.
  def &(range)
    if overlaps?(range) || touching?(range)
      PosRange.new([@start, range.start].max, [@end, range.end].min)
    end
  end

  # True when one range ends exactly where the other one starts.
  def touching?(range)
    self.end == range.start || range.end == self.start
  end

  # True when neither range lies completely to one side of the other.
  # Ranges that merely touch do not overlap, even though their
  # intersection is a range of length 0.
  def overlaps?(range)
    !completely_left_of?(range) && !completely_right_of?(range)
  end

  # True when this range ends at or before the start of +range+.
  def completely_left_of?(range)
    @end <= range.start
  end

  # True when this range starts at or after the end of +range+.
  def completely_right_of?(range)
    range.end <= @start
  end

  # True when +pos+ is at or after the start and before the end.
  def contains?(pos)
    @start <= pos && pos < @end
  end

  # True when the start of this range is a position contained in +range+.
  def starts_inside_of?(range)
    range.contains? @start
  end

  # True when the end of this range is contained in +range+ or coincides
  # with the end of +range+.
  def ends_inside_of?(range)
    range.contains?(@end) || range.end == @end
  end

  # True when this range both starts and ends inside of +range+.
  def completely_inside_of?(range)
    starts_inside_of?(range) && ends_inside_of?(range)
  end

  # Draws this range as an underline relative to +line_range+.
  #
  # Returns "<-" or "->" when the range lies entirely before or after the
  # line. Otherwise returns spaces up to the start of the range followed by
  # dashes for the covered part, with "\" marking a start inside the line,
  # "/" marking an end inside the line, "|" for an empty range and "V" for
  # a one-position range that fits entirely inside the line.
  def render_in(line_range)
    if completely_left_of?(line_range) && !starts_inside_of?(line_range)
      return "<-"
    end
    return "->" if completely_right_of?(line_range)

    overlap = length_in(line_range)
    bar = "-" * overlap
    bar[0] = "\\" if starts_inside_of?(line_range)
    bar[-1] = "/" if ends_inside_of?(line_range)
    bar[0] = "|" if overlap == 0
    bar[0] = "V" if overlap == 1 && completely_inside_of?(line_range)
    (" " * [0, start - line_range.start].max) + bar
  end
end
|
87
|
+
|
88
|
+
class Splicer
  # Creates a splicer, passes it to the block to obtain a parser,
  # optionally labels that parser with +label+, and wraps it with #start.
  def self.start(label = nil, &b)
    splicer = new
    parser = b.call(splicer)
    parser % label if label
    splicer.start(parser)
  end

  # Wraps +p+ in a parser that, once +p+ has run (whether it succeeded or
  # raised), splices the current parsed-range node down to the nodes that
  # carry this splicer as a marker.
  def start(p)
    Parsby.new("splicer.start(#{p.label})") do |c|
      begin
        p.parse c
      ensure
        c.parsed_ranges.splice_to! self
      end
    end
  end

  # Wraps +p+ in a parser that, once +p+ has run (whether it succeeded or
  # raised), adds this splicer to the markers of the first child of the
  # current parsed-range node.
  def end(p)
    Parsby.new("splicer.end(#{p.label})") do |c|
      begin
        p.parse c
      ensure
        c.parsed_ranges.children[0].markers << self
      end
    end
  end
end
|
116
|
+
|
117
|
+
# Mixin that gives an object a parent, an ordered list of children and a
# list of markers, along with traversal, copying and splicing helpers.
module Tree
  attr_accessor :parent
  attr_writer :children

  # Markers attached to this node. Lazily initialized to an empty array.
  def markers
    @markers ||= []
  end

  # Replaces this node's children with the descendants (or self) that
  # carry +marker+. Returns self.
  def splice_to!(marker)
    splice!(*select_paths { |n| n.markers.include? marker })
  end

  # Children of this node. Lazily initialized to an empty array.
  def children
    @children ||= []
  end

  # Appends the given node as a child and sets its parent to this node.
  # Returns the array of appended nodes.
  def <<(*ts)
    ts.each do |t|
      t.parent = self
      children << t
    end
  end

  # The topmost ancestor, i.e. the node without a parent.
  def root
    if parent == nil
      self
    else
      parent.root
    end
  end

  # Index of this node among its siblings counted from the right, or nil
  # when the node has no parent.
  def sibling_reverse_index
    parent&.children&.reverse&.index self
  end

  # Index of this node among its siblings, or nil when the node has no
  # parent.
  def sibling_index
    parent&.children&.index self
  end

  # This node followed by all of its descendants, depth first.
  def flatten
    [self, *children.flat_map(&:flatten)]
  end

  alias_method :self_and_descendants, :flatten

  # List of sibling indices leading from the root to this node. Empty for
  # the root.
  def path
    [*parent&.path, *sibling_index]
  end

  # Calls the block with this node and then with every descendant, depth
  # first. Returns self.
  def each(&b)
    b.call self
    children.each { |c| c.each(&b) }
    self
  end

  # Sum, over this node and each of its ancestors below the root, of the
  # number of siblings to the right of that node.
  def right_uncles
    if parent
      sibling_reverse_index + parent.right_uncles
    else
      0
    end
  end

  # A "*" followed by one "|" per right uncle.
  def right_tree_slice
    "*" + "|" * right_uncles
  end

  # Copies the whole tree this node belongs to and returns the copy of
  # this node. +currently_descending+ is used internally while recursing
  # into children so that only the subtree is copied at that point.
  def dup(currently_descending: false)
    # A node with a parent is copied by copying from the root and then
    # locating the counterpart of this node by its path.
    return root.dup.get(path) if parent && !currently_descending

    super().tap do |copy|
      copy.children = copy.children.map do |child|
        child.dup(currently_descending: true).tap do |child_copy|
          child_copy.parent = copy
        end
      end
    end
  end

  # Removes this node from its parent and puts this node's children in its
  # place. Returns the parent.
  def splice_self!
    idx = sibling_index
    parent.children.delete_at(idx)
    parent.children.insert(idx, *children.each { |c| c.parent = parent })
    parent
  end

  # Replaces this node's children with the nodes found at +paths+
  # (relative to this node), re-parenting them to this node. Paths that
  # lead nowhere are ignored. Returns self.
  def splice!(*paths)
    self.children = paths
      .map { |p| get(p)&.tap { |d| d.parent = self } }
      .compact
    self
  end

  # Like #splice!, but operates on a copy.
  def splice(*paths)
    dup.splice!(*paths)
  end

  # Recursively drops the children located to the right of the rightmost
  # child referenced by +paths+, as well as any remaining child for which
  # the optional block returns true. The block receives the child, its
  # index and the index of the rightmost referenced child. A node that no
  # path goes through ends up without children. Returns self.
  def trim_to_just!(*paths, &rejecting)
    max_sibling = paths.map(&:first).compact.max
    if max_sibling.nil?
      self.children = []
      return self
    end

    kept = children[0..max_sibling].each_with_index.reject do |child, i|
      rejecting && rejecting.call(child, i, max_sibling)
    end
    kept.each do |child, i|
      subpaths = paths.select { |p| p.first == i }.map { |p| p.drop 1 }
      child.trim_to_just!(*subpaths, &rejecting)
    end
    self.children = kept.map(&:first)
    self
  end

  # Nodes among self and descendants for which the block returns true, in
  # depth-first order.
  def select(&b)
    matching = []
    each { |n| matching << n if b.call n }
    matching
  end

  # Like #select, but returns the paths of the matching nodes relative to
  # this node.
  def select_paths(&b)
    root_path = path
    select(&b).map { |n| n.path.drop root_path.length }
  end

  # Node found by following +path+ (a list of child indices) from this
  # node, or nil when the path leads nowhere.
  def get(path)
    return self if path.empty?
    idx, *sub_path = path
    children[idx]&.get sub_path
  end

  # This node followed by its parent, grandparent and so on up to the root.
  def self_and_ancestors
    [self, *parent&.self_and_ancestors]
  end
end
|
265
|
+
|
266
|
+
class ParsedRange < PosRange
  include Tree

  attr_reader :label
  attr_accessor :failed

  # Builds a parsed range from a starting position, an ending position and
  # the label to show for it.
  def initialize(pos_start, pos_end, label)
    super(pos_start, pos_end)
    @label = label
  end

  alias_method :underline, :render_in
end
|
281
|
+
|
282
|
+
class ExpectationFailed < Error
  attr_reader :ctx

  # Number of spaces put in front of the source line and of each graph
  # line in the message.
  INDENTATION = 2

  # Initializes an ExpectationFailed from the parsing context in which the
  # failure happened.
  def initialize(ctx)
    @ctx = ctx
  end

  # Currently an empty placeholder; returns nil.
  def message_hunk(failure_tree)
  end

  # Copy of the parsed-range tree trimmed down to what is relevant to the
  # failure: the furthest parsed range plus every range starting at the
  # same position. While trimming, failed ranges are dropped unless they
  # are the rightmost kept sibling or start where the failure starts.
  # The result is memoized.
  def failure_tree
    @failure_tree ||= begin
      other_ranges = ctx.parsed_ranges.flatten.select do |range|
        range.start == parsed_range.start && range != parsed_range
      end
      relevant_paths = [parsed_range, *other_ranges].map(&:path)
      parsed_range.dup.root.trim_to_just!(*relevant_paths) do |c, i, max_sibling|
        c.failed && i != max_sibling && c.start != parsed_range.start
      end
    end
  end

  # The furthest parsed range of the context. Memoized.
  def parsed_range
    @parsed_range ||= ctx.furthest_parsed_range
  end

  # Line number header followed by the indented current line.
  def hunk_prelude
    <<~EOF
      line #{ctx.bio.line_number}:
      #{" " * INDENTATION}#{ctx.bio.current_line}
    EOF
  end

  # One line per node of the failure tree: the node's range underlined
  # relative to the current line, its slice of the tree drawing, whether
  # it failed or succeeded, and its label. Lines are emitted in reverse
  # traversal order.
  def hunk_graph
    line_range = ctx.bio.current_line_range
    graph_width = ctx.bio.current_line.length + INDENTATION
    slice_width = failure_tree.flatten.map { |t| t.right_tree_slice.length }.max
    prev_slice_length = nil
    tree_lines = []

    failure_tree.each do |range|
      slice = range.right_tree_slice
      # Pad up to the width of the source line. ljust leaves the string
      # untouched when the underline ("<-" or "->" on a very short line)
      # is already wider, where a computed " " * n would raise on n < 0.
      line = "#{" " * INDENTATION}#{range.underline(line_range)}".ljust(graph_width)

      # If previous slice was a parent with multiple children (current
      # slice being the first child), we'll want to draw the forking line.
      fork_line = ""
      if prev_slice_length && slice.length > prev_slice_length
        forks = slice.length - prev_slice_length
        seen = 0
        branches = slice.rjust(slice_width).gsub(/[*|]/) do |c|
          seen += 1
          seen <= forks ? "\\" : c
        end
        # Same width as the current line, but blank instead of the
        # rendered range.
        fork_line = "#{line.gsub(/./, " ")} #{branches}\n"
      end
      prev_slice_length = slice.length

      status = range.failed ? "failure" : "success"
      tree_lines << fork_line << "#{line} #{slice.rjust(slice_width)} #{status}: #{range.label}\n"
    end

    tree_lines.reverse.join
  end

  # Prelude and graph rendered with the input temporarily positioned at
  # +pos+; the previous position is restored afterwards.
  def hunk_at(pos)
    ctx.bio.with_saved_pos do
      ctx.bio.seek pos
      hunk_prelude + hunk_graph
    end
  end

  # The message of the exception. It's the current line, with a kind-of
  # backtrace showing the relevant parsed ranges with a visualization of
  # their range in the current line.
  def message
    hunk_at parsed_range.start
  end
end
|
376
|
+
|
377
|
+
class Token
  attr_reader :name

  # Creates a token called +name+.
  def initialize(name)
    @name = name
  end

  # The token's name wrapped in angle brackets.
  def to_s
    "<#{name}>"
  end

  # Two tokens are equal when the other one is an instance of this token's
  # class and both have the same name.
  def ==(t)
    return false unless t.is_a?(self.class)
    t.name == name
  end

  # Flipped version of Parsby#%, so the token can be written before the
  # parser expression it labels.
  def %(p)
    p % self
  end
end
|
401
|
+
|
402
|
+
# StringIO with helpers to look at what has been written so far, relative
# to the current position.
class Backup < StringIO
  # Calls the block with the current position and moves back to that
  # position afterwards, even when the block raises. Returns the block's
  # result.
  def with_saved_pos(&b)
    saved = pos
    b.call saved
  ensure
    seek saved
  end

  # Whole content, regardless of the current position.
  def all
    with_saved_pos do
      seek 0
      read
    end
  end

  alias_method :back_size, :pos

  # The +n+ characters before the current position. Defaults to everything
  # before the current position.
  def back(n = back_size)
    with_saved_pos do
      seek(-n, IO::SEEK_CUR)
      read n
    end
  end

  # Content from the current position up to and including the next
  # newline; "" at the end of the content.
  def rest_of_line
    with_saved_pos { readline }
  rescue EOFError
    ""
  end

  # Lines up to the current position, the last one completed with the rest
  # of its line.
  def back_lines
    (back + rest_of_line).lines
  end

  # Number of characters between the start of the current line and the
  # current position.
  def col
    back[/(?<=\A|\n).*\z/].length
  end

  # The line the current position is on, without its line terminator.
  # Returns "" when the position is at the start of an empty last line
  # (nothing written yet, or right after a trailing newline), consistent
  # with #rest_of_line.
  def current_line
    with_saved_pos do
      seek(-col, IO::SEEK_CUR)
      readline.chomp
    end
  rescue EOFError
    ""
  end
end
|
447
|
+
|
448
|
+
# Wraps an IO and keeps a backup of everything read through it, so that
# input can be put back and the reading position can be moved backwards.
class BackedIO
  # Initializes a BackedIO out of the provided IO object or String. The
  # String will be turned into an IO using StringIO.
  def initialize(io)
    io = StringIO.new io if io.is_a? String
    @io = io
    @backup = Backup.new
  end

  # Makes a new BackedIO out of the provided IO, calls the provided block
  # with it and restores the IO on an exception, which is then re-raised.
  def self.for(io, &b)
    bio = new io
    begin
      b.call bio
    rescue
      bio.restore
      raise
    end
  end

  # Similar to BackedIO.for, but it always restores the IO, even when
  # there's no exception.
  def self.peek(io, &b)
    self.for io do |bio|
      begin
        b.call bio
      ensure
        bio.restore
      end
    end
  end

  # Calls the block with the current position and goes back to that
  # position afterwards, even when the block raises. Returns the block's
  # result.
  def with_saved_pos(&b)
    saved = pos
    begin
      b.call saved
    ensure
      restore_to saved
    end
  end

  # Like #read, but without consuming.
  def peek(*args)
    with_saved_pos { read(*args) }
  end

  # Delegates pos to the inner IO, falling back to the position of the
  # backup when the inner IO raises Errno::ESPIPE.
  def pos
    @io.pos
  rescue Errno::ESPIPE
    backup.pos
  end

  # Number of lines read so far, counting the current one.
  def line_number
    lines_read.length
  end

  # Moves the reading position by reading ahead or by restoring from the
  # backup. Supports IO::SEEK_SET, IO::SEEK_CUR and IO::SEEK_END; any other
  # +whence+ raises ArgumentError.
  def seek(amount, whence = IO::SEEK_SET)
    if whence == IO::SEEK_END
      # Read everything to reach the end, then step back from there.
      read
      restore(-amount)
      return
    end
    new_pos = case whence
              when IO::SEEK_SET
                amount
              when IO::SEEK_CUR
                pos + amount
              else
                raise ArgumentError, "unsupported whence: #{whence.inspect}"
              end
    if new_pos > pos
      read new_pos - pos
    else
      restore_to new_pos
    end
  end

  # pos == current_line_pos + col. This is needed to convert a pos to a
  # col.
  def current_line_pos
    pos - col
  end

  # Column of the current position within the current line, as reported by
  # the backup.
  def col
    backup.col
  end

  # PosRange spanning the current line, from its starting position to its
  # starting position plus its length.
  def current_line_range
    start = current_line_pos
    PosRange.new start, start + current_line.length
  end

  # Reads the rest of the current line so that it ends up in the backup,
  # then goes back to the original position. Returns the text read, or nil
  # at the end of the input, where there is nothing left to load.
  def load_rest_of_line
    with_saved_pos { readline }
  rescue EOFError
    nil
  end

  # Lines read so far, with the current line completed, without line
  # terminators.
  def lines_read
    load_rest_of_line
    backup.back_lines.map(&:chomp)
  end

  # Returns current line, including what's to come from #read, without
  # consuming input.
  def current_line
    load_rest_of_line
    backup.current_line
  end

  # Restore n chars from the backup. Defaults to everything read so far.
  # Returns nil.
  def restore(n = backup.back_size)
    # Handle negatives in consideration of #with_saved_pos.
    if n < 0
      read(-n)
    else
      backup.back(n).chars.reverse.each { |c| ungetc c }
    end
    nil
  end

  # Restores as many characters as needed to get back to +prev_pos+.
  def restore_to(prev_pos)
    restore(pos - prev_pos)
  end

  # This is to provide transparent delegation to methods of the underlying
  # IO. Only public methods are forwarded, so that a stray call cannot
  # reach a private method of the wrapped object.
  def method_missing(m, *args, &b)
    @io.public_send(m, *args, &b)
  end

  # Reports the delegated methods of the underlying IO through
  # #respond_to?.
  def respond_to_missing?(m, include_private = false)
    @io.respond_to?(m) || super
  end

  # Reads a line from the underlying IO and backs it up.
  def readline(*args)
    @io.readline(*args).tap { |r| backup.write r unless r.nil? }
  end

  # Reads from underlying IO and backs it up.
  def read(*args)
    @io.read(*args).tap { |r| backup.write r unless r.nil? }
  end

  # Pass to underlying IO's ungetc and discard a part of the same length
  # from the backup. As specified with different IO classes, the argument
  # should be a single character. To restore from the backup, use
  # #restore.
  def ungetc(c)
    # Though c is supposed to be a single character, as specified by the
    # ungetc of different IO objects, let's not assume that when
    # adjusting the backup.
    backup.seek(-c.length, IO::SEEK_CUR)
    @io.ungetc(c)
  end

  private

  # Backup of what has been read through this object.
  def backup
    @backup
  end
end
|
606
|
+
|
607
|
+
# State shared by parsers during a parse: the backed input and the tree of
# parsed ranges.
class Context
  attr_reader :bio
  attr_accessor :parsed_ranges

  # Wraps +io+ in a BackedIO.
  def initialize(io)
    @failures = []
    @bio = BackedIO.new(io)
  end

  # Among the current parsed range and all of its descendants, the one
  # with the greatest starting position.
  def furthest_parsed_range
    parsed_ranges.flatten.max_by { |range| range.start }
  end
end
|
620
|
+
|
621
|
+
# The label assigned to this parser, or a new "unknown" token when no
# label has been assigned.
def label
  return @label if @label
  Token.new("unknown")
end
|
625
|
+
|
626
|
+
# Assigns the parser's label. A Symbol is wrapped in a Parsby::Token;
# anything else is stored as given.
def label=(name)
  @label =
    if name.is_a?(Symbol)
      Token.new(name)
    else
      name
    end
end
|
631
|
+
|
632
|
+
# Creates a parser from an optional label and a parsing block. The block
# is called with the parsing context, and its result is the result of the
# parse.
def initialize(label = nil, &b)
  @parser = b
  self.label = label if label
end
|
639
|
+
|
640
|
+
# Parses a String, an IO object or an existing Context.
#
# A ParsedRange for this parser is added under the context's current
# parsed range and becomes the current one while the parsing block runs.
# When the block raises ExpectationFailed, the range is marked as failed,
# the input is restored to where this parser started and the exception is
# re-raised. In both cases the range's end is set to the position reached.
def parse(src)
  ctx = src.is_a?(Context) ? src : Context.new(src)
  start_pos = ctx.bio.pos
  own_range = ParsedRange.new(start_pos, ctx.bio.pos, label)
  enclosing_range = ctx.parsed_ranges
  enclosing_range << own_range if enclosing_range
  ctx.parsed_ranges = own_range
  begin
    result = @parser.call ctx
  rescue ExpectationFailed
    ctx.parsed_ranges.end = ctx.bio.pos
    ctx.parsed_ranges.failed = true
    ctx.bio.restore_to ctx.parsed_ranges.start
    raise
  else
    ctx.parsed_ranges.end = ctx.bio.pos
    result
  ensure
    # Without an enclosing range this is the root; keep it in the context
    # for use in ExpectationFailed#message.
    ctx.parsed_ranges = enclosing_range if enclosing_range
  end
end
|
664
|
+
|
665
|
+
# Parses like #parse, then puts the input back to where it was before
# parsing, whether parsing succeeded or raised.
def peek(src)
  ctx = src.is_a?(Context) ? src : Context.new(src)
  pos_before = ctx.bio.pos
  begin
    parse ctx
  ensure
    ctx.bio.restore_to pos_before
  end
end
|
675
|
+
|
676
|
+
# <tt>x | y</tt> parses with x and, when that raises a Parsby::Error,
# parses with y instead.
def |(p)
  alternative_label = "(#{self.label} | #{p.label})"
  Parsby.new(alternative_label) { |c|
    begin
      parse c
    rescue Error
      p.parse c
    end
  }
end
|
686
|
+
|
687
|
+
# x < y runs parser x, then parser y, and results in what x returned.
def <(p)
  keep_left = self.then { |left_result| p.then { pure left_result } }
  keep_left % "(#{label} < #{p.label})"
end
|
691
|
+
|
692
|
+
# x > y runs parser x, then parser y, and results in what y returned.
def >(p)
  keep_right = self.then { p }
  keep_right % "(#{label} > #{p.label})"
end
|
696
|
+
|
697
|
+
# ~x parses like x, but afterwards (whether x succeeded or raised) removes
# x's node from the parsed-range tree, and also the node of the wrapping
# parser when that node has a parent. Each removed node's children take
# its place.
def ~
  Parsby.new("(~ #{label})") { |c|
    begin
      parse c
    ensure
      c.parsed_ranges.children[0].splice_self!
      c.parsed_ranges.splice_self! if c.parsed_ranges.parent
    end
  }
end
|
709
|
+
|
710
|
+
# p * n runs parser p n times in a row and collects the results in an
# array.
def *(n)
  repeated_label = "(#{label} * #{n})"
  Parsby.new(repeated_label) { |c|
    n.times.map { parse c }
  }
end
|
716
|
+
|
717
|
+
# x + y runs x and y and applies + to their results. This is mostly meant
# to be used with arrays, but it would work with numbers and strings too.
def +(p)
  summed = group(self, p).fmap { |(left, right)| left + right }
  summed.label = "(#{label} + #{p.label})"
  summed
end
|
724
|
+
|
725
|
+
# xs << x results in the list parsed by xs with the result of x appended
# to it.
def <<(p)
  Parsby.new("(#{label} << #{p.label})") { |c|
    list = parse c
    item = p.parse c
    # Build a new list instead of modifying the one xs returned.
    list + [item]
  }
end
|
734
|
+
|
735
|
+
# Assigns +name+ as the label of this parser and returns the parser
# itself.
def %(name)
  tap { |parser| parser.label = name }
end
|
740
|
+
|
741
|
+
# Like map for arrays, this lets you work with the value "inside" the
# parser, i.e. the result: the returned parser runs this one and passes
# its result through the block.
#
# Example:
#
#   decimal.fmap {|x| x + 1}.parse("2")
#   => 3
def fmap(&b)
  Parsby.new("#{label}.fmap") { |c|
    parsed = parse c
    b.call parsed
  }
end
|
753
|
+
|
754
|
+
# Runs this parser, passes its result to the block to obtain the next
# parser, and runs that one on the same input.
#
# For example, instead of writing:
#
#   Parsby.new do |c|
#     x = foo.parse c
#     bar(x).parse c
#   end
#
# you can write:
#
#   foo.then {|x| bar x }
#
# This is analogous to Parsec's >>= operator in Haskell, where you could
# write:
#
#   foo >>= bar
def then(&b)
  Parsby.new("#{label}.then") { |c|
    first_result = parse(c)
    next_parser = b.call(first_result)
    next_parser.parse(c)
  }
end
|
776
|
+
|
777
|
+
# <tt>x.that_fails(y)</tt> first tries <tt>y</tt>. When <tt>y</tt>
# succeeds, an ExpectationFailed is raised; when <tt>y</tt> raises a
# Parsby::Error, the input is put back and parsed with <tt>x</tt>.
#
# Example:
#
#   decimal.that_fails(string("10")).parse "3"
#   => 3
#   decimal.that_fails(string("10")).parse "10"
#   # raises Parsby::ExpectationFailed
def that_fails(p)
  Parsby.new("#{label}.that_fails(#{p.label})") { |c|
    input = c.bio
    orig_pos = input.pos
    begin
      p.parse input
    rescue Error
      input.restore_to orig_pos
      parse input
    else
      raise ExpectationFailed.new c
    end
  }
end

alias_method :that_fail, :that_fails
|
804
|
+
end
|