parslet 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +38 -1
- data/README +33 -21
- data/example/deepest_errors.rb +131 -0
- data/example/email_parser.rb +2 -6
- data/example/ignore.rb +2 -2
- data/example/json.rb +0 -3
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/lib/parslet.rb +65 -51
- data/lib/parslet/atoms.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +11 -12
- data/lib/parslet/atoms/base.rb +57 -99
- data/lib/parslet/atoms/can_flatten.rb +9 -4
- data/lib/parslet/atoms/context.rb +26 -4
- data/lib/parslet/atoms/entity.rb +5 -10
- data/lib/parslet/atoms/lookahead.rb +11 -7
- data/lib/parslet/atoms/named.rb +8 -12
- data/lib/parslet/atoms/re.rb +10 -9
- data/lib/parslet/atoms/repetition.rb +23 -24
- data/lib/parslet/atoms/sequence.rb +10 -16
- data/lib/parslet/atoms/str.rb +11 -13
- data/lib/parslet/cause.rb +45 -13
- data/lib/parslet/convenience.rb +6 -6
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +4 -4
- data/lib/parslet/expression.rb +0 -2
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +2 -6
- data/lib/parslet/pattern.rb +15 -4
- data/lib/parslet/pattern/binding.rb +3 -3
- data/lib/parslet/rig/rspec.rb +2 -2
- data/lib/parslet/slice.rb +0 -6
- data/lib/parslet/source.rb +40 -59
- data/lib/parslet/source/line_cache.rb +2 -2
- data/lib/parslet/transform.rb +13 -7
- data/lib/parslet/transform/context.rb +1 -1
- metadata +69 -26
- data/example/ignore_whitespace.rb +0 -66
- data/lib/parslet/bytecode.rb +0 -6
- data/lib/parslet/bytecode/compiler.rb +0 -138
- data/lib/parslet/bytecode/instructions.rb +0 -358
- data/lib/parslet/bytecode/vm.rb +0 -209
- data/lib/parslet/error_tree.rb +0 -50
@@ -20,7 +20,7 @@ module Parslet::Atoms
|
|
20
20
|
# naming something using <code>.as(...)</code>. It changes the folding
|
21
21
|
# semantics of repetition.
|
22
22
|
#
|
23
|
-
def flatten(value, named=false)
|
23
|
+
def flatten(value, named=false)
|
24
24
|
# Passes through everything that isn't an array of things
|
25
25
|
return value unless value.instance_of? Array
|
26
26
|
|
@@ -53,12 +53,15 @@ module Parslet::Atoms
|
|
53
53
|
|
54
54
|
# Flatten results from a sequence of parslets.
|
55
55
|
#
|
56
|
-
|
56
|
+
# @api private
|
57
|
+
#
|
58
|
+
def flatten_sequence(list)
|
57
59
|
foldl(list.compact) { |r, e| # and then merge flat elements
|
58
60
|
merge_fold(r, e)
|
59
61
|
}
|
60
62
|
end
|
61
|
-
|
63
|
+
# @api private
|
64
|
+
def merge_fold(l, r)
|
62
65
|
# equal pairs: merge. ----------------------------------------------------
|
63
66
|
if l.class == r.class
|
64
67
|
if l.is_a?(Hash)
|
@@ -96,7 +99,9 @@ module Parslet::Atoms
|
|
96
99
|
# the results, we want to leave an empty list alone - otherwise it is
|
97
100
|
# turned into an empty string.
|
98
101
|
#
|
99
|
-
|
102
|
+
# @api private
|
103
|
+
#
|
104
|
+
def flatten_repetition(list, named)
|
100
105
|
if list.any? { |e| e.instance_of?(Hash) }
|
101
106
|
# If keyed subtrees are in the array, we'll want to discard all
|
102
107
|
# strings in between. To keep them, name them.
|
@@ -3,11 +3,17 @@ module Parslet::Atoms
|
|
3
3
|
# parslet object to results. This is used for memoization in the packrat
|
4
4
|
# style.
|
5
5
|
#
|
6
|
+
# Also, error reporter is stored here and error reporting happens through
|
7
|
+
# this class. This makes the reporting pluggable.
|
8
|
+
#
|
6
9
|
class Context
|
7
|
-
|
10
|
+
# @param reporter [#err, #err_at] Error reporter (leave empty for default
|
11
|
+
# reporter)
|
12
|
+
def initialize(reporter=Parslet::ErrorReporter::Tree.new)
|
8
13
|
@cache = Hash.new { |h, k| h[k] = {} }
|
14
|
+
@reporter = reporter
|
9
15
|
end
|
10
|
-
|
16
|
+
|
11
17
|
# Caches a parse answer for obj at source.pos. Applying the same parslet
|
12
18
|
# at one position of input always yields the same result, unless the input
|
13
19
|
# has changed.
|
@@ -16,12 +22,12 @@ module Parslet::Atoms
|
|
16
22
|
# were consumed by a successful parse. Imitation of such a parse must
|
17
23
|
# advance the input pos by the same amount of bytes.
|
18
24
|
#
|
19
|
-
def
|
25
|
+
def try_with_cache(obj, source)
|
20
26
|
beg = source.pos
|
21
27
|
|
22
28
|
# Not in cache yet? Return early.
|
23
29
|
unless entry = lookup(obj, beg)
|
24
|
-
result =
|
30
|
+
result = obj.try(source, self)
|
25
31
|
|
26
32
|
set obj, beg, [result, source.pos-beg]
|
27
33
|
return result
|
@@ -36,6 +42,22 @@ module Parslet::Atoms
|
|
36
42
|
source.pos = beg + advance
|
37
43
|
return result
|
38
44
|
end
|
45
|
+
|
46
|
+
# Report an error at a given position.
|
47
|
+
# @see ErrorReporter
|
48
|
+
#
|
49
|
+
def err_at(*args)
|
50
|
+
return [false, @reporter.err_at(*args)] if @reporter
|
51
|
+
return [false, nil]
|
52
|
+
end
|
53
|
+
|
54
|
+
# Report an error.
|
55
|
+
# @see ErrorReporter
|
56
|
+
#
|
57
|
+
def err(*args)
|
58
|
+
return [false, @reporter.err(*args)] if @reporter
|
59
|
+
return [false, nil]
|
60
|
+
end
|
39
61
|
|
40
62
|
private
|
41
63
|
def lookup(obj, pos)
|
data/lib/parslet/atoms/entity.rb
CHANGED
@@ -10,14 +10,14 @@
|
|
10
10
|
#
|
11
11
|
class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
12
12
|
attr_reader :name, :block
|
13
|
-
def initialize(name, &block)
|
13
|
+
def initialize(name, &block)
|
14
14
|
super()
|
15
15
|
|
16
16
|
@name = name
|
17
17
|
@block = block
|
18
18
|
end
|
19
19
|
|
20
|
-
def try(source, context)
|
20
|
+
def try(source, context)
|
21
21
|
parslet.apply(source, context)
|
22
22
|
end
|
23
23
|
|
@@ -27,16 +27,11 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
|
27
27
|
}
|
28
28
|
end
|
29
29
|
|
30
|
-
def to_s_inner(prec)
|
30
|
+
def to_s_inner(prec)
|
31
31
|
name.to_s.upcase
|
32
|
-
end
|
33
|
-
|
34
|
-
def error_tree # :nodoc:
|
35
|
-
parslet.error_tree
|
36
|
-
end
|
37
|
-
|
32
|
+
end
|
38
33
|
private
|
39
|
-
def raise_not_implemented
|
34
|
+
def raise_not_implemented
|
40
35
|
trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport
|
41
36
|
exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
|
42
37
|
exception.set_backtrace(trace)
|
@@ -8,7 +8,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
8
8
|
attr_reader :positive
|
9
9
|
attr_reader :bound_parslet
|
10
10
|
|
11
|
-
def initialize(bound_parslet, positive=true)
|
11
|
+
def initialize(bound_parslet, positive=true)
|
12
12
|
super()
|
13
13
|
|
14
14
|
# Model positive and negative lookahead by testing this flag.
|
@@ -21,14 +21,18 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
21
21
|
}
|
22
22
|
end
|
23
23
|
|
24
|
-
def try(source, context)
|
24
|
+
def try(source, context)
|
25
25
|
pos = source.pos
|
26
26
|
|
27
|
-
value = bound_parslet.apply(source, context)
|
28
|
-
return success(nil) if positive ^ value.error?
|
27
|
+
success, value = bound_parslet.apply(source, context)
|
29
28
|
|
30
|
-
|
31
|
-
|
29
|
+
if positive
|
30
|
+
return succ(nil) if success
|
31
|
+
return context.err_at(self, source, @error_msgs[:positive], pos)
|
32
|
+
else
|
33
|
+
return succ(nil) unless success
|
34
|
+
return context.err_at(self, source, @error_msgs[:negative], pos)
|
35
|
+
end
|
32
36
|
|
33
37
|
# This is probably the only parslet that rewinds its input in #try.
|
34
38
|
# Lookaheads NEVER consume their input, even on success, that's why.
|
@@ -37,7 +41,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
37
41
|
end
|
38
42
|
|
39
43
|
precedence LOOKAHEAD
|
40
|
-
def to_s_inner(prec)
|
44
|
+
def to_s_inner(prec)
|
41
45
|
char = positive ? '&' : '!'
|
42
46
|
|
43
47
|
"#{char}#{bound_parslet.to_s(prec)}"
|
data/lib/parslet/atoms/named.rb
CHANGED
@@ -7,30 +7,26 @@
|
|
7
7
|
#
|
8
8
|
class Parslet::Atoms::Named < Parslet::Atoms::Base
|
9
9
|
attr_reader :parslet, :name
|
10
|
-
def initialize(parslet, name)
|
10
|
+
def initialize(parslet, name)
|
11
11
|
super()
|
12
12
|
|
13
13
|
@parslet, @name = parslet, name
|
14
14
|
end
|
15
15
|
|
16
|
-
def apply(source, context)
|
17
|
-
value = parslet.apply(source, context)
|
16
|
+
def apply(source, context)
|
17
|
+
success, value = result = parslet.apply(source, context)
|
18
18
|
|
19
|
-
return
|
20
|
-
|
19
|
+
return result unless success
|
20
|
+
succ(
|
21
21
|
produce_return_value(
|
22
|
-
value
|
22
|
+
value))
|
23
23
|
end
|
24
24
|
|
25
|
-
def to_s_inner(prec)
|
25
|
+
def to_s_inner(prec)
|
26
26
|
"#{name}:#{parslet.to_s(prec)}"
|
27
27
|
end
|
28
|
-
|
29
|
-
def error_tree # :nodoc:
|
30
|
-
parslet.error_tree
|
31
|
-
end
|
32
28
|
private
|
33
|
-
def produce_return_value(val)
|
29
|
+
def produce_return_value(val)
|
34
30
|
{ name => flatten(val, true) }
|
35
31
|
end
|
36
32
|
end
|
data/lib/parslet/atoms/re.rb
CHANGED
@@ -9,7 +9,7 @@
|
|
9
9
|
#
|
10
10
|
class Parslet::Atoms::Re < Parslet::Atoms::Base
|
11
11
|
attr_reader :match, :re
|
12
|
-
def initialize(match)
|
12
|
+
def initialize(match)
|
13
13
|
super()
|
14
14
|
|
15
15
|
@match = match.to_s
|
@@ -20,17 +20,18 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
|
|
20
20
|
}
|
21
21
|
end
|
22
22
|
|
23
|
-
def try(source, context)
|
24
|
-
|
25
|
-
s = source.read(1)
|
23
|
+
def try(source, context)
|
24
|
+
return succ(source.consume(1)) if source.matches?(re)
|
26
25
|
|
27
|
-
|
28
|
-
return
|
29
|
-
|
30
|
-
|
26
|
+
# No string could be read
|
27
|
+
return context.err(self, source, @error_msgs[:premature]) \
|
28
|
+
if source.eof?
|
29
|
+
|
30
|
+
# No match
|
31
|
+
return context.err(self, source, @error_msgs[:failed])
|
31
32
|
end
|
32
33
|
|
33
|
-
def to_s_inner(prec)
|
34
|
+
def to_s_inner(prec)
|
34
35
|
match.inspect[1..-2]
|
35
36
|
end
|
36
37
|
end
|
@@ -19,46 +19,45 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
|
|
19
19
|
}
|
20
20
|
end
|
21
21
|
|
22
|
-
def try(source, context)
|
22
|
+
def try(source, context)
|
23
23
|
occ = 0
|
24
|
-
|
24
|
+
accum = [@tag] # initialize the result array with the tag (for flattening)
|
25
25
|
start_pos = source.pos
|
26
|
+
|
27
|
+
break_on = nil
|
26
28
|
loop do
|
27
|
-
value = parslet.apply(source, context)
|
28
|
-
|
29
|
+
success, value = parslet.apply(source, context)
|
30
|
+
|
31
|
+
break_on = value
|
32
|
+
break unless success
|
29
33
|
|
30
34
|
occ += 1
|
31
|
-
|
35
|
+
accum << value
|
32
36
|
|
33
|
-
# If we're not greedy (max is defined), check if that has been
|
34
|
-
|
35
|
-
return success(result) if max && occ>=max
|
37
|
+
# If we're not greedy (max is defined), check if that has been reached.
|
38
|
+
return succ(accum) if max && occ>=max
|
36
39
|
end
|
37
40
|
|
41
|
+
# Last attempt to match parslet was a failure, failure reason in break_on.
|
42
|
+
|
38
43
|
# Greedy matcher has produced a failure. Check if occ (which will
|
39
|
-
# contain the number of sucesses) is
|
40
|
-
return
|
41
|
-
|
44
|
+
# contain the number of successes) is >= min.
|
45
|
+
return context.err_at(
|
46
|
+
self,
|
47
|
+
source,
|
48
|
+
@error_msgs[:minrep],
|
49
|
+
start_pos,
|
50
|
+
[break_on]) if occ < min
|
51
|
+
|
52
|
+
return succ(accum)
|
42
53
|
end
|
43
54
|
|
44
55
|
precedence REPETITION
|
45
|
-
def to_s_inner(prec)
|
56
|
+
def to_s_inner(prec)
|
46
57
|
minmax = "{#{min}, #{max}}"
|
47
58
|
minmax = '?' if min == 0 && max == 1
|
48
59
|
|
49
60
|
parslet.to_s(prec) + minmax
|
50
61
|
end
|
51
|
-
|
52
|
-
def cause # :nodoc:
|
53
|
-
# Either the repetition failed or the parslet inside failed to repeat.
|
54
|
-
super || parslet.cause
|
55
|
-
end
|
56
|
-
def error_tree # :nodoc:
|
57
|
-
if cause?
|
58
|
-
Parslet::ErrorTree.new(self, parslet.error_tree)
|
59
|
-
else
|
60
|
-
parslet.error_tree
|
61
|
-
end
|
62
|
-
end
|
63
62
|
end
|
64
63
|
|
@@ -15,30 +15,24 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
|
|
15
15
|
}
|
16
16
|
end
|
17
17
|
|
18
|
-
def >>(parslet)
|
18
|
+
def >>(parslet)
|
19
19
|
self.class.new(* @parslets+[parslet])
|
20
20
|
end
|
21
21
|
|
22
|
-
def try(source, context)
|
23
|
-
|
24
|
-
|
25
|
-
@offending_parslet = p
|
22
|
+
def try(source, context)
|
23
|
+
succ([:sequence]+parslets.map { |p|
|
24
|
+
success, value = p.apply(source, context)
|
26
25
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
value
|
26
|
+
unless success
|
27
|
+
return context.err(self, source, @error_msgs[:failed], [value])
|
28
|
+
end
|
29
|
+
|
30
|
+
value
|
32
31
|
})
|
33
32
|
end
|
34
33
|
|
35
34
|
precedence SEQUENCE
|
36
|
-
def to_s_inner(prec)
|
35
|
+
def to_s_inner(prec)
|
37
36
|
parslets.map { |p| p.to_s(prec) }.join(' ')
|
38
37
|
end
|
39
|
-
|
40
|
-
def error_tree # :nodoc:
|
41
|
-
Parslet::ErrorTree.new(self).tap { |t|
|
42
|
-
t.children << @offending_parslet.error_tree if @offending_parslet }
|
43
|
-
end
|
44
38
|
end
|
data/lib/parslet/atoms/str.rb
CHANGED
@@ -10,29 +10,27 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
10
10
|
super()
|
11
11
|
|
12
12
|
@str = str.to_s
|
13
|
+
@len = str.size
|
13
14
|
@error_msgs = {
|
14
15
|
:premature => "Premature end of input",
|
15
16
|
:failed => "Expected #{str.inspect}, but got "
|
16
17
|
}
|
17
18
|
end
|
18
19
|
|
19
|
-
def try(source, context)
|
20
|
-
|
21
|
-
# contents of parslets inner loop. Changes here affect parslets speed
|
22
|
-
# enormously.
|
23
|
-
error_pos = source.pos
|
24
|
-
s = source.read(str.bytesize)
|
25
|
-
|
26
|
-
return success(s) if s == str
|
20
|
+
def try(source, context)
|
21
|
+
return succ(source.consume(@len)) if source.matches?(str)
|
27
22
|
|
28
|
-
# assert: s != str
|
29
|
-
|
30
23
|
# Failures:
|
31
|
-
return
|
32
|
-
|
24
|
+
return context.err(self, source, @error_msgs[:premature]) \
|
25
|
+
if source.chars_left<@len
|
26
|
+
|
27
|
+
error_pos = source.pos
|
28
|
+
return context.err_at(
|
29
|
+
self, source,
|
30
|
+
[@error_msgs[:failed], source.consume(@len)], error_pos)
|
33
31
|
end
|
34
32
|
|
35
|
-
def to_s_inner(prec)
|
33
|
+
def to_s_inner(prec)
|
36
34
|
"'#{str}'"
|
37
35
|
end
|
38
36
|
end
|
data/lib/parslet/cause.rb
CHANGED
@@ -1,14 +1,48 @@
|
|
1
1
|
module Parslet
|
2
|
-
#
|
3
|
-
#
|
2
|
+
# Represents a cause why a parse did fail. A lot of these objects are
|
3
|
+
# constructed - not all of the causes turn out to be failures for the whole
|
4
|
+
# parse.
|
4
5
|
#
|
5
|
-
class Cause
|
6
|
-
|
6
|
+
class Cause
|
7
|
+
def initialize(message, source, pos, children)
|
8
|
+
@message, @source, @pos, @children =
|
9
|
+
message, source, pos, children
|
10
|
+
end
|
11
|
+
|
12
|
+
# @return [String, Array] A string or an array of message pieces that
|
13
|
+
# provide failure information. Use #to_s to get a formatted string.
|
14
|
+
attr_reader :message
|
15
|
+
|
16
|
+
# @return [Parslet::Source] Source that was parsed when this error
|
17
|
+
# happened. Mainly used for line number information.
|
18
|
+
attr_reader :source
|
19
|
+
|
20
|
+
# Location of the error.
|
21
|
+
#
|
22
|
+
# @return [Fixnum] Position where the error happened. (character offset)
|
23
|
+
attr_reader :pos
|
24
|
+
|
25
|
+
# When this cause is part of a tree of error causes: child nodes for this
|
26
|
+
# node. Very often carries the reasons for this cause.
|
27
|
+
#
|
28
|
+
# @return [Array<Parslet::Cause>] A list of reasons for this cause.
|
29
|
+
def children
|
30
|
+
@children ||= []
|
31
|
+
end
|
32
|
+
|
33
|
+
# Appends 'at line LINE char CHAR' to the string given. Use +pos+ to
|
7
34
|
# override the position of the +source+. This method returns an object
|
8
35
|
# that can be turned into a string using #to_s.
|
9
36
|
#
|
10
|
-
|
11
|
-
|
37
|
+
# @param source [Parslet::Source] source that was parsed when this error
|
38
|
+
# happened
|
39
|
+
# @param pos [Fixnum] position of error
|
40
|
+
# @param str [String, Array<String>] message parts
|
41
|
+
# @param children [Array<Parslet::Cause>] child nodes for this error tree
|
42
|
+
# @return [Parslet::Cause] a new instance of {Parslet::Cause}
|
43
|
+
#
|
44
|
+
def self.format(source, pos, str, children=[])
|
45
|
+
self.new(str, source, pos, children)
|
12
46
|
end
|
13
47
|
|
14
48
|
def to_s
|
@@ -37,12 +71,9 @@ module Parslet
|
|
37
71
|
recursive_ascii_tree(self, io, [true]) }.
|
38
72
|
string
|
39
73
|
end
|
40
|
-
|
41
|
-
def children
|
42
|
-
@children ||= Array.new
|
43
|
-
end
|
74
|
+
|
44
75
|
private
|
45
|
-
def recursive_ascii_tree(node, stream, curved)
|
76
|
+
def recursive_ascii_tree(node, stream, curved)
|
46
77
|
append_prefix(stream, curved)
|
47
78
|
stream.puts node.to_s
|
48
79
|
|
@@ -52,8 +83,9 @@ module Parslet
|
|
52
83
|
recursive_ascii_tree(child, stream, curved + [last_child])
|
53
84
|
end
|
54
85
|
end
|
55
|
-
def append_prefix(stream, curved)
|
56
|
-
curved
|
86
|
+
def append_prefix(stream, curved)
|
87
|
+
return if curved.size < 2
|
88
|
+
curved[1..-2].each do |c|
|
57
89
|
stream.print c ? " " : "| "
|
58
90
|
end
|
59
91
|
stream.print curved.last ? "`- " : "|- "
|