parslet 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +38 -1
- data/README +33 -21
- data/example/deepest_errors.rb +131 -0
- data/example/email_parser.rb +2 -6
- data/example/ignore.rb +2 -2
- data/example/json.rb +0 -3
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/lib/parslet.rb +65 -51
- data/lib/parslet/atoms.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +11 -12
- data/lib/parslet/atoms/base.rb +57 -99
- data/lib/parslet/atoms/can_flatten.rb +9 -4
- data/lib/parslet/atoms/context.rb +26 -4
- data/lib/parslet/atoms/entity.rb +5 -10
- data/lib/parslet/atoms/lookahead.rb +11 -7
- data/lib/parslet/atoms/named.rb +8 -12
- data/lib/parslet/atoms/re.rb +10 -9
- data/lib/parslet/atoms/repetition.rb +23 -24
- data/lib/parslet/atoms/sequence.rb +10 -16
- data/lib/parslet/atoms/str.rb +11 -13
- data/lib/parslet/cause.rb +45 -13
- data/lib/parslet/convenience.rb +6 -6
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +4 -4
- data/lib/parslet/expression.rb +0 -2
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +2 -6
- data/lib/parslet/pattern.rb +15 -4
- data/lib/parslet/pattern/binding.rb +3 -3
- data/lib/parslet/rig/rspec.rb +2 -2
- data/lib/parslet/slice.rb +0 -6
- data/lib/parslet/source.rb +40 -59
- data/lib/parslet/source/line_cache.rb +2 -2
- data/lib/parslet/transform.rb +13 -7
- data/lib/parslet/transform/context.rb +1 -1
- metadata +69 -26
- data/example/ignore_whitespace.rb +0 -66
- data/lib/parslet/bytecode.rb +0 -6
- data/lib/parslet/bytecode/compiler.rb +0 -138
- data/lib/parslet/bytecode/instructions.rb +0 -358
- data/lib/parslet/bytecode/vm.rb +0 -209
- data/lib/parslet/error_tree.rb +0 -50
@@ -20,7 +20,7 @@ module Parslet::Atoms
|
|
20
20
|
# naming something using <code>.as(...)</code>. It changes the folding
|
21
21
|
# semantics of repetition.
|
22
22
|
#
|
23
|
-
def flatten(value, named=false)
|
23
|
+
def flatten(value, named=false)
|
24
24
|
# Passes through everything that isn't an array of things
|
25
25
|
return value unless value.instance_of? Array
|
26
26
|
|
@@ -53,12 +53,15 @@ module Parslet::Atoms
|
|
53
53
|
|
54
54
|
# Flatten results from a sequence of parslets.
|
55
55
|
#
|
56
|
-
|
56
|
+
# @api private
|
57
|
+
#
|
58
|
+
def flatten_sequence(list)
|
57
59
|
foldl(list.compact) { |r, e| # and then merge flat elements
|
58
60
|
merge_fold(r, e)
|
59
61
|
}
|
60
62
|
end
|
61
|
-
|
63
|
+
# @api private
|
64
|
+
def merge_fold(l, r)
|
62
65
|
# equal pairs: merge. ----------------------------------------------------
|
63
66
|
if l.class == r.class
|
64
67
|
if l.is_a?(Hash)
|
@@ -96,7 +99,9 @@ module Parslet::Atoms
|
|
96
99
|
# the results, we want to leave an empty list alone - otherwise it is
|
97
100
|
# turned into an empty string.
|
98
101
|
#
|
99
|
-
|
102
|
+
# @api private
|
103
|
+
#
|
104
|
+
def flatten_repetition(list, named)
|
100
105
|
if list.any? { |e| e.instance_of?(Hash) }
|
101
106
|
# If keyed subtrees are in the array, we'll want to discard all
|
102
107
|
# strings inbetween. To keep them, name them.
|
@@ -3,11 +3,17 @@ module Parslet::Atoms
|
|
3
3
|
# parslet object to results. This is used for memoization in the packrat
|
4
4
|
# style.
|
5
5
|
#
|
6
|
+
# Also, error reporter is stored here and error reporting happens through
|
7
|
+
# this class. This makes the reporting pluggable.
|
8
|
+
#
|
6
9
|
class Context
|
7
|
-
|
10
|
+
# @param reporter [#err, #err_at] Error reporter (leave empty for default
|
11
|
+
# reporter)
|
12
|
+
def initialize(reporter=Parslet::ErrorReporter::Tree.new)
|
8
13
|
@cache = Hash.new { |h, k| h[k] = {} }
|
14
|
+
@reporter = reporter
|
9
15
|
end
|
10
|
-
|
16
|
+
|
11
17
|
# Caches a parse answer for obj at source.pos. Applying the same parslet
|
12
18
|
# at one position of input always yields the same result, unless the input
|
13
19
|
# has changed.
|
@@ -16,12 +22,12 @@ module Parslet::Atoms
|
|
16
22
|
# were consumed by a successful parse. Imitation of such a parse must
|
17
23
|
# advance the input pos by the same amount of bytes.
|
18
24
|
#
|
19
|
-
def
|
25
|
+
def try_with_cache(obj, source)
|
20
26
|
beg = source.pos
|
21
27
|
|
22
28
|
# Not in cache yet? Return early.
|
23
29
|
unless entry = lookup(obj, beg)
|
24
|
-
result =
|
30
|
+
result = obj.try(source, self)
|
25
31
|
|
26
32
|
set obj, beg, [result, source.pos-beg]
|
27
33
|
return result
|
@@ -36,6 +42,22 @@ module Parslet::Atoms
|
|
36
42
|
source.pos = beg + advance
|
37
43
|
return result
|
38
44
|
end
|
45
|
+
|
46
|
+
# Report an error at a given position.
|
47
|
+
# @see ErrorReporter
|
48
|
+
#
|
49
|
+
def err_at(*args)
|
50
|
+
return [false, @reporter.err_at(*args)] if @reporter
|
51
|
+
return [false, nil]
|
52
|
+
end
|
53
|
+
|
54
|
+
# Report an error.
|
55
|
+
# @see ErrorReporter
|
56
|
+
#
|
57
|
+
def err(*args)
|
58
|
+
return [false, @reporter.err(*args)] if @reporter
|
59
|
+
return [false, nil]
|
60
|
+
end
|
39
61
|
|
40
62
|
private
|
41
63
|
def lookup(obj, pos)
|
data/lib/parslet/atoms/entity.rb
CHANGED
@@ -10,14 +10,14 @@
|
|
10
10
|
#
|
11
11
|
class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
12
12
|
attr_reader :name, :block
|
13
|
-
def initialize(name, &block)
|
13
|
+
def initialize(name, &block)
|
14
14
|
super()
|
15
15
|
|
16
16
|
@name = name
|
17
17
|
@block = block
|
18
18
|
end
|
19
19
|
|
20
|
-
def try(source, context)
|
20
|
+
def try(source, context)
|
21
21
|
parslet.apply(source, context)
|
22
22
|
end
|
23
23
|
|
@@ -27,16 +27,11 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
|
27
27
|
}
|
28
28
|
end
|
29
29
|
|
30
|
-
def to_s_inner(prec)
|
30
|
+
def to_s_inner(prec)
|
31
31
|
name.to_s.upcase
|
32
|
-
end
|
33
|
-
|
34
|
-
def error_tree # :nodoc:
|
35
|
-
parslet.error_tree
|
36
|
-
end
|
37
|
-
|
32
|
+
end
|
38
33
|
private
|
39
|
-
def raise_not_implemented
|
34
|
+
def raise_not_implemented
|
40
35
|
trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport
|
41
36
|
exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
|
42
37
|
exception.set_backtrace(trace)
|
@@ -8,7 +8,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
8
8
|
attr_reader :positive
|
9
9
|
attr_reader :bound_parslet
|
10
10
|
|
11
|
-
def initialize(bound_parslet, positive=true)
|
11
|
+
def initialize(bound_parslet, positive=true)
|
12
12
|
super()
|
13
13
|
|
14
14
|
# Model positive and negative lookahead by testing this flag.
|
@@ -21,14 +21,18 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
21
21
|
}
|
22
22
|
end
|
23
23
|
|
24
|
-
def try(source, context)
|
24
|
+
def try(source, context)
|
25
25
|
pos = source.pos
|
26
26
|
|
27
|
-
value = bound_parslet.apply(source, context)
|
28
|
-
return success(nil) if positive ^ value.error?
|
27
|
+
success, value = bound_parslet.apply(source, context)
|
29
28
|
|
30
|
-
|
31
|
-
|
29
|
+
if positive
|
30
|
+
return succ(nil) if success
|
31
|
+
return context.err_at(self, source, @error_msgs[:positive], pos)
|
32
|
+
else
|
33
|
+
return succ(nil) unless success
|
34
|
+
return context.err_at(self, source, @error_msgs[:negative], pos)
|
35
|
+
end
|
32
36
|
|
33
37
|
# This is probably the only parslet that rewinds its input in #try.
|
34
38
|
# Lookaheads NEVER consume their input, even on success, that's why.
|
@@ -37,7 +41,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
37
41
|
end
|
38
42
|
|
39
43
|
precedence LOOKAHEAD
|
40
|
-
def to_s_inner(prec)
|
44
|
+
def to_s_inner(prec)
|
41
45
|
char = positive ? '&' : '!'
|
42
46
|
|
43
47
|
"#{char}#{bound_parslet.to_s(prec)}"
|
data/lib/parslet/atoms/named.rb
CHANGED
@@ -7,30 +7,26 @@
|
|
7
7
|
#
|
8
8
|
class Parslet::Atoms::Named < Parslet::Atoms::Base
|
9
9
|
attr_reader :parslet, :name
|
10
|
-
def initialize(parslet, name)
|
10
|
+
def initialize(parslet, name)
|
11
11
|
super()
|
12
12
|
|
13
13
|
@parslet, @name = parslet, name
|
14
14
|
end
|
15
15
|
|
16
|
-
def apply(source, context)
|
17
|
-
value = parslet.apply(source, context)
|
16
|
+
def apply(source, context)
|
17
|
+
success, value = result = parslet.apply(source, context)
|
18
18
|
|
19
|
-
return
|
20
|
-
|
19
|
+
return result unless success
|
20
|
+
succ(
|
21
21
|
produce_return_value(
|
22
|
-
value
|
22
|
+
value))
|
23
23
|
end
|
24
24
|
|
25
|
-
def to_s_inner(prec)
|
25
|
+
def to_s_inner(prec)
|
26
26
|
"#{name}:#{parslet.to_s(prec)}"
|
27
27
|
end
|
28
|
-
|
29
|
-
def error_tree # :nodoc:
|
30
|
-
parslet.error_tree
|
31
|
-
end
|
32
28
|
private
|
33
|
-
def produce_return_value(val)
|
29
|
+
def produce_return_value(val)
|
34
30
|
{ name => flatten(val, true) }
|
35
31
|
end
|
36
32
|
end
|
data/lib/parslet/atoms/re.rb
CHANGED
@@ -9,7 +9,7 @@
|
|
9
9
|
#
|
10
10
|
class Parslet::Atoms::Re < Parslet::Atoms::Base
|
11
11
|
attr_reader :match, :re
|
12
|
-
def initialize(match)
|
12
|
+
def initialize(match)
|
13
13
|
super()
|
14
14
|
|
15
15
|
@match = match.to_s
|
@@ -20,17 +20,18 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
|
|
20
20
|
}
|
21
21
|
end
|
22
22
|
|
23
|
-
def try(source, context)
|
24
|
-
|
25
|
-
s = source.read(1)
|
23
|
+
def try(source, context)
|
24
|
+
return succ(source.consume(1)) if source.matches?(re)
|
26
25
|
|
27
|
-
|
28
|
-
return
|
29
|
-
|
30
|
-
|
26
|
+
# No string could be read
|
27
|
+
return context.err(self, source, @error_msgs[:premature]) \
|
28
|
+
if source.eof?
|
29
|
+
|
30
|
+
# No match
|
31
|
+
return context.err(self, source, @error_msgs[:failed])
|
31
32
|
end
|
32
33
|
|
33
|
-
def to_s_inner(prec)
|
34
|
+
def to_s_inner(prec)
|
34
35
|
match.inspect[1..-2]
|
35
36
|
end
|
36
37
|
end
|
@@ -19,46 +19,45 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
|
|
19
19
|
}
|
20
20
|
end
|
21
21
|
|
22
|
-
def try(source, context)
|
22
|
+
def try(source, context)
|
23
23
|
occ = 0
|
24
|
-
|
24
|
+
accum = [@tag] # initialize the result array with the tag (for flattening)
|
25
25
|
start_pos = source.pos
|
26
|
+
|
27
|
+
break_on = nil
|
26
28
|
loop do
|
27
|
-
value = parslet.apply(source, context)
|
28
|
-
|
29
|
+
success, value = parslet.apply(source, context)
|
30
|
+
|
31
|
+
break_on = value
|
32
|
+
break unless success
|
29
33
|
|
30
34
|
occ += 1
|
31
|
-
|
35
|
+
accum << value
|
32
36
|
|
33
|
-
# If we're not greedy (max is defined), check if that has been
|
34
|
-
|
35
|
-
return success(result) if max && occ>=max
|
37
|
+
# If we're not greedy (max is defined), check if that has been reached.
|
38
|
+
return succ(accum) if max && occ>=max
|
36
39
|
end
|
37
40
|
|
41
|
+
# Last attempt to match parslet was a failure, failure reason in break_on.
|
42
|
+
|
38
43
|
# Greedy matcher has produced a failure. Check if occ (which will
|
39
|
-
# contain the number of sucesses) is
|
40
|
-
return
|
41
|
-
|
44
|
+
# contain the number of sucesses) is >= min.
|
45
|
+
return context.err_at(
|
46
|
+
self,
|
47
|
+
source,
|
48
|
+
@error_msgs[:minrep],
|
49
|
+
start_pos,
|
50
|
+
[break_on]) if occ < min
|
51
|
+
|
52
|
+
return succ(accum)
|
42
53
|
end
|
43
54
|
|
44
55
|
precedence REPETITION
|
45
|
-
def to_s_inner(prec)
|
56
|
+
def to_s_inner(prec)
|
46
57
|
minmax = "{#{min}, #{max}}"
|
47
58
|
minmax = '?' if min == 0 && max == 1
|
48
59
|
|
49
60
|
parslet.to_s(prec) + minmax
|
50
61
|
end
|
51
|
-
|
52
|
-
def cause # :nodoc:
|
53
|
-
# Either the repetition failed or the parslet inside failed to repeat.
|
54
|
-
super || parslet.cause
|
55
|
-
end
|
56
|
-
def error_tree # :nodoc:
|
57
|
-
if cause?
|
58
|
-
Parslet::ErrorTree.new(self, parslet.error_tree)
|
59
|
-
else
|
60
|
-
parslet.error_tree
|
61
|
-
end
|
62
|
-
end
|
63
62
|
end
|
64
63
|
|
@@ -15,30 +15,24 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
|
|
15
15
|
}
|
16
16
|
end
|
17
17
|
|
18
|
-
def >>(parslet)
|
18
|
+
def >>(parslet)
|
19
19
|
self.class.new(* @parslets+[parslet])
|
20
20
|
end
|
21
21
|
|
22
|
-
def try(source, context)
|
23
|
-
|
24
|
-
|
25
|
-
@offending_parslet = p
|
22
|
+
def try(source, context)
|
23
|
+
succ([:sequence]+parslets.map { |p|
|
24
|
+
success, value = p.apply(source, context)
|
26
25
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
value
|
26
|
+
unless success
|
27
|
+
return context.err(self, source, @error_msgs[:failed], [value])
|
28
|
+
end
|
29
|
+
|
30
|
+
value
|
32
31
|
})
|
33
32
|
end
|
34
33
|
|
35
34
|
precedence SEQUENCE
|
36
|
-
def to_s_inner(prec)
|
35
|
+
def to_s_inner(prec)
|
37
36
|
parslets.map { |p| p.to_s(prec) }.join(' ')
|
38
37
|
end
|
39
|
-
|
40
|
-
def error_tree # :nodoc:
|
41
|
-
Parslet::ErrorTree.new(self).tap { |t|
|
42
|
-
t.children << @offending_parslet.error_tree if @offending_parslet }
|
43
|
-
end
|
44
38
|
end
|
data/lib/parslet/atoms/str.rb
CHANGED
@@ -10,29 +10,27 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
10
10
|
super()
|
11
11
|
|
12
12
|
@str = str.to_s
|
13
|
+
@len = str.size
|
13
14
|
@error_msgs = {
|
14
15
|
:premature => "Premature end of input",
|
15
16
|
:failed => "Expected #{str.inspect}, but got "
|
16
17
|
}
|
17
18
|
end
|
18
19
|
|
19
|
-
def try(source, context)
|
20
|
-
|
21
|
-
# contents of parslets inner loop. Changes here affect parslets speed
|
22
|
-
# enormously.
|
23
|
-
error_pos = source.pos
|
24
|
-
s = source.read(str.bytesize)
|
25
|
-
|
26
|
-
return success(s) if s == str
|
20
|
+
def try(source, context)
|
21
|
+
return succ(source.consume(@len)) if source.matches?(str)
|
27
22
|
|
28
|
-
# assert: s != str
|
29
|
-
|
30
23
|
# Failures:
|
31
|
-
return
|
32
|
-
|
24
|
+
return context.err(self, source, @error_msgs[:premature]) \
|
25
|
+
if source.chars_left<@len
|
26
|
+
|
27
|
+
error_pos = source.pos
|
28
|
+
return context.err_at(
|
29
|
+
self, source,
|
30
|
+
[@error_msgs[:failed], source.consume(@len)], error_pos)
|
33
31
|
end
|
34
32
|
|
35
|
-
def to_s_inner(prec)
|
33
|
+
def to_s_inner(prec)
|
36
34
|
"'#{str}'"
|
37
35
|
end
|
38
36
|
end
|
data/lib/parslet/cause.rb
CHANGED
@@ -1,14 +1,48 @@
|
|
1
1
|
module Parslet
|
2
|
-
#
|
3
|
-
#
|
2
|
+
# Represents a cause why a parse did fail. A lot of these objects are
|
3
|
+
# constructed - not all of the causes turn out to be failures for the whole
|
4
|
+
# parse.
|
4
5
|
#
|
5
|
-
class Cause
|
6
|
-
|
6
|
+
class Cause
|
7
|
+
def initialize(message, source, pos, children)
|
8
|
+
@message, @source, @pos, @children =
|
9
|
+
message, source, pos, children
|
10
|
+
end
|
11
|
+
|
12
|
+
# @return [String, Array] A string or an array of message pieces that
|
13
|
+
# provide failure information. Use #to_s to get a formatted string.
|
14
|
+
attr_reader :message
|
15
|
+
|
16
|
+
# @return [Parslet::Source] Source that was parsed when this error
|
17
|
+
# happend. Mainly used for line number information.
|
18
|
+
attr_reader :source
|
19
|
+
|
20
|
+
# Location of the error.
|
21
|
+
#
|
22
|
+
# @return [Fixnum] Position where the error happened. (character offset)
|
23
|
+
attr_reader :pos
|
24
|
+
|
25
|
+
# When this cause is part of a tree of error causes: child nodes for this
|
26
|
+
# node. Very often carries the reasons for this cause.
|
27
|
+
#
|
28
|
+
# @return [Array<Parslet::Cause>] A list of reasons for this cause.
|
29
|
+
def children
|
30
|
+
@children ||= []
|
31
|
+
end
|
32
|
+
|
33
|
+
# Appends 'at line LINE char CHAR' to the string given. Use +pos+ to
|
7
34
|
# override the position of the +source+. This method returns an object
|
8
35
|
# that can be turned into a string using #to_s.
|
9
36
|
#
|
10
|
-
|
11
|
-
|
37
|
+
# @param source [Parslet::Source] source that was parsed when this error
|
38
|
+
# happened
|
39
|
+
# @param pos [Fixnum] position of error
|
40
|
+
# @param str [String, Array<String>] message parts
|
41
|
+
# @param children [Array<Parslet::Cause>] child nodes for this error tree
|
42
|
+
# @return [Parslet::Cause] a new instance of {Parslet::Cause}
|
43
|
+
#
|
44
|
+
def self.format(source, pos, str, children=[])
|
45
|
+
self.new(str, source, pos, children)
|
12
46
|
end
|
13
47
|
|
14
48
|
def to_s
|
@@ -37,12 +71,9 @@ module Parslet
|
|
37
71
|
recursive_ascii_tree(self, io, [true]) }.
|
38
72
|
string
|
39
73
|
end
|
40
|
-
|
41
|
-
def children
|
42
|
-
@children ||= Array.new
|
43
|
-
end
|
74
|
+
|
44
75
|
private
|
45
|
-
def recursive_ascii_tree(node, stream, curved)
|
76
|
+
def recursive_ascii_tree(node, stream, curved)
|
46
77
|
append_prefix(stream, curved)
|
47
78
|
stream.puts node.to_s
|
48
79
|
|
@@ -52,8 +83,9 @@ module Parslet
|
|
52
83
|
recursive_ascii_tree(child, stream, curved + [last_child])
|
53
84
|
end
|
54
85
|
end
|
55
|
-
def append_prefix(stream, curved)
|
56
|
-
curved
|
86
|
+
def append_prefix(stream, curved)
|
87
|
+
return if curved.size < 2
|
88
|
+
curved[1..-2].each do |c|
|
57
89
|
stream.print c ? " " : "| "
|
58
90
|
end
|
59
91
|
stream.print curved.last ? "`- " : "|- "
|