parslet 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +38 -1
- data/README +33 -21
- data/example/deepest_errors.rb +131 -0
- data/example/email_parser.rb +2 -6
- data/example/ignore.rb +2 -2
- data/example/json.rb +0 -3
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/lib/parslet.rb +65 -51
- data/lib/parslet/atoms.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +11 -12
- data/lib/parslet/atoms/base.rb +57 -99
- data/lib/parslet/atoms/can_flatten.rb +9 -4
- data/lib/parslet/atoms/context.rb +26 -4
- data/lib/parslet/atoms/entity.rb +5 -10
- data/lib/parslet/atoms/lookahead.rb +11 -7
- data/lib/parslet/atoms/named.rb +8 -12
- data/lib/parslet/atoms/re.rb +10 -9
- data/lib/parslet/atoms/repetition.rb +23 -24
- data/lib/parslet/atoms/sequence.rb +10 -16
- data/lib/parslet/atoms/str.rb +11 -13
- data/lib/parslet/cause.rb +45 -13
- data/lib/parslet/convenience.rb +6 -6
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +4 -4
- data/lib/parslet/expression.rb +0 -2
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +2 -6
- data/lib/parslet/pattern.rb +15 -4
- data/lib/parslet/pattern/binding.rb +3 -3
- data/lib/parslet/rig/rspec.rb +2 -2
- data/lib/parslet/slice.rb +0 -6
- data/lib/parslet/source.rb +40 -59
- data/lib/parslet/source/line_cache.rb +2 -2
- data/lib/parslet/transform.rb +13 -7
- data/lib/parslet/transform/context.rb +1 -1
- metadata +69 -26
- data/example/ignore_whitespace.rb +0 -66
- data/lib/parslet/bytecode.rb +0 -6
- data/lib/parslet/bytecode/compiler.rb +0 -138
- data/lib/parslet/bytecode/instructions.rb +0 -358
- data/lib/parslet/bytecode/vm.rb +0 -209
- data/lib/parslet/error_tree.rb +0 -50
data/lib/parslet/convenience.rb
CHANGED
@@ -5,8 +5,7 @@ class Parslet::Atoms::Base
|
|
5
5
|
# begin
|
6
6
|
# tree = parser.parse('something')
|
7
7
|
# rescue Parslet::ParseFailed => error
|
8
|
-
# puts
|
9
|
-
# puts parser.error_tree
|
8
|
+
# puts parser.cause.ascii_tree
|
10
9
|
# end
|
11
10
|
#
|
12
11
|
# into a convenient method.
|
@@ -23,13 +22,14 @@ class Parslet::Atoms::Base
|
|
23
22
|
#
|
24
23
|
# FooParser.new.parse_with_debug('bar')
|
25
24
|
#
|
26
|
-
|
27
|
-
|
25
|
+
# @see Parslet::Atoms::Base#parse
|
26
|
+
#
|
27
|
+
def parse_with_debug str, opts={}
|
28
|
+
parse str, opts
|
28
29
|
rescue Parslet::UnconsumedInput => error
|
29
30
|
puts error
|
30
31
|
rescue Parslet::ParseFailed => error
|
31
|
-
puts error
|
32
|
-
puts error_tree
|
32
|
+
puts error.cause.ascii_tree
|
33
33
|
end
|
34
34
|
|
35
35
|
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Parslet
|
2
|
+
module ErrorReporter
|
3
|
+
# Instead of reporting the latest error that happens like {Tree} does,
|
4
|
+
# this class reports the deepest error. Depth is defined here as how
|
5
|
+
# advanced into the input an error happens. The errors close to the
|
6
|
+
# greatest depth tend to be more relevant to the end user, since they
|
7
|
+
# specify what could be done to make them go away.
|
8
|
+
#
|
9
|
+
# More specifically, errors produced by this reporter won't be related to
|
10
|
+
# the structure of the grammar at all. The positions of the errors will
|
11
|
+
# be advanced and convey at every grammar level what the deepest rule
|
12
|
+
# was to fail.
|
13
|
+
#
|
14
|
+
class Deepest
|
15
|
+
def initialize
|
16
|
+
@deepest_cause = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
# Produces an error cause that combines the message at the current level
|
20
|
+
# with the errors that happened at a level below (children).
|
21
|
+
#
|
22
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
23
|
+
# @param source [Source] Source that we're using for this parse. (line
|
24
|
+
# number information...)
|
25
|
+
# @param message [String, Array] Error message at this level.
|
26
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
27
|
+
# @return [Cause] An error tree combining children with message.
|
28
|
+
#
|
29
|
+
def err(atom, source, message, children=nil)
|
30
|
+
position = source.pos
|
31
|
+
cause = Cause.format(source, position, message, children)
|
32
|
+
return deepest(cause)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Produces an error cause that combines the message at the current level
|
36
|
+
# with the errors that happened at a level below (children).
|
37
|
+
#
|
38
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
39
|
+
# @param source [Source] Source that we're using for this parse. (line
|
40
|
+
# number information...)
|
41
|
+
# @param message [String, Array] Error message at this level.
|
42
|
+
# @param pos [Fixnum] The real position of the error.
|
43
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
44
|
+
# @return [Cause] An error tree combining children with message.
|
45
|
+
#
|
46
|
+
def err_at(atom, source, message, pos, children=nil)
|
47
|
+
position = pos
|
48
|
+
cause = Cause.format(source, position, message, children)
|
49
|
+
return deepest(cause)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns the cause that is currently deepest. Mainly for specs.
|
53
|
+
#
|
54
|
+
attr_reader :deepest_cause
|
55
|
+
|
56
|
+
# Checks to see if the lineage of the cause given includes a cause with
|
57
|
+
# an error position deeper than the current deepest cause stored. If
|
58
|
+
# yes, it passes the cause through to the caller. If no, it returns the
|
59
|
+
# current deepest error that was saved as a reference.
|
60
|
+
#
|
61
|
+
def deepest(cause)
|
62
|
+
rank, leaf = deepest_child(cause)
|
63
|
+
|
64
|
+
if !deepest_cause || leaf.pos >= deepest_cause.pos
|
65
|
+
# This error reaches deeper into the input, save it as reference.
|
66
|
+
@deepest_cause = leaf
|
67
|
+
return cause
|
68
|
+
end
|
69
|
+
|
70
|
+
return deepest_cause
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
# Returns the leaf from a given error tree with the biggest rank.
|
75
|
+
#
|
76
|
+
def deepest_child(cause, rank=0)
|
77
|
+
max_child = cause
|
78
|
+
max_rank = rank
|
79
|
+
|
80
|
+
if cause.children && !cause.children.empty?
|
81
|
+
cause.children.each do |child|
|
82
|
+
c_rank, c_cause = deepest_child(child, rank+1)
|
83
|
+
|
84
|
+
if c_rank > max_rank
|
85
|
+
max_rank = c_rank
|
86
|
+
max_child = c_cause
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
return max_rank, max_child
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Parslet
|
2
|
+
module ErrorReporter
|
3
|
+
# An error reporter has two central methods, one for reporting errors at
|
4
|
+
# the current parse position (#err) and one for reporting errors at a
|
5
|
+
# given parse position (#err_at). The reporter can return an object (a
|
6
|
+
# 'cause') that will be returned to the caller along with the information
|
7
|
+
# that the parse failed.
|
8
|
+
#
|
9
|
+
# When reporting errors on the outer levels of your parser, these methods
|
10
|
+
# get passed a list of error objects ('causes') from the inner levels. In
|
11
|
+
# this default implementation, the inner levels are considered error
|
12
|
+
# subtrees and are appended to the generated tree node at each level,
|
13
|
+
# thereby constructing an error tree.
|
14
|
+
#
|
15
|
+
# This error tree will report in parallel with the grammar structure that
|
16
|
+
# failed. A one-to-one correspondence exists between each error in the
|
17
|
+
# tree and the parslet atom that produced that error.
|
18
|
+
#
|
19
|
+
# The implementor is really free to use these return values as he sees
|
20
|
+
# fit. One example would be to return an error state object from these
|
21
|
+
# methods that is then updated as errors cascade up the parse derivation
|
22
|
+
# tree.
|
23
|
+
#
|
24
|
+
class Tree
|
25
|
+
# Produces an error cause that combines the message at the current level
|
26
|
+
# with the errors that happened at a level below (children).
|
27
|
+
#
|
28
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
29
|
+
# @param source [Source] Source that we're using for this parse. (line
|
30
|
+
# number information...)
|
31
|
+
# @param message [String, Array] Error message at this level.
|
32
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
33
|
+
# @return [Cause] An error tree combining children with message.
|
34
|
+
#
|
35
|
+
def err(atom, source, message, children=nil)
|
36
|
+
position = source.pos
|
37
|
+
Cause.format(source, position, message, children)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Produces an error cause that combines the message at the current level
|
41
|
+
# with the errors that happened at a level below (children).
|
42
|
+
#
|
43
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
44
|
+
# @param source [Source] Source that we're using for this parse. (line
|
45
|
+
# number information...)
|
46
|
+
# @param message [String, Array] Error message at this level.
|
47
|
+
# @param pos [Fixnum] The real position of the error.
|
48
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
49
|
+
# @return [Cause] An error tree combining children with message.
|
50
|
+
#
|
51
|
+
def err_at(atom, source, message, pos, children=nil)
|
52
|
+
position = pos
|
53
|
+
Cause.format(source, position, message, children)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/parslet/export.rb
CHANGED
@@ -68,7 +68,7 @@ class Parslet::Parser
|
|
68
68
|
|
69
69
|
# A helper class that formats Citrus and Treetop grammars as a string.
|
70
70
|
#
|
71
|
-
class PrettyPrinter
|
71
|
+
class PrettyPrinter
|
72
72
|
attr_reader :visitor
|
73
73
|
def initialize(visitor_klass)
|
74
74
|
@visitor = visitor_klass.new(self)
|
@@ -78,7 +78,7 @@ class Parslet::Parser
|
|
78
78
|
# configured in initialize. Returns the string representation of the
|
79
79
|
# Citrus or Treetop grammar.
|
80
80
|
#
|
81
|
-
def pretty_print(name, parslet)
|
81
|
+
def pretty_print(name, parslet)
|
82
82
|
output = "grammar #{name}\n"
|
83
83
|
|
84
84
|
output << rule('root', parslet)
|
@@ -111,7 +111,7 @@ class Parslet::Parser
|
|
111
111
|
# Whenever the visitor encounters an rule in a parslet, it defers the
|
112
112
|
# pretty printing of the rule by calling this method.
|
113
113
|
#
|
114
|
-
def deferred(name, content)
|
114
|
+
def deferred(name, content)
|
115
115
|
@todo ||= []
|
116
116
|
@todo << [name, content]
|
117
117
|
end
|
@@ -120,7 +120,7 @@ class Parslet::Parser
|
|
120
120
|
# transforms some of the things that Ruby allows into other patterns. If
|
121
121
|
# there is collision, we will not detect it for now.
|
122
122
|
#
|
123
|
-
def mangle_name(str)
|
123
|
+
def mangle_name(str)
|
124
124
|
str.to_s.sub(/\?$/, '_p')
|
125
125
|
end
|
126
126
|
end
|
data/lib/parslet/expression.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
class Parslet::Expression::Treetop
|
2
|
-
class Parser < Parslet::Parser
|
2
|
+
class Parser < Parslet::Parser
|
3
3
|
root(:expression)
|
4
4
|
|
5
5
|
rule(:expression) { alternatives }
|
@@ -69,7 +69,7 @@ class Parslet::Expression::Treetop
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
class Transform < Parslet::Transform
|
72
|
+
class Transform < Parslet::Transform
|
73
73
|
|
74
74
|
rule(:repetition => simple(:rep), :sign => simple(:sign)) {
|
75
75
|
min = sign=='+' ? 1 : 0
|
data/lib/parslet/parser.rb
CHANGED
@@ -57,15 +57,11 @@ class Parslet::Parser < Parslet::Atoms::Base
|
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
60
|
-
def try(source, context)
|
60
|
+
def try(source, context)
|
61
61
|
root.try(source, context)
|
62
62
|
end
|
63
63
|
|
64
|
-
def
|
65
|
-
root.error_tree
|
66
|
-
end
|
67
|
-
|
68
|
-
def to_s_inner(prec) # :nodoc:
|
64
|
+
def to_s_inner(prec)
|
69
65
|
root.to_s(prec)
|
70
66
|
end
|
71
67
|
end
|
data/lib/parslet/pattern.rb
CHANGED
@@ -28,10 +28,13 @@ class Parslet::Pattern
|
|
28
28
|
# bindings to be a hash, the mappings in it will be treated like bindings
|
29
29
|
# made during an attempted match.
|
30
30
|
#
|
31
|
-
# Example:
|
32
|
-
#
|
33
31
|
# Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
|
34
32
|
#
|
33
|
+
# @param subtree [String, Hash, Array] poro subtree returned by a parse
|
34
|
+
# @param bindings [Hash] variable bindings to be verified
|
35
|
+
# @return [Hash, nil] On success: variable bindings that allow a match. On
|
36
|
+
# failure: nil
|
37
|
+
#
|
35
38
|
def match(subtree, bindings=nil)
|
36
39
|
bindings = bindings && bindings.dup || Hash.new
|
37
40
|
return bindings if element_match(subtree, @pattern, bindings)
|
@@ -41,6 +44,8 @@ class Parslet::Pattern
|
|
41
44
|
# given by +exp+. This match must respect bindings already made in
|
42
45
|
# +bindings+. Note that bindings is carried along and modified.
|
43
46
|
#
|
47
|
+
# @api private
|
48
|
+
#
|
44
49
|
def element_match(tree, exp, bindings)
|
45
50
|
# p [:elm, tree, exp]
|
46
51
|
case [tree, exp].map { |e| e.class }
|
@@ -63,7 +68,9 @@ class Parslet::Pattern
|
|
63
68
|
end
|
64
69
|
end
|
65
70
|
|
66
|
-
|
71
|
+
# @api private
|
72
|
+
#
|
73
|
+
def element_match_binding(tree, exp, bindings)
|
67
74
|
var_name = exp.variable_name
|
68
75
|
|
69
76
|
# TODO test for the hidden :_ feature.
|
@@ -77,13 +84,17 @@ class Parslet::Pattern
|
|
77
84
|
return true
|
78
85
|
end
|
79
86
|
|
80
|
-
|
87
|
+
# @api private
|
88
|
+
#
|
89
|
+
def element_match_ary_single(sequence, exp, bindings)
|
81
90
|
return false if sequence.size != exp.size
|
82
91
|
|
83
92
|
return sequence.zip(exp).all? { |elt, subexp|
|
84
93
|
element_match(elt, subexp, bindings) }
|
85
94
|
end
|
86
95
|
|
96
|
+
# @api private
|
97
|
+
#
|
87
98
|
def element_match_hash(tree, exp, bindings)
|
88
99
|
# Early failure when one hash is bigger than the other
|
89
100
|
return false unless exp.size == tree.size
|
@@ -5,7 +5,7 @@
|
|
5
5
|
# It defines the most permissive kind of bind, the one that matches any subtree
|
6
6
|
# whatever it looks like.
|
7
7
|
#
|
8
|
-
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
|
8
|
+
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
|
9
9
|
def variable_name
|
10
10
|
symbol
|
11
11
|
end
|
@@ -33,7 +33,7 @@ end
|
|
33
33
|
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
34
34
|
# elements or a collection of attributes.
|
35
35
|
#
|
36
|
-
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
|
36
|
+
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
|
37
37
|
def can_bind?(subtree)
|
38
38
|
not [Hash, Array].include?(subtree.class)
|
39
39
|
end
|
@@ -41,7 +41,7 @@ end
|
|
41
41
|
|
42
42
|
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
|
43
43
|
#
|
44
|
-
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
|
44
|
+
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
|
45
45
|
def can_bind?(subtree)
|
46
46
|
subtree.kind_of?(Array) &&
|
47
47
|
(not subtree.any? { |el| [Hash, Array].include?(el.class) })
|
data/lib/parslet/rig/rspec.rb
CHANGED
@@ -7,8 +7,8 @@ RSpec::Matchers.define(:parse) do |input, opts|
|
|
7
7
|
block ?
|
8
8
|
block.call(result) :
|
9
9
|
(as == result || as.nil?)
|
10
|
-
rescue Parslet::ParseFailed
|
11
|
-
trace =
|
10
|
+
rescue Parslet::ParseFailed => ex
|
11
|
+
trace = ex.cause.ascii_tree if opts && opts[:trace]
|
12
12
|
false
|
13
13
|
end
|
14
14
|
end
|
data/lib/parslet/slice.rb
CHANGED
@@ -98,10 +98,4 @@ class Parslet::Slice
|
|
98
98
|
def inspect
|
99
99
|
str.inspect << "@#{offset}"
|
100
100
|
end
|
101
|
-
end
|
102
|
-
|
103
|
-
# Raised when trying to do an operation on slices that cannot succeed, like
|
104
|
-
# adding non-adjacent slices. See Parslet::Slice.
|
105
|
-
#
|
106
|
-
class Parslet::InvalidSliceOperation < StandardError
|
107
101
|
end
|
data/lib/parslet/source.rb
CHANGED
@@ -4,82 +4,63 @@ require 'stringio'
|
|
4
4
|
require 'parslet/source/line_cache'
|
5
5
|
|
6
6
|
module Parslet
|
7
|
-
# Wraps the input
|
8
|
-
# smaller than what IO offers, but enhances it with a #column and #line
|
9
|
-
# method for the current position.
|
7
|
+
# Wraps the input string for parslet.
|
10
8
|
#
|
11
9
|
class Source
|
12
|
-
def initialize(
|
13
|
-
|
14
|
-
io = StringIO.new(io)
|
15
|
-
end
|
10
|
+
def initialize(str)
|
11
|
+
raise ArgumentError unless str.respond_to?(:to_str)
|
16
12
|
|
17
|
-
@
|
13
|
+
@pos = 0
|
14
|
+
@str = str
|
15
|
+
|
18
16
|
@line_cache = LineCache.new
|
17
|
+
@line_cache.scan_for_line_endings(0, @str)
|
19
18
|
end
|
20
19
|
|
21
|
-
#
|
22
|
-
# bytes end in the middle of a multibyte representation of a char, that
|
23
|
-
# char is returned fully.
|
20
|
+
# Checks if the given pattern matches at the current input position.
|
24
21
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# source.read(7) # reads 7 bytes, then to the next char boundary.
|
22
|
+
# @param pattern [Regexp, String] pattern to check for
|
23
|
+
# @return [Boolean] true if the pattern matches at #pos
|
28
24
|
#
|
29
|
-
def
|
30
|
-
|
31
|
-
read_slice(n)
|
32
|
-
end
|
33
|
-
|
34
|
-
def eof?
|
35
|
-
@io.eof?
|
36
|
-
end
|
37
|
-
def pos
|
38
|
-
@io.pos
|
39
|
-
end
|
40
|
-
def pos=(new_pos)
|
41
|
-
@io.pos = new_pos
|
25
|
+
def matches?(pattern)
|
26
|
+
@str.index(pattern, @pos) == @pos
|
42
27
|
end
|
43
|
-
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
28
|
+
alias match matches?
|
29
|
+
|
30
|
+
# Consumes n characters from the input, returning them as a slice of the
|
31
|
+
# input.
|
47
32
|
#
|
48
|
-
def
|
49
|
-
@
|
33
|
+
def consume(n)
|
34
|
+
slice_str = @str.slice(@pos, n)
|
35
|
+
slice = Parslet::Slice.new(
|
36
|
+
slice_str,
|
37
|
+
pos,
|
38
|
+
@line_cache)
|
39
|
+
|
40
|
+
@pos += slice_str.size
|
41
|
+
return slice
|
50
42
|
end
|
51
|
-
|
52
|
-
#
|
53
|
-
# by pos. If pos is nil, the current source position will be the error
|
54
|
-
# position.
|
43
|
+
|
44
|
+
# Returns how many chars remain in the input.
|
55
45
|
#
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
Cause.format(self, real_pos, message)
|
46
|
+
def chars_left
|
47
|
+
@str.size - @pos
|
60
48
|
end
|
61
|
-
|
62
|
-
private
|
63
|
-
def read_slice(needed)
|
64
|
-
start = @io.pos
|
65
|
-
buf = @io.gets(nil, needed)
|
66
|
-
|
67
|
-
# cache line ends
|
68
|
-
@line_cache.scan_for_line_endings(start, buf)
|
69
49
|
|
70
|
-
|
50
|
+
def eof?
|
51
|
+
@pos >= @str.size
|
71
52
|
end
|
72
|
-
|
73
|
-
if RUBY_VERSION !~ /^1.9/
|
74
|
-
def read_slice(needed)
|
75
|
-
start = @io.pos
|
76
|
-
buf = @io.read(needed)
|
77
53
|
|
78
|
-
|
79
|
-
|
54
|
+
# Position of the parse as a character offset into the original string.
|
55
|
+
# @note: Encodings...
|
56
|
+
attr_accessor :pos
|
80
57
|
|
81
|
-
|
82
|
-
|
58
|
+
# Returns a <line, column> tuple for the given position. If no position is
|
59
|
+
# given, line/column information is returned for the current position
|
60
|
+
# given by #pos.
|
61
|
+
#
|
62
|
+
def line_and_column(position=nil)
|
63
|
+
@line_cache.line_and_column(position || self.pos)
|
83
64
|
end
|
84
65
|
end
|
85
66
|
end
|