parslet 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +38 -1
- data/README +33 -21
- data/example/deepest_errors.rb +131 -0
- data/example/email_parser.rb +2 -6
- data/example/ignore.rb +2 -2
- data/example/json.rb +0 -3
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/lib/parslet.rb +65 -51
- data/lib/parslet/atoms.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +11 -12
- data/lib/parslet/atoms/base.rb +57 -99
- data/lib/parslet/atoms/can_flatten.rb +9 -4
- data/lib/parslet/atoms/context.rb +26 -4
- data/lib/parslet/atoms/entity.rb +5 -10
- data/lib/parslet/atoms/lookahead.rb +11 -7
- data/lib/parslet/atoms/named.rb +8 -12
- data/lib/parslet/atoms/re.rb +10 -9
- data/lib/parslet/atoms/repetition.rb +23 -24
- data/lib/parslet/atoms/sequence.rb +10 -16
- data/lib/parslet/atoms/str.rb +11 -13
- data/lib/parslet/cause.rb +45 -13
- data/lib/parslet/convenience.rb +6 -6
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +4 -4
- data/lib/parslet/expression.rb +0 -2
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +2 -6
- data/lib/parslet/pattern.rb +15 -4
- data/lib/parslet/pattern/binding.rb +3 -3
- data/lib/parslet/rig/rspec.rb +2 -2
- data/lib/parslet/slice.rb +0 -6
- data/lib/parslet/source.rb +40 -59
- data/lib/parslet/source/line_cache.rb +2 -2
- data/lib/parslet/transform.rb +13 -7
- data/lib/parslet/transform/context.rb +1 -1
- metadata +69 -26
- data/example/ignore_whitespace.rb +0 -66
- data/lib/parslet/bytecode.rb +0 -6
- data/lib/parslet/bytecode/compiler.rb +0 -138
- data/lib/parslet/bytecode/instructions.rb +0 -358
- data/lib/parslet/bytecode/vm.rb +0 -209
- data/lib/parslet/error_tree.rb +0 -50
data/lib/parslet/convenience.rb
CHANGED
@@ -5,8 +5,7 @@ class Parslet::Atoms::Base
|
|
5
5
|
# begin
|
6
6
|
# tree = parser.parse('something')
|
7
7
|
# rescue Parslet::ParseFailed => error
|
8
|
-
# puts
|
9
|
-
# puts parser.error_tree
|
8
|
+
# puts parser.cause.ascii_tree
|
10
9
|
# end
|
11
10
|
#
|
12
11
|
# into a convenient method.
|
@@ -23,13 +22,14 @@ class Parslet::Atoms::Base
|
|
23
22
|
#
|
24
23
|
# FooParser.new.parse_with_debug('bar')
|
25
24
|
#
|
26
|
-
|
27
|
-
|
25
|
+
# @see Parslet::Atoms::Base#parse
|
26
|
+
#
|
27
|
+
def parse_with_debug str, opts={}
|
28
|
+
parse str, opts
|
28
29
|
rescue Parslet::UnconsumedInput => error
|
29
30
|
puts error
|
30
31
|
rescue Parslet::ParseFailed => error
|
31
|
-
puts error
|
32
|
-
puts error_tree
|
32
|
+
puts error.cause.ascii_tree
|
33
33
|
end
|
34
34
|
|
35
35
|
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module Parslet
|
2
|
+
module ErrorReporter
|
3
|
+
# Instead of reporting the latest error that happens like {Tree} does,
|
4
|
+
# this class reports the deepest error. Depth is defined here as how
|
5
|
+
# advanced into the input an error happens. The errors close to the
|
6
|
+
# greatest depth tend to be more relevant to the end user, since they
|
7
|
+
# specify what could be done to make them go away.
|
8
|
+
#
|
9
|
+
# More specifically, errors produced by this reporter won't be related to
|
10
|
+
# the structure of the grammar at all. The positions of the errors will
|
11
|
+
# be advanced and convey at every grammar level what the deepest rule
|
12
|
+
# was to fail.
|
13
|
+
#
|
14
|
+
class Deepest
|
15
|
+
def initialize
|
16
|
+
@deepest_cause = nil
|
17
|
+
end
|
18
|
+
|
19
|
+
# Produces an error cause that combines the message at the current level
|
20
|
+
# with the errors that happened at a level below (children).
|
21
|
+
#
|
22
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
23
|
+
# @param source [Source] Source that we're using for this parse. (line
|
24
|
+
# number information...)
|
25
|
+
# @param message [String, Array] Error message at this level.
|
26
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
27
|
+
# @return [Cause] An error tree combining children with message.
|
28
|
+
#
|
29
|
+
def err(atom, source, message, children=nil)
|
30
|
+
position = source.pos
|
31
|
+
cause = Cause.format(source, position, message, children)
|
32
|
+
return deepest(cause)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Produces an error cause that combines the message at the current level
|
36
|
+
# with the errors that happened at a level below (children).
|
37
|
+
#
|
38
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
39
|
+
# @param source [Source] Source that we're using for this parse. (line
|
40
|
+
# number information...)
|
41
|
+
# @param message [String, Array] Error message at this level.
|
42
|
+
# @param pos [Fixnum] The real position of the error.
|
43
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
44
|
+
# @return [Cause] An error tree combining children with message.
|
45
|
+
#
|
46
|
+
def err_at(atom, source, message, pos, children=nil)
|
47
|
+
position = pos
|
48
|
+
cause = Cause.format(source, position, message, children)
|
49
|
+
return deepest(cause)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns the cause that is currently deepest. Mainly for specs.
|
53
|
+
#
|
54
|
+
attr_reader :deepest_cause
|
55
|
+
|
56
|
+
# Checks to see if the lineage of the cause given includes a cause with
|
57
|
+
# an error position deeper than the current deepest cause stored. If
|
58
|
+
# yes, it passes the cause through to the caller. If no, it returns the
|
59
|
+
# current deepest error that was saved as a reference.
|
60
|
+
#
|
61
|
+
def deepest(cause)
|
62
|
+
rank, leaf = deepest_child(cause)
|
63
|
+
|
64
|
+
if !deepest_cause || leaf.pos >= deepest_cause.pos
|
65
|
+
# This error reaches deeper into the input, save it as reference.
|
66
|
+
@deepest_cause = leaf
|
67
|
+
return cause
|
68
|
+
end
|
69
|
+
|
70
|
+
return deepest_cause
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
# Returns the leaf from a given error tree with the biggest rank.
|
75
|
+
#
|
76
|
+
def deepest_child(cause, rank=0)
|
77
|
+
max_child = cause
|
78
|
+
max_rank = rank
|
79
|
+
|
80
|
+
if cause.children && !cause.children.empty?
|
81
|
+
cause.children.each do |child|
|
82
|
+
c_rank, c_cause = deepest_child(child, rank+1)
|
83
|
+
|
84
|
+
if c_rank > max_rank
|
85
|
+
max_rank = c_rank
|
86
|
+
max_child = c_cause
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
return max_rank, max_child
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Parslet
|
2
|
+
module ErrorReporter
|
3
|
+
# An error reporter has two central methods, one for reporting errors at
|
4
|
+
# the current parse position (#err) and one for reporting errors at a
|
5
|
+
# given parse position (#err_at). The reporter can return an object (a
|
6
|
+
# 'cause') that will be returned to the caller along with the information
|
7
|
+
# that the parse failed.
|
8
|
+
#
|
9
|
+
# When reporting errors on the outer levels of your parser, these methods
|
10
|
+
# get passed a list of error objects ('causes') from the inner levels. In
|
11
|
+
# this default implementation, the inner levels are considered error
|
12
|
+
# subtrees and are appended to the generated tree node at each level,
|
13
|
+
# thereby constructing an error tree.
|
14
|
+
#
|
15
|
+
# This error tree will report in parallel with the grammar structure that
|
16
|
+
# failed. A one-to-one correspondence exists between each error in the
|
17
|
+
# tree and the parslet atom that produced that error.
|
18
|
+
#
|
19
|
+
# The implementor is really free to use these return values as he sees
|
20
|
+
# fit. One example would be to return an error state object from these
|
21
|
+
# methods that is then updated as errors cascade up the parse derivation
|
22
|
+
# tree.
|
23
|
+
#
|
24
|
+
class Tree
|
25
|
+
# Produces an error cause that combines the message at the current level
|
26
|
+
# with the errors that happened at a level below (children).
|
27
|
+
#
|
28
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
29
|
+
# @param source [Source] Source that we're using for this parse. (line
|
30
|
+
# number information...)
|
31
|
+
# @param message [String, Array] Error message at this level.
|
32
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
33
|
+
# @return [Cause] An error tree combining children with message.
|
34
|
+
#
|
35
|
+
def err(atom, source, message, children=nil)
|
36
|
+
position = source.pos
|
37
|
+
Cause.format(source, position, message, children)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Produces an error cause that combines the message at the current level
|
41
|
+
# with the errors that happened at a level below (children).
|
42
|
+
#
|
43
|
+
# @param atom [Parslet::Atoms::Base] parslet that failed
|
44
|
+
# @param source [Source] Source that we're using for this parse. (line
|
45
|
+
# number information...)
|
46
|
+
# @param message [String, Array] Error message at this level.
|
47
|
+
# @param pos [Fixnum] The real position of the error.
|
48
|
+
# @param children [Array] A list of errors from a deeper level (or nil).
|
49
|
+
# @return [Cause] An error tree combining children with message.
|
50
|
+
#
|
51
|
+
def err_at(atom, source, message, pos, children=nil)
|
52
|
+
position = pos
|
53
|
+
Cause.format(source, position, message, children)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/parslet/export.rb
CHANGED
@@ -68,7 +68,7 @@ class Parslet::Parser
|
|
68
68
|
|
69
69
|
# A helper class that formats Citrus and Treetop grammars as a string.
|
70
70
|
#
|
71
|
-
class PrettyPrinter
|
71
|
+
class PrettyPrinter
|
72
72
|
attr_reader :visitor
|
73
73
|
def initialize(visitor_klass)
|
74
74
|
@visitor = visitor_klass.new(self)
|
@@ -78,7 +78,7 @@ class Parslet::Parser
|
|
78
78
|
# configured in initialize. Returns the string representation of the
|
79
79
|
# Citrus or Treetop grammar.
|
80
80
|
#
|
81
|
-
def pretty_print(name, parslet)
|
81
|
+
def pretty_print(name, parslet)
|
82
82
|
output = "grammar #{name}\n"
|
83
83
|
|
84
84
|
output << rule('root', parslet)
|
@@ -111,7 +111,7 @@ class Parslet::Parser
|
|
111
111
|
# Whenever the visitor encounters an rule in a parslet, it defers the
|
112
112
|
# pretty printing of the rule by calling this method.
|
113
113
|
#
|
114
|
-
def deferred(name, content)
|
114
|
+
def deferred(name, content)
|
115
115
|
@todo ||= []
|
116
116
|
@todo << [name, content]
|
117
117
|
end
|
@@ -120,7 +120,7 @@ class Parslet::Parser
|
|
120
120
|
# transforms some of the things that Ruby allows into other patterns. If
|
121
121
|
# there is collision, we will not detect it for now.
|
122
122
|
#
|
123
|
-
def mangle_name(str)
|
123
|
+
def mangle_name(str)
|
124
124
|
str.to_s.sub(/\?$/, '_p')
|
125
125
|
end
|
126
126
|
end
|
data/lib/parslet/expression.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
class Parslet::Expression::Treetop
|
2
|
-
class Parser < Parslet::Parser
|
2
|
+
class Parser < Parslet::Parser
|
3
3
|
root(:expression)
|
4
4
|
|
5
5
|
rule(:expression) { alternatives }
|
@@ -69,7 +69,7 @@ class Parslet::Expression::Treetop
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
class Transform < Parslet::Transform
|
72
|
+
class Transform < Parslet::Transform
|
73
73
|
|
74
74
|
rule(:repetition => simple(:rep), :sign => simple(:sign)) {
|
75
75
|
min = sign=='+' ? 1 : 0
|
data/lib/parslet/parser.rb
CHANGED
@@ -57,15 +57,11 @@ class Parslet::Parser < Parslet::Atoms::Base
|
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
60
|
-
def try(source, context)
|
60
|
+
def try(source, context)
|
61
61
|
root.try(source, context)
|
62
62
|
end
|
63
63
|
|
64
|
-
def error_tree
|
65
|
-
root.error_tree
|
66
|
-
end
|
67
|
-
|
68
|
-
def to_s_inner(prec) # :nodoc:
|
64
|
+
def to_s_inner(prec)
|
69
65
|
root.to_s(prec)
|
70
66
|
end
|
71
67
|
end
|
data/lib/parslet/pattern.rb
CHANGED
@@ -28,10 +28,13 @@ class Parslet::Pattern
|
|
28
28
|
# bindings to be a hash, the mappings in it will be treated like bindings
|
29
29
|
# made during an attempted match.
|
30
30
|
#
|
31
|
-
# Example:
|
32
|
-
#
|
33
31
|
# Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
|
34
32
|
#
|
33
|
+
# @param subtree [String, Hash, Array] poro subtree returned by a parse
|
34
|
+
# @param bindings [Hash] variable bindings to be verified
|
35
|
+
# @return [Hash, nil] On success: variable bindings that allow a match. On
|
36
|
+
# failure: nil
|
37
|
+
#
|
35
38
|
def match(subtree, bindings=nil)
|
36
39
|
bindings = bindings && bindings.dup || Hash.new
|
37
40
|
return bindings if element_match(subtree, @pattern, bindings)
|
@@ -41,6 +44,8 @@ class Parslet::Pattern
|
|
41
44
|
# given by +exp+. This match must respect bindings already made in
|
42
45
|
# +bindings+. Note that bindings is carried along and modified.
|
43
46
|
#
|
47
|
+
# @api private
|
48
|
+
#
|
44
49
|
def element_match(tree, exp, bindings)
|
45
50
|
# p [:elm, tree, exp]
|
46
51
|
case [tree, exp].map { |e| e.class }
|
@@ -63,7 +68,9 @@ class Parslet::Pattern
|
|
63
68
|
end
|
64
69
|
end
|
65
70
|
|
66
|
-
|
71
|
+
# @api private
|
72
|
+
#
|
73
|
+
def element_match_binding(tree, exp, bindings)
|
67
74
|
var_name = exp.variable_name
|
68
75
|
|
69
76
|
# TODO test for the hidden :_ feature.
|
@@ -77,13 +84,17 @@ class Parslet::Pattern
|
|
77
84
|
return true
|
78
85
|
end
|
79
86
|
|
80
|
-
|
87
|
+
# @api private
|
88
|
+
#
|
89
|
+
def element_match_ary_single(sequence, exp, bindings)
|
81
90
|
return false if sequence.size != exp.size
|
82
91
|
|
83
92
|
return sequence.zip(exp).all? { |elt, subexp|
|
84
93
|
element_match(elt, subexp, bindings) }
|
85
94
|
end
|
86
95
|
|
96
|
+
# @api private
|
97
|
+
#
|
87
98
|
def element_match_hash(tree, exp, bindings)
|
88
99
|
# Early failure when one hash is bigger than the other
|
89
100
|
return false unless exp.size == tree.size
|
@@ -5,7 +5,7 @@
|
|
5
5
|
# It defines the most permissive kind of bind, the one that matches any subtree
|
6
6
|
# whatever it looks like.
|
7
7
|
#
|
8
|
-
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
|
8
|
+
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
|
9
9
|
def variable_name
|
10
10
|
symbol
|
11
11
|
end
|
@@ -33,7 +33,7 @@ end
|
|
33
33
|
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
34
34
|
# elements or a collection of attributes.
|
35
35
|
#
|
36
|
-
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
|
36
|
+
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
|
37
37
|
def can_bind?(subtree)
|
38
38
|
not [Hash, Array].include?(subtree.class)
|
39
39
|
end
|
@@ -41,7 +41,7 @@ end
|
|
41
41
|
|
42
42
|
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
|
43
43
|
#
|
44
|
-
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
|
44
|
+
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
|
45
45
|
def can_bind?(subtree)
|
46
46
|
subtree.kind_of?(Array) &&
|
47
47
|
(not subtree.any? { |el| [Hash, Array].include?(el.class) })
|
data/lib/parslet/rig/rspec.rb
CHANGED
@@ -7,8 +7,8 @@ RSpec::Matchers.define(:parse) do |input, opts|
|
|
7
7
|
block ?
|
8
8
|
block.call(result) :
|
9
9
|
(as == result || as.nil?)
|
10
|
-
rescue Parslet::ParseFailed
|
11
|
-
trace =
|
10
|
+
rescue Parslet::ParseFailed => ex
|
11
|
+
trace = ex.cause.ascii_tree if opts && opts[:trace]
|
12
12
|
false
|
13
13
|
end
|
14
14
|
end
|
data/lib/parslet/slice.rb
CHANGED
@@ -98,10 +98,4 @@ class Parslet::Slice
|
|
98
98
|
def inspect
|
99
99
|
str.inspect << "@#{offset}"
|
100
100
|
end
|
101
|
-
end
|
102
|
-
|
103
|
-
# Raised when trying to do an operation on slices that cannot succeed, like
|
104
|
-
# adding non-adjacent slices. See Parslet::Slice.
|
105
|
-
#
|
106
|
-
class Parslet::InvalidSliceOperation < StandardError
|
107
101
|
end
|
data/lib/parslet/source.rb
CHANGED
@@ -4,82 +4,63 @@ require 'stringio'
|
|
4
4
|
require 'parslet/source/line_cache'
|
5
5
|
|
6
6
|
module Parslet
|
7
|
-
# Wraps the input
|
8
|
-
# smaller than what IO offers, but enhances it with a #column and #line
|
9
|
-
# method for the current position.
|
7
|
+
# Wraps the input string for parslet.
|
10
8
|
#
|
11
9
|
class Source
|
12
|
-
def initialize(io)
|
13
|
-
|
14
|
-
io = StringIO.new(io)
|
15
|
-
end
|
10
|
+
def initialize(str)
|
11
|
+
raise ArgumentError unless str.respond_to?(:to_str)
|
16
12
|
|
17
|
-
@io = io
|
13
|
+
@pos = 0
|
14
|
+
@str = str
|
15
|
+
|
18
16
|
@line_cache = LineCache.new
|
17
|
+
@line_cache.scan_for_line_endings(0, @str)
|
19
18
|
end
|
20
19
|
|
21
|
-
#
|
22
|
-
# bytes end in the middle of a multibyte representation of a char, that
|
23
|
-
# char is returned fully.
|
20
|
+
# Checks if the given pattern matches at the current input position.
|
24
21
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# source.read(7) # reads 7 bytes, then to the next char boundary.
|
22
|
+
# @param pattern [Regexp, String] pattern to check for
|
23
|
+
# @return [Boolean] true if the pattern matches at #pos
|
28
24
|
#
|
29
|
-
def
|
30
|
-
|
31
|
-
read_slice(n)
|
32
|
-
end
|
33
|
-
|
34
|
-
def eof?
|
35
|
-
@io.eof?
|
36
|
-
end
|
37
|
-
def pos
|
38
|
-
@io.pos
|
39
|
-
end
|
40
|
-
def pos=(new_pos)
|
41
|
-
@io.pos = new_pos
|
25
|
+
def matches?(pattern)
|
26
|
+
@str.index(pattern, @pos) == @pos
|
42
27
|
end
|
43
|
-
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
28
|
+
alias match matches?
|
29
|
+
|
30
|
+
# Consumes n characters from the input, returning them as a slice of the
|
31
|
+
# input.
|
47
32
|
#
|
48
|
-
def
|
49
|
-
@
|
33
|
+
def consume(n)
|
34
|
+
slice_str = @str.slice(@pos, n)
|
35
|
+
slice = Parslet::Slice.new(
|
36
|
+
slice_str,
|
37
|
+
pos,
|
38
|
+
@line_cache)
|
39
|
+
|
40
|
+
@pos += slice_str.size
|
41
|
+
return slice
|
50
42
|
end
|
51
|
-
|
52
|
-
#
|
53
|
-
# by pos. If pos is nil, the current source position will be the error
|
54
|
-
# position.
|
43
|
+
|
44
|
+
# Returns how many chars remain in the input.
|
55
45
|
#
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
Cause.format(self, real_pos, message)
|
46
|
+
def chars_left
|
47
|
+
@str.size - @pos
|
60
48
|
end
|
61
|
-
|
62
|
-
private
|
63
|
-
def read_slice(needed)
|
64
|
-
start = @io.pos
|
65
|
-
buf = @io.gets(nil, needed)
|
66
|
-
|
67
|
-
# cache line ends
|
68
|
-
@line_cache.scan_for_line_endings(start, buf)
|
69
49
|
|
70
|
-
|
50
|
+
def eof?
|
51
|
+
@pos >= @str.size
|
71
52
|
end
|
72
|
-
|
73
|
-
if RUBY_VERSION !~ /^1.9/
|
74
|
-
def read_slice(needed)
|
75
|
-
start = @io.pos
|
76
|
-
buf = @io.read(needed)
|
77
53
|
|
78
|
-
|
79
|
-
|
54
|
+
# Position of the parse as a character offset into the original string.
|
55
|
+
# @note: Encodings...
|
56
|
+
attr_accessor :pos
|
80
57
|
|
81
|
-
|
82
|
-
|
58
|
+
# Returns a <line, column> tuple for the given position. If no position is
|
59
|
+
# given, line/column information is returned for the current position
|
60
|
+
# given by #pos.
|
61
|
+
#
|
62
|
+
def line_and_column(position=nil)
|
63
|
+
@line_cache.line_and_column(position || self.pos)
|
83
64
|
end
|
84
65
|
end
|
85
66
|
end
|