parslet 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
@@ -5,8 +5,7 @@ class Parslet::Atoms::Base
5
5
  # begin
6
6
  # tree = parser.parse('something')
7
7
  # rescue Parslet::ParseFailed => error
8
- # puts error
9
- # puts parser.error_tree
8
+ # puts parser.cause.ascii_tree
10
9
  # end
11
10
  #
12
11
  # into a convenient method.
@@ -23,13 +22,14 @@ class Parslet::Atoms::Base
23
22
  #
24
23
  # FooParser.new.parse_with_debug('bar')
25
24
  #
26
- def parse_with_debug str
27
- parse str
25
+ # @see Parslet::Atoms::Base#parse
26
+ #
27
+ def parse_with_debug str, opts={}
28
+ parse str, opts
28
29
  rescue Parslet::UnconsumedInput => error
29
30
  puts error
30
31
  rescue Parslet::ParseFailed => error
31
- puts error
32
- puts error_tree
32
+ puts error.cause.ascii_tree
33
33
  end
34
34
 
35
35
  end
@@ -0,0 +1,7 @@
1
+ # A namespace for all error reporters.
2
+ #
3
+ module Parslet::ErrorReporter
4
+ end
5
+
6
+ require 'parslet/error_reporter/tree'
7
+ require 'parslet/error_reporter/deepest'
@@ -0,0 +1,95 @@
1
+ module Parslet
2
+ module ErrorReporter
3
+ # Instead of reporting the latest error that happens like {Tree} does,
4
+ # this class reports the deepest error. Depth is defined here as how
5
+ # advanced into the input an error happens. The errors close to the
6
+ # greatest depth tend to be more relevant to the end user, since they
7
+ # specify what could be done to make them go away.
8
+ #
9
+ # More specifically, errors produced by this reporter won't be related to
10
+ # the structure of the grammar at all. The positions of the errors will
11
+ # be advanced and convey at every grammar level what the deepest rule
12
+ # was to fail.
13
+ #
14
+ class Deepest
15
+ def initialize
16
+ @deepest_cause = nil
17
+ end
18
+
19
+ # Produces an error cause that combines the message at the current level
20
+ # with the errors that happened at a level below (children).
21
+ #
22
+ # @param atom [Parslet::Atoms::Base] parslet that failed
23
+ # @param source [Source] Source that we're using for this parse. (line
24
+ # number information...)
25
+ # @param message [String, Array] Error message at this level.
26
+ # @param children [Array] A list of errors from a deeper level (or nil).
27
+ # @return [Cause] An error tree combining children with message.
28
+ #
29
+ def err(atom, source, message, children=nil)
30
+ position = source.pos
31
+ cause = Cause.format(source, position, message, children)
32
+ return deepest(cause)
33
+ end
34
+
35
+ # Produces an error cause that combines the message at the current level
36
+ # with the errors that happened at a level below (children).
37
+ #
38
+ # @param atom [Parslet::Atoms::Base] parslet that failed
39
+ # @param source [Source] Source that we're using for this parse. (line
40
+ # number information...)
41
+ # @param message [String, Array] Error message at this level.
42
+ # @param pos [Fixnum] The real position of the error.
43
+ # @param children [Array] A list of errors from a deeper level (or nil).
44
+ # @return [Cause] An error tree combining children with message.
45
+ #
46
+ def err_at(atom, source, message, pos, children=nil)
47
+ position = pos
48
+ cause = Cause.format(source, position, message, children)
49
+ return deepest(cause)
50
+ end
51
+
52
+ # Returns the cause that is currently deepest. Mainly for specs.
53
+ #
54
+ attr_reader :deepest_cause
55
+
56
+ # Checks to see if the lineage of the cause given includes a cause with
57
+ # an error position at least as deep as the current deepest cause stored. If
58
+ # yes, it passes the cause through to the caller. If no, it returns the
59
+ # current deepest error that was saved as a reference.
60
+ #
61
+ def deepest(cause)
62
+ rank, leaf = deepest_child(cause)
63
+
64
+ if !deepest_cause || leaf.pos >= deepest_cause.pos
65
+ # This error reaches deeper into the input, save it as reference.
66
+ @deepest_cause = leaf
67
+ return cause
68
+ end
69
+
70
+ return deepest_cause
71
+ end
72
+
73
+ private
74
+ # Returns the leaf from a given error tree with the biggest rank.
75
+ #
76
+ def deepest_child(cause, rank=0)
77
+ max_child = cause
78
+ max_rank = rank
79
+
80
+ if cause.children && !cause.children.empty?
81
+ cause.children.each do |child|
82
+ c_rank, c_cause = deepest_child(child, rank+1)
83
+
84
+ if c_rank > max_rank
85
+ max_rank = c_rank
86
+ max_child = c_cause
87
+ end
88
+ end
89
+ end
90
+
91
+ return max_rank, max_child
92
+ end
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,57 @@
1
+ module Parslet
2
+ module ErrorReporter
3
+ # An error reporter has two central methods, one for reporting errors at
4
+ # the current parse position (#err) and one for reporting errors at a
5
+ # given parse position (#err_at). The reporter can return an object (a
6
+ # 'cause') that will be returned to the caller along with the information
7
+ # that the parse failed.
8
+ #
9
+ # When reporting errors on the outer levels of your parser, these methods
10
+ # get passed a list of error objects ('causes') from the inner levels. In
11
+ # this default implementation, the inner levels are considered error
12
+ # subtrees and are appended to the generated tree node at each level,
13
+ # thereby constructing an error tree.
14
+ #
15
+ # This error tree will report in parallel with the grammar structure that
16
+ # failed. A one-to-one correspondence exists between each error in the
17
+ # tree and the parslet atom that produced that error.
18
+ #
19
+ # The implementor is really free to use these return values as he sees
20
+ # fit. One example would be to return an error state object from these
21
+ # methods that is then updated as errors cascade up the parse derivation
22
+ # tree.
23
+ #
24
+ class Tree
25
+ # Produces an error cause that combines the message at the current level
26
+ # with the errors that happened at a level below (children).
27
+ #
28
+ # @param atom [Parslet::Atoms::Base] parslet that failed
29
+ # @param source [Source] Source that we're using for this parse. (line
30
+ # number information...)
31
+ # @param message [String, Array] Error message at this level.
32
+ # @param children [Array] A list of errors from a deeper level (or nil).
33
+ # @return [Cause] An error tree combining children with message.
34
+ #
35
+ def err(atom, source, message, children=nil)
36
+ position = source.pos
37
+ Cause.format(source, position, message, children)
38
+ end
39
+
40
+ # Produces an error cause that combines the message at the current level
41
+ # with the errors that happened at a level below (children).
42
+ #
43
+ # @param atom [Parslet::Atoms::Base] parslet that failed
44
+ # @param source [Source] Source that we're using for this parse. (line
45
+ # number information...)
46
+ # @param message [String, Array] Error message at this level.
47
+ # @param pos [Fixnum] The real position of the error.
48
+ # @param children [Array] A list of errors from a deeper level (or nil).
49
+ # @return [Cause] An error tree combining children with message.
50
+ #
51
+ def err_at(atom, source, message, pos, children=nil)
52
+ position = pos
53
+ Cause.format(source, position, message, children)
54
+ end
55
+ end
56
+ end
57
+ end
@@ -68,7 +68,7 @@ class Parslet::Parser
68
68
 
69
69
  # A helper class that formats Citrus and Treetop grammars as a string.
70
70
  #
71
- class PrettyPrinter # :nodoc:
71
+ class PrettyPrinter
72
72
  attr_reader :visitor
73
73
  def initialize(visitor_klass)
74
74
  @visitor = visitor_klass.new(self)
@@ -78,7 +78,7 @@ class Parslet::Parser
78
78
  # configured in initialize. Returns the string representation of the
79
79
  # Citrus or Treetop grammar.
80
80
  #
81
- def pretty_print(name, parslet) # :nodoc:
81
+ def pretty_print(name, parslet)
82
82
  output = "grammar #{name}\n"
83
83
 
84
84
  output << rule('root', parslet)
@@ -111,7 +111,7 @@ class Parslet::Parser
111
111
  # Whenever the visitor encounters a rule in a parslet, it defers the
112
112
  # pretty printing of the rule by calling this method.
113
113
  #
114
- def deferred(name, content) # :nodoc:
114
+ def deferred(name, content)
115
115
  @todo ||= []
116
116
  @todo << [name, content]
117
117
  end
@@ -120,7 +120,7 @@ class Parslet::Parser
120
120
  # transforms some of the things that Ruby allows into other patterns. If
121
121
  # there is a collision, we will not detect it for now.
122
122
  #
123
- def mangle_name(str) # :nodoc:
123
+ def mangle_name(str)
124
124
  str.to_s.sub(/\?$/, '_p')
125
125
  end
126
126
  end
@@ -6,8 +6,6 @@
6
6
  # This can be viewed as an extension to parslet and might even be hosted in
7
7
  # its own gem one fine day.
8
8
  #
9
- # NOT FINISHED & EXPERIMENTAL
10
- #
11
9
  class Parslet::Expression
12
10
  include Parslet
13
11
 
@@ -1,5 +1,5 @@
1
1
  class Parslet::Expression::Treetop
2
- class Parser < Parslet::Parser # :nodoc:
2
+ class Parser < Parslet::Parser
3
3
  root(:expression)
4
4
 
5
5
  rule(:expression) { alternatives }
@@ -69,7 +69,7 @@ class Parslet::Expression::Treetop
69
69
  end
70
70
  end
71
71
 
72
- class Transform < Parslet::Transform # :nodoc:
72
+ class Transform < Parslet::Transform
73
73
 
74
74
  rule(:repetition => simple(:rep), :sign => simple(:sign)) {
75
75
  min = sign=='+' ? 1 : 0
@@ -57,15 +57,11 @@ class Parslet::Parser < Parslet::Atoms::Base
57
57
  end
58
58
  end
59
59
 
60
- def try(source, context) # :nodoc:
60
+ def try(source, context)
61
61
  root.try(source, context)
62
62
  end
63
63
 
64
- def error_tree # :nodoc:
65
- root.error_tree
66
- end
67
-
68
- def to_s_inner(prec) # :nodoc:
64
+ def to_s_inner(prec)
69
65
  root.to_s(prec)
70
66
  end
71
67
  end
@@ -28,10 +28,13 @@ class Parslet::Pattern
28
28
  # bindings to be a hash, the mappings in it will be treated like bindings
29
29
  # made during an attempted match.
30
30
  #
31
- # Example:
32
- #
33
31
  # Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
34
32
  #
33
+ # @param subtree [String, Hash, Array] poro subtree returned by a parse
34
+ # @param bindings [Hash] variable bindings to be verified
35
+ # @return [Hash, nil] On success: variable bindings that allow a match. On
36
+ # failure: nil
37
+ #
35
38
  def match(subtree, bindings=nil)
36
39
  bindings = bindings && bindings.dup || Hash.new
37
40
  return bindings if element_match(subtree, @pattern, bindings)
@@ -41,6 +44,8 @@ class Parslet::Pattern
41
44
  # given by +exp+. This match must respect bindings already made in
42
45
  # +bindings+. Note that bindings is carried along and modified.
43
46
  #
47
+ # @api private
48
+ #
44
49
  def element_match(tree, exp, bindings)
45
50
  # p [:elm, tree, exp]
46
51
  case [tree, exp].map { |e| e.class }
@@ -63,7 +68,9 @@ class Parslet::Pattern
63
68
  end
64
69
  end
65
70
 
66
- def element_match_binding(tree, exp, bindings) # :nodoc:
71
+ # @api private
72
+ #
73
+ def element_match_binding(tree, exp, bindings)
67
74
  var_name = exp.variable_name
68
75
 
69
76
  # TODO test for the hidden :_ feature.
@@ -77,13 +84,17 @@ class Parslet::Pattern
77
84
  return true
78
85
  end
79
86
 
80
- def element_match_ary_single(sequence, exp, bindings) # :nodoc:
87
+ # @api private
88
+ #
89
+ def element_match_ary_single(sequence, exp, bindings)
81
90
  return false if sequence.size != exp.size
82
91
 
83
92
  return sequence.zip(exp).all? { |elt, subexp|
84
93
  element_match(elt, subexp, bindings) }
85
94
  end
86
95
 
96
+ # @api private
97
+ #
87
98
  def element_match_hash(tree, exp, bindings)
88
99
  # Early failure when one hash is bigger than the other
89
100
  return false unless exp.size == tree.size
@@ -5,7 +5,7 @@
5
5
  # It defines the most permissive kind of bind, the one that matches any subtree
6
6
  # whatever it looks like.
7
7
  #
8
- class Parslet::Pattern::SubtreeBind < Struct.new(:symbol) # :nodoc:
8
+ class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
9
9
  def variable_name
10
10
  symbol
11
11
  end
@@ -33,7 +33,7 @@ end
33
33
  # Binds a symbol to a simple subtree, one that is neither a sequence of
34
34
  # elements nor a collection of attributes.
35
35
  #
36
- class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind # :nodoc:
36
+ class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
37
37
  def can_bind?(subtree)
38
38
  not [Hash, Array].include?(subtree.class)
39
39
  end
@@ -41,7 +41,7 @@ end
41
41
 
42
42
  # Binds a symbol to a sequence of simple leaves ([element1, element2, ...])
43
43
  #
44
- class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind # :nodoc:
44
+ class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
45
45
  def can_bind?(subtree)
46
46
  subtree.kind_of?(Array) &&
47
47
  (not subtree.any? { |el| [Hash, Array].include?(el.class) })
@@ -7,8 +7,8 @@ RSpec::Matchers.define(:parse) do |input, opts|
7
7
  block ?
8
8
  block.call(result) :
9
9
  (as == result || as.nil?)
10
- rescue Parslet::ParseFailed
11
- trace = parser.error_tree.ascii_tree if opts && opts[:trace]
10
+ rescue Parslet::ParseFailed => ex
11
+ trace = ex.cause.ascii_tree if opts && opts[:trace]
12
12
  false
13
13
  end
14
14
  end
data/lib/parslet/slice.rb CHANGED
@@ -98,10 +98,4 @@ class Parslet::Slice
98
98
  def inspect
99
99
  str.inspect << "@#{offset}"
100
100
  end
101
- end
102
-
103
- # Raised when trying to do an operation on slices that cannot succeed, like
104
- # adding non-adjacent slices. See Parslet::Slice.
105
- #
106
- class Parslet::InvalidSliceOperation < StandardError
107
101
  end
@@ -4,82 +4,63 @@ require 'stringio'
4
4
  require 'parslet/source/line_cache'
5
5
 
6
6
  module Parslet
7
- # Wraps the input IO to parslet. The interface defined by this class is
8
- # smaller than what IO offers, but enhances it with a #column and #line
9
- # method for the current position.
7
+ # Wraps the input string for parslet.
10
8
  #
11
9
  class Source
12
- def initialize(io)
13
- if io.respond_to? :to_str
14
- io = StringIO.new(io)
15
- end
10
+ def initialize(str)
11
+ raise ArgumentError unless str.respond_to?(:to_str)
16
12
 
17
- @io = io
13
+ @pos = 0
14
+ @str = str
15
+
18
16
  @line_cache = LineCache.new
17
+ @line_cache.scan_for_line_endings(0, @str)
19
18
  end
20
19
 
21
- # Reads n bytes from the input and returns a Range instance. If the n
22
- # bytes end in the middle of a multibyte representation of a char, that
23
- # char is returned fully.
20
+ # Checks if the given pattern matches at the current input position.
24
21
  #
25
- # Example:
26
- # source.read(1) # always returns at least one valid char
27
- # source.read(7) # reads 7 bytes, then to the next char boundary.
22
+ # @param pattern [Regexp, String] pattern to check for
23
+ # @return [Boolean] true if the pattern matches at #pos
28
24
  #
29
- def read(n)
30
- raise ArgumentError, "Cannot read < 1 characters at a time." if n < 1
31
- read_slice(n)
32
- end
33
-
34
- def eof?
35
- @io.eof?
36
- end
37
- def pos
38
- @io.pos
39
- end
40
- def pos=(new_pos)
41
- @io.pos = new_pos
25
+ def matches?(pattern)
26
+ @str.index(pattern, @pos) == @pos
42
27
  end
43
-
44
- # Returns a <line, column> tuple for the given position. If no position is
45
- # given, line/column information is returned for the current position given
46
- # by #pos.
28
+ alias match matches?
29
+
30
+ # Consumes n characters from the input, returning them as a slice of the
31
+ # input.
47
32
  #
48
- def line_and_column(position=nil)
49
- @line_cache.line_and_column(position || self.pos)
33
+ def consume(n)
34
+ slice_str = @str.slice(@pos, n)
35
+ slice = Parslet::Slice.new(
36
+ slice_str,
37
+ pos,
38
+ @line_cache)
39
+
40
+ @pos += slice_str.size
41
+ return slice
50
42
  end
51
-
52
- # Formats an error cause at the current position or at the position given
53
- # by pos. If pos is nil, the current source position will be the error
54
- # position.
43
+
44
+ # Returns how many chars remain in the input.
55
45
  #
56
- def error(message, error_pos=nil)
57
- real_pos = (error_pos||self.pos)
58
-
59
- Cause.format(self, real_pos, message)
46
+ def chars_left
47
+ @str.size - @pos
60
48
  end
61
-
62
- private
63
- def read_slice(needed)
64
- start = @io.pos
65
- buf = @io.gets(nil, needed)
66
-
67
- # cache line ends
68
- @line_cache.scan_for_line_endings(start, buf)
69
49
 
70
- Parslet::Slice.new(buf || '', start, @line_cache)
50
+ def eof?
51
+ @pos >= @str.size
71
52
  end
72
-
73
- if RUBY_VERSION !~ /^1.9/
74
- def read_slice(needed)
75
- start = @io.pos
76
- buf = @io.read(needed)
77
53
 
78
- # cache line ends
79
- @line_cache.scan_for_line_endings(start, buf)
54
+ # Position of the parse as a character offset into the original string.
55
+ # @note: Encodings...
56
+ attr_accessor :pos
80
57
 
81
- Parslet::Slice.new(buf || '', start, @line_cache)
82
- end
58
+ # Returns a <line, column> tuple for the given position. If no position is
59
+ # given, line/column information is returned for the current position
60
+ # given by #pos.
61
+ #
62
+ def line_and_column(position=nil)
63
+ @line_cache.line_and_column(position || self.pos)
83
64
  end
84
65
  end
85
66
  end