parslet 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
@@ -5,8 +5,7 @@ class Parslet::Atoms::Base
5
5
  # begin
6
6
  # tree = parser.parse('something')
7
7
  # rescue Parslet::ParseFailed => error
8
- # puts error
9
- # puts parser.error_tree
8
+ # puts parser.cause.ascii_tree
10
9
  # end
11
10
  #
12
11
  # into a convenient method.
@@ -23,13 +22,14 @@ class Parslet::Atoms::Base
23
22
  #
24
23
  # FooParser.new.parse_with_debug('bar')
25
24
  #
26
- def parse_with_debug str
27
- parse str
25
+ # @see Parslet::Atoms::Base#parse
26
+ #
27
+ def parse_with_debug str, opts={}
28
+ parse str, opts
28
29
  rescue Parslet::UnconsumedInput => error
29
30
  puts error
30
31
  rescue Parslet::ParseFailed => error
31
- puts error
32
- puts error_tree
32
+ puts error.cause.ascii_tree
33
33
  end
34
34
 
35
35
  end
@@ -0,0 +1,7 @@
1
+ # A namespace for all error reporters.
2
+ #
3
+ module Parslet::ErrorReporter
4
+ end
5
+
6
+ require 'parslet/error_reporter/tree'
7
+ require 'parslet/error_reporter/deepest'
@@ -0,0 +1,95 @@
1
+ module Parslet
2
+ module ErrorReporter
3
+ # Instead of reporting the latest error that happens like {Tree} does,
4
+ # this class reports the deepest error. Depth is defined here as how
5
+ # advanced into the input an error happens. The errors close to the
6
+ # greatest depth tend to be more relevant to the end user, since they
7
+ # specify what could be done to make them go away.
8
+ #
9
+ # More specifically, errors produced by this reporter won't be related to
10
+ # the structure of the grammar at all. The positions of the errors will
11
+ # be advanced and convey at every grammar level what the deepest rule
12
+ # was to fail.
13
+ #
14
+ class Deepest
15
+ def initialize
16
+ @deepest_cause = nil
17
+ end
18
+
19
+ # Produces an error cause that combines the message at the current level
20
+ # with the errors that happened at a level below (children).
21
+ #
22
+ # @param atom [Parslet::Atoms::Base] parslet that failed
23
+ # @param source [Source] Source that we're using for this parse. (line
24
+ # number information...)
25
+ # @param message [String, Array] Error message at this level.
26
+ # @param children [Array] A list of errors from a deeper level (or nil).
27
+ # @return [Cause] An error tree combining children with message.
28
+ #
29
+ def err(atom, source, message, children=nil)
30
+ position = source.pos
31
+ cause = Cause.format(source, position, message, children)
32
+ return deepest(cause)
33
+ end
34
+
35
+ # Produces an error cause that combines the message at the current level
36
+ # with the errors that happened at a level below (children).
37
+ #
38
+ # @param atom [Parslet::Atoms::Base] parslet that failed
39
+ # @param source [Source] Source that we're using for this parse. (line
40
+ # number information...)
41
+ # @param message [String, Array] Error message at this level.
42
+ # @param pos [Fixnum] The real position of the error.
43
+ # @param children [Array] A list of errors from a deeper level (or nil).
44
+ # @return [Cause] An error tree combining children with message.
45
+ #
46
+ def err_at(atom, source, message, pos, children=nil)
47
+ position = pos
48
+ cause = Cause.format(source, position, message, children)
49
+ return deepest(cause)
50
+ end
51
+
52
+ # Returns the cause that is currently deepest. Mainly for specs.
53
+ #
54
+ attr_reader :deepest_cause
55
+
56
+ # Checks to see if the lineage of the cause given includes a cause with
57
+ # an error position at least as deep as the current deepest cause stored. If
58
+ # yes, it passes the cause through to the caller. If no, it returns the
59
+ # current deepest error that was saved as a reference.
60
+ #
61
+ def deepest(cause)
62
+ rank, leaf = deepest_child(cause)
63
+
64
+ if !deepest_cause || leaf.pos >= deepest_cause.pos
65
+ # This error reaches deeper into the input, save it as reference.
66
+ @deepest_cause = leaf
67
+ return cause
68
+ end
69
+
70
+ return deepest_cause
71
+ end
72
+
73
+ private
74
+ # Returns the biggest rank found in a given error tree and the leaf that has it.
75
+ #
76
+ def deepest_child(cause, rank=0)
77
+ max_child = cause
78
+ max_rank = rank
79
+
80
+ if cause.children && !cause.children.empty?
81
+ cause.children.each do |child|
82
+ c_rank, c_cause = deepest_child(child, rank+1)
83
+
84
+ if c_rank > max_rank
85
+ max_rank = c_rank
86
+ max_child = c_cause
87
+ end
88
+ end
89
+ end
90
+
91
+ return max_rank, max_child
92
+ end
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,57 @@
1
+ module Parslet
2
+ module ErrorReporter
3
+ # An error reporter has two central methods, one for reporting errors at
4
+ # the current parse position (#err) and one for reporting errors at a
5
+ # given parse position (#err_at). The reporter can return an object (a
6
+ # 'cause') that will be returned to the caller along with the information
7
+ # that the parse failed.
8
+ #
9
+ # When reporting errors on the outer levels of your parser, these methods
10
+ # get passed a list of error objects ('causes') from the inner levels. In
11
+ # this default implementation, the inner levels are considered error
12
+ # subtrees and are appended to the generated tree node at each level,
13
+ # thereby constructing an error tree.
14
+ #
15
+ # This error tree will report in parallel with the grammar structure that
16
+ # failed. A one-to-one correspondence exists between each error in the
17
+ # tree and the parslet atom that produced that error.
18
+ #
19
+ # The implementor is really free to use these return values as he sees
20
+ # fit. One example would be to return an error state object from these
21
+ # methods that is then updated as errors cascade up the parse derivation
22
+ # tree.
23
+ #
24
+ class Tree
25
+ # Produces an error cause that combines the message at the current level
26
+ # with the errors that happened at a level below (children).
27
+ #
28
+ # @param atom [Parslet::Atoms::Base] parslet that failed
29
+ # @param source [Source] Source that we're using for this parse. (line
30
+ # number information...)
31
+ # @param message [String, Array] Error message at this level.
32
+ # @param children [Array] A list of errors from a deeper level (or nil).
33
+ # @return [Cause] An error tree combining children with message.
34
+ #
35
+ def err(atom, source, message, children=nil)
36
+ position = source.pos
37
+ Cause.format(source, position, message, children)
38
+ end
39
+
40
+ # Produces an error cause that combines the message at the current level
41
+ # with the errors that happened at a level below (children).
42
+ #
43
+ # @param atom [Parslet::Atoms::Base] parslet that failed
44
+ # @param source [Source] Source that we're using for this parse. (line
45
+ # number information...)
46
+ # @param message [String, Array] Error message at this level.
47
+ # @param pos [Fixnum] The real position of the error.
48
+ # @param children [Array] A list of errors from a deeper level (or nil).
49
+ # @return [Cause] An error tree combining children with message.
50
+ #
51
+ def err_at(atom, source, message, pos, children=nil)
52
+ position = pos
53
+ Cause.format(source, position, message, children)
54
+ end
55
+ end
56
+ end
57
+ end
@@ -68,7 +68,7 @@ class Parslet::Parser
68
68
 
69
69
  # A helper class that formats Citrus and Treetop grammars as a string.
70
70
  #
71
- class PrettyPrinter # :nodoc:
71
+ class PrettyPrinter
72
72
  attr_reader :visitor
73
73
  def initialize(visitor_klass)
74
74
  @visitor = visitor_klass.new(self)
@@ -78,7 +78,7 @@ class Parslet::Parser
78
78
  # configured in initialize. Returns the string representation of the
79
79
  # Citrus or Treetop grammar.
80
80
  #
81
- def pretty_print(name, parslet) # :nodoc:
81
+ def pretty_print(name, parslet)
82
82
  output = "grammar #{name}\n"
83
83
 
84
84
  output << rule('root', parslet)
@@ -111,7 +111,7 @@ class Parslet::Parser
111
111
  # Whenever the visitor encounters a rule in a parslet, it defers the
112
112
  # pretty printing of the rule by calling this method.
113
113
  #
114
- def deferred(name, content) # :nodoc:
114
+ def deferred(name, content)
115
115
  @todo ||= []
116
116
  @todo << [name, content]
117
117
  end
@@ -120,7 +120,7 @@ class Parslet::Parser
120
120
  # transforms some of the things that Ruby allows into other patterns. If
121
121
  # there is a collision, we will not detect it for now.
122
122
  #
123
- def mangle_name(str) # :nodoc:
123
+ def mangle_name(str)
124
124
  str.to_s.sub(/\?$/, '_p')
125
125
  end
126
126
  end
@@ -6,8 +6,6 @@
6
6
  # This can be viewed as an extension to parslet and might even be hosted in
7
7
  # its own gem one fine day.
8
8
  #
9
- # NOT FINISHED & EXPERIMENTAL
10
- #
11
9
  class Parslet::Expression
12
10
  include Parslet
13
11
 
@@ -1,5 +1,5 @@
1
1
  class Parslet::Expression::Treetop
2
- class Parser < Parslet::Parser # :nodoc:
2
+ class Parser < Parslet::Parser
3
3
  root(:expression)
4
4
 
5
5
  rule(:expression) { alternatives }
@@ -69,7 +69,7 @@ class Parslet::Expression::Treetop
69
69
  end
70
70
  end
71
71
 
72
- class Transform < Parslet::Transform # :nodoc:
72
+ class Transform < Parslet::Transform
73
73
 
74
74
  rule(:repetition => simple(:rep), :sign => simple(:sign)) {
75
75
  min = sign=='+' ? 1 : 0
@@ -57,15 +57,11 @@ class Parslet::Parser < Parslet::Atoms::Base
57
57
  end
58
58
  end
59
59
 
60
- def try(source, context) # :nodoc:
60
+ def try(source, context)
61
61
  root.try(source, context)
62
62
  end
63
63
 
64
- def error_tree # :nodoc:
65
- root.error_tree
66
- end
67
-
68
- def to_s_inner(prec) # :nodoc:
64
+ def to_s_inner(prec)
69
65
  root.to_s(prec)
70
66
  end
71
67
  end
@@ -28,10 +28,13 @@ class Parslet::Pattern
28
28
  # bindings to be a hash, the mappings in it will be treated like bindings
29
29
  # made during an attempted match.
30
30
  #
31
- # Example:
32
- #
33
31
  # Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' }
34
32
  #
33
+ # @param subtree [String, Hash, Array] poro subtree returned by a parse
34
+ # @param bindings [Hash] variable bindings to be verified
35
+ # @return [Hash, nil] On success: variable bindings that allow a match. On
36
+ # failure: nil
37
+ #
35
38
  def match(subtree, bindings=nil)
36
39
  bindings = bindings && bindings.dup || Hash.new
37
40
  return bindings if element_match(subtree, @pattern, bindings)
@@ -41,6 +44,8 @@ class Parslet::Pattern
41
44
  # given by +exp+. This match must respect bindings already made in
42
45
  # +bindings+. Note that bindings is carried along and modified.
43
46
  #
47
+ # @api private
48
+ #
44
49
  def element_match(tree, exp, bindings)
45
50
  # p [:elm, tree, exp]
46
51
  case [tree, exp].map { |e| e.class }
@@ -63,7 +68,9 @@ class Parslet::Pattern
63
68
  end
64
69
  end
65
70
 
66
- def element_match_binding(tree, exp, bindings) # :nodoc:
71
+ # @api private
72
+ #
73
+ def element_match_binding(tree, exp, bindings)
67
74
  var_name = exp.variable_name
68
75
 
69
76
  # TODO test for the hidden :_ feature.
@@ -77,13 +84,17 @@ class Parslet::Pattern
77
84
  return true
78
85
  end
79
86
 
80
- def element_match_ary_single(sequence, exp, bindings) # :nodoc:
87
+ # @api private
88
+ #
89
+ def element_match_ary_single(sequence, exp, bindings)
81
90
  return false if sequence.size != exp.size
82
91
 
83
92
  return sequence.zip(exp).all? { |elt, subexp|
84
93
  element_match(elt, subexp, bindings) }
85
94
  end
86
95
 
96
+ # @api private
97
+ #
87
98
  def element_match_hash(tree, exp, bindings)
88
99
  # Early failure when one hash is bigger than the other
89
100
  return false unless exp.size == tree.size
@@ -5,7 +5,7 @@
5
5
  # It defines the most permissive kind of bind, the one that matches any subtree
6
6
  # whatever it looks like.
7
7
  #
8
- class Parslet::Pattern::SubtreeBind < Struct.new(:symbol) # :nodoc:
8
+ class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
9
9
  def variable_name
10
10
  symbol
11
11
  end
@@ -33,7 +33,7 @@ end
33
33
  # Binds a symbol to a simple subtree, one that is not either a sequence of
34
34
  # elements or a collection of attributes.
35
35
  #
36
- class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind # :nodoc:
36
+ class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
37
37
  def can_bind?(subtree)
38
38
  not [Hash, Array].include?(subtree.class)
39
39
  end
@@ -41,7 +41,7 @@ end
41
41
 
42
42
  # Binds a symbol to a sequence of simple leaves ([element1, element2, ...])
43
43
  #
44
- class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind # :nodoc:
44
+ class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
45
45
  def can_bind?(subtree)
46
46
  subtree.kind_of?(Array) &&
47
47
  (not subtree.any? { |el| [Hash, Array].include?(el.class) })
@@ -7,8 +7,8 @@ RSpec::Matchers.define(:parse) do |input, opts|
7
7
  block ?
8
8
  block.call(result) :
9
9
  (as == result || as.nil?)
10
- rescue Parslet::ParseFailed
11
- trace = parser.error_tree.ascii_tree if opts && opts[:trace]
10
+ rescue Parslet::ParseFailed => ex
11
+ trace = ex.cause.ascii_tree if opts && opts[:trace]
12
12
  false
13
13
  end
14
14
  end
data/lib/parslet/slice.rb CHANGED
@@ -98,10 +98,4 @@ class Parslet::Slice
98
98
  def inspect
99
99
  str.inspect << "@#{offset}"
100
100
  end
101
- end
102
-
103
- # Raised when trying to do an operation on slices that cannot succeed, like
104
- # adding non-adjacent slices. See Parslet::Slice.
105
- #
106
- class Parslet::InvalidSliceOperation < StandardError
107
101
  end
@@ -4,82 +4,63 @@ require 'stringio'
4
4
  require 'parslet/source/line_cache'
5
5
 
6
6
  module Parslet
7
- # Wraps the input IO to parslet. The interface defined by this class is
8
- # smaller than what IO offers, but enhances it with a #column and #line
9
- # method for the current position.
7
+ # Wraps the input string for parslet.
10
8
  #
11
9
  class Source
12
- def initialize(io)
13
- if io.respond_to? :to_str
14
- io = StringIO.new(io)
15
- end
10
+ def initialize(str)
11
+ raise ArgumentError unless str.respond_to?(:to_str)
16
12
 
17
- @io = io
13
+ @pos = 0
14
+ @str = str
15
+
18
16
  @line_cache = LineCache.new
17
+ @line_cache.scan_for_line_endings(0, @str)
19
18
  end
20
19
 
21
- # Reads n bytes from the input and returns a Range instance. If the n
22
- # bytes end in the middle of a multibyte representation of a char, that
23
- # char is returned fully.
20
+ # Checks if the given pattern matches at the current input position.
24
21
  #
25
- # Example:
26
- # source.read(1) # always returns at least one valid char
27
- # source.read(7) # reads 7 bytes, then to the next char boundary.
22
+ # @param pattern [Regexp, String] pattern to check for
23
+ # @return [Boolean] true if the pattern matches at #pos
28
24
  #
29
- def read(n)
30
- raise ArgumentError, "Cannot read < 1 characters at a time." if n < 1
31
- read_slice(n)
32
- end
33
-
34
- def eof?
35
- @io.eof?
36
- end
37
- def pos
38
- @io.pos
39
- end
40
- def pos=(new_pos)
41
- @io.pos = new_pos
25
+ def matches?(pattern)
26
+ @str.index(pattern, @pos) == @pos
42
27
  end
43
-
44
- # Returns a <line, column> tuple for the given position. If no position is
45
- # given, line/column information is returned for the current position given
46
- # by #pos.
28
+ alias match matches?
29
+
30
+ # Consumes n characters from the input, returning them as a slice of the
31
+ # input.
47
32
  #
48
- def line_and_column(position=nil)
49
- @line_cache.line_and_column(position || self.pos)
33
+ def consume(n)
34
+ slice_str = @str.slice(@pos, n)
35
+ slice = Parslet::Slice.new(
36
+ slice_str,
37
+ pos,
38
+ @line_cache)
39
+
40
+ @pos += slice_str.size
41
+ return slice
50
42
  end
51
-
52
- # Formats an error cause at the current position or at the position given
53
- # by pos. If pos is nil, the current source position will be the error
54
- # position.
43
+
44
+ # Returns how many chars remain in the input.
55
45
  #
56
- def error(message, error_pos=nil)
57
- real_pos = (error_pos||self.pos)
58
-
59
- Cause.format(self, real_pos, message)
46
+ def chars_left
47
+ @str.size - @pos
60
48
  end
61
-
62
- private
63
- def read_slice(needed)
64
- start = @io.pos
65
- buf = @io.gets(nil, needed)
66
-
67
- # cache line ends
68
- @line_cache.scan_for_line_endings(start, buf)
69
49
 
70
- Parslet::Slice.new(buf || '', start, @line_cache)
50
+ def eof?
51
+ @pos >= @str.size
71
52
  end
72
-
73
- if RUBY_VERSION !~ /^1.9/
74
- def read_slice(needed)
75
- start = @io.pos
76
- buf = @io.read(needed)
77
53
 
78
- # cache line ends
79
- @line_cache.scan_for_line_endings(start, buf)
54
+ # Position of the parse as a character offset into the original string.
55
+ # @note: Encodings...
56
+ attr_accessor :pos
80
57
 
81
- Parslet::Slice.new(buf || '', start, @line_cache)
82
- end
58
+ # Returns a <line, column> tuple for the given position. If no position is
59
+ # given, line/column information is returned for the current position
60
+ # given by #pos.
61
+ #
62
+ def line_and_column(position=nil)
63
+ @line_cache.line_and_column(position || self.pos)
83
64
  end
84
65
  end
85
66
  end