parslet 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
@@ -9,12 +9,11 @@
9
9
  # using the structuring method Parslet.rule.
10
10
  #
11
11
  class Parslet::Atoms::Entity < Parslet::Atoms::Base
12
- attr_reader :name, :context, :block
13
- def initialize(name, context, block) # :nodoc:
12
+ attr_reader :name, :block
13
+ def initialize(name, &block) # :nodoc:
14
14
  super()
15
15
 
16
16
  @name = name
17
- @context = context
18
17
  @block = block
19
18
  end
20
19
 
@@ -23,7 +22,7 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
23
22
  end
24
23
 
25
24
  def parslet
26
- @parslet ||= context.instance_eval(&block).tap { |p|
25
+ @parslet ||= @block.call.tap { |p|
27
26
  raise_not_implemented unless p
28
27
  }
29
28
  end
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # Example:
4
4
  #
5
- # str('foo').prsnt? # matches when the input contains 'foo', but leaves it
5
+ # str('foo').present? # matches when the input contains 'foo', but leaves it
6
6
  #
7
7
  class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
8
8
  attr_reader :positive
@@ -12,8 +12,8 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
12
12
  def initialize(match) # :nodoc:
13
13
  super()
14
14
 
15
- @match = match
16
- @re = Regexp.new(match, Regexp::MULTILINE)
15
+ @match = match.to_s
16
+ @re = Regexp.new(self.match, Regexp::MULTILINE)
17
17
  @error_msgs = {
18
18
  :premature => "Premature end of input",
19
19
  :failed => "Failed to match #{match.inspect[1..-2]}"
@@ -9,7 +9,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
9
9
  def initialize(str)
10
10
  super()
11
11
 
12
- @str = str
12
+ @str = str.to_s
13
13
  @error_msgs = {
14
14
  :premature => "Premature end of input",
15
15
  :failed => "Expected #{str.inspect}, but got "
@@ -17,6 +17,9 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
17
17
  end
18
18
 
19
19
  def try(source, context) # :nodoc:
20
+ # NOTE: Even though it doesn't look that way, this is the hotspot, the
21
+ # contents of parslet's inner loop. Changes here affect parslet's speed
22
+ # enormously.
20
23
  error_pos = source.pos
21
24
  s = source.read(str.size)
22
25
 
@@ -26,7 +29,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
26
29
 
27
30
  # Failures:
28
31
  return error(source, @error_msgs[:premature]) unless s && s.size==str.size
29
- return error(source, @error_msgs[:failed]+s.inspect, error_pos)
32
+ return error(source, [@error_msgs[:failed], s], error_pos)
30
33
  end
31
34
 
32
35
  def to_s_inner(prec) # :nodoc:
@@ -0,0 +1,75 @@
1
+
2
+ require 'parslet/atoms/visitor'
3
+
4
+ # A helper class that allows transforming one grammar into another. You can
5
+ # use this class as a base class:
6
+ #
7
+ # Example:
8
+ # class MyTransform < Parslet::Atoms::Transform
9
+ # def visit_str(str)
10
+ # # mangle string here
11
+ # super(str)
12
+ # end
13
+ # end
14
+ #
15
+ # Note that all the methods in a Transform must return parser atoms. The
16
+ # quickest way to do so is to call super with your own arguments. This will
17
+ # just create the same kind of atom that was just visited.
18
+ #
19
+ # In essence, this base class performs what is called an 'identity transform'
20
+ # with one small caveat: It returns a brand new grammar composed of brand new
21
+ # parser atoms. This is like a deep clone of your grammar.
22
+ #
23
+ # But nothing stops you from doing something that is far from a deep clone.
24
+ # You can totally transform the language your grammar accepts. Or maybe
25
+ # turn all repetitions into non-greedy ones? Go wild.
26
+ #
27
+ class Parslet::Atoms::Transform
28
+ # Applies a transformation to a grammar and returns a new grammar that
29
+ # is the result of the transform.
30
+ #
31
+ # Example:
32
+ # Parslet::Atoms::Transform.new.apply(my_grammar) # => deep clone of my_grammar
33
+ #
34
+ def apply(grammar)
35
+ grammar.accept(self)
36
+ end
37
+
38
+ def visit_str(str)
39
+ Parslet.str(str)
40
+ end
41
+
42
+ def visit_sequence(parslets)
43
+ parslets[1..-1].inject(parslets[0]) { |a,p| a >> p.accept(self) }
44
+ end
45
+
46
+ def visit_re(match)
47
+ Parslet.match(match)
48
+ end
49
+
50
+ def visit_alternative(parslets)
51
+ parslets[1..-1].inject(parslets[0]) { |a,p| a | p.accept(self) }
52
+ end
53
+
54
+ def visit_lookahead(positive, parslet)
55
+ Parslet::Atoms::Lookahead.new(positive, parslet.accept(self))
56
+ end
57
+
58
+ def visit_entity(name, block)
59
+ # NOTE: This is kinda tricky. We return a new entity that keeps a reference
60
+ # to the transformer around. Once somebody accesses the parslet in that
61
+ # entity, the original block will produce the original parslet, and then
62
+ # we transform that then and there. It's lazy and futuristic!
63
+ transformer = self
64
+ transformed_block = proc { block.call.accept(transformer) }
65
+ Parslet::Atoms::Entity.new(name, &transformed_block)
66
+ end
67
+
68
+ def visit_named(name, parslet)
69
+ parslet.accept(self).as(name)
70
+ end
71
+
72
+ def visit_repetition(min, max, parslet)
73
+ parslet.accept(self).repeat(min, max)
74
+ end
75
+ end
@@ -5,7 +5,7 @@
5
5
  module Parslet::Atoms
6
6
  class Base
7
7
  def accept(visitor)
8
- raise NotImplementedError, "No visit method on #{self.class.name}."
8
+ raise NotImplementedError, "No #accept method on #{self.class.name}."
9
9
  end
10
10
  end
11
11
 
@@ -13,7 +13,7 @@ module Parslet::Atoms
13
13
  # Calls back the visitor's #visit_str method. See parslet/export for an example.
14
14
  #
15
15
  def accept(visitor)
16
- visitor.str(str)
16
+ visitor.visit_str(str)
17
17
  end
18
18
  end
19
19
 
@@ -21,7 +21,7 @@ module Parslet::Atoms
21
21
  # Calls back the visitor's #visit_entity method. See parslet/export for an example.
22
22
  #
23
23
  def accept(visitor)
24
- visitor.entity(name, context, block)
24
+ visitor.visit_entity(name, block)
25
25
  end
26
26
  end
27
27
 
@@ -29,7 +29,7 @@ module Parslet::Atoms
29
29
  # Calls back the visitor's #visit_named method. See parslet/export for an example.
30
30
  #
31
31
  def accept(visitor)
32
- visitor.named(name, parslet)
32
+ visitor.visit_named(name, parslet)
33
33
  end
34
34
  end
35
35
 
@@ -37,7 +37,7 @@ module Parslet::Atoms
37
37
  # Calls back the visitor's #visit_sequence method. See parslet/export for an example.
38
38
  #
39
39
  def accept(visitor)
40
- visitor.sequence(parslets)
40
+ visitor.visit_sequence(parslets)
41
41
  end
42
42
  end
43
43
 
@@ -45,7 +45,7 @@ module Parslet::Atoms
45
45
  # Calls back the visitor's #visit_repetition method. See parslet/export for an example.
46
46
  #
47
47
  def accept(visitor)
48
- visitor.repetition(min, max, parslet)
48
+ visitor.visit_repetition(min, max, parslet)
49
49
  end
50
50
  end
51
51
 
@@ -53,7 +53,7 @@ module Parslet::Atoms
53
53
  # Calls back the visitor's #visit_alternative method. See parslet/export for an example.
54
54
  #
55
55
  def accept(visitor)
56
- visitor.alternative(alternatives)
56
+ visitor.visit_alternative(alternatives)
57
57
  end
58
58
  end
59
59
 
@@ -61,7 +61,7 @@ module Parslet::Atoms
61
61
  # Calls back the visitor's #visit_lookahead method. See parslet/export for an example.
62
62
  #
63
63
  def accept(visitor)
64
- visitor.lookahead(positive, bound_parslet)
64
+ visitor.visit_lookahead(positive, bound_parslet)
65
65
  end
66
66
  end
67
67
 
@@ -69,7 +69,7 @@ module Parslet::Atoms
69
69
  # Calls back the visitor's #visit_re method. See parslet/export for an example.
70
70
  #
71
71
  def accept(visitor)
72
- visitor.re(match)
72
+ visitor.visit_re(match)
73
73
  end
74
74
  end
75
75
  end
@@ -1,4 +1,4 @@
1
- class Parslet::Parser
1
+ class Parslet::Atoms::Base
2
2
 
3
3
  # Packages the common idiom
4
4
  #
@@ -6,7 +6,7 @@ class Parslet::Parser
6
6
  # tree = parser.parse('something')
7
7
  # rescue Parslet::ParseFailed => error
8
8
  # puts error
9
- # puts parser.root.error_tree
9
+ # puts parser.error_tree
10
10
  # end
11
11
  #
12
12
  # into a convenient method.
@@ -27,7 +27,7 @@ class Parslet::Parser
27
27
  parse str
28
28
  rescue Parslet::ParseFailed => error
29
29
  puts error
30
- puts root.error_tree
30
+ puts error_tree
31
31
  end
32
32
 
33
33
  end
@@ -11,33 +11,33 @@ class Parslet::Parser
11
11
  @context = context
12
12
  end
13
13
 
14
- def str(str)
14
+ def visit_str(str)
15
15
  "\"#{str.inspect[1..-2]}\""
16
16
  end
17
- def re(match)
17
+ def visit_re(match)
18
18
  match.to_s
19
19
  end
20
20
 
21
- def entity(name, ctx, block)
22
- context.deferred(name, [ctx, block])
21
+ def visit_entity(name, block)
22
+ context.deferred(name, block)
23
23
 
24
24
  "(#{context.mangle_name(name)})"
25
25
  end
26
- def named(name, parslet)
26
+ def visit_named(name, parslet)
27
27
  parslet.accept(self)
28
28
  end
29
29
 
30
- def sequence(parslets)
30
+ def visit_sequence(parslets)
31
31
  '(' <<
32
32
  parslets.
33
33
  map { |el| el.accept(self) }.
34
34
  join(' ') <<
35
35
  ')'
36
36
  end
37
- def repetition(min, max, parslet)
37
+ def visit_repetition(min, max, parslet)
38
38
  parslet.accept(self) << "#{min}*#{max}"
39
39
  end
40
- def alternative(alternatives)
40
+ def visit_alternative(alternatives)
41
41
  '(' <<
42
42
  alternatives.
43
43
  map { |el| el.accept(self) }.
@@ -45,18 +45,18 @@ class Parslet::Parser
45
45
  ')'
46
46
  end
47
47
 
48
- def lookahead(positive, bound_parslet)
48
+ def visit_lookahead(positive, bound_parslet)
49
49
  (positive ? '&' : '!') <<
50
50
  bound_parslet.accept(self)
51
51
  end
52
52
  end
53
53
 
54
54
  class Treetop < Citrus
55
- def repetition(min, max, parslet)
55
+ def visit_repetition(min, max, parslet)
56
56
  parslet.accept(self) << "#{min}..#{max}"
57
57
  end
58
58
 
59
- def alternative(alternatives)
59
+ def visit_alternative(alternatives)
60
60
  '(' <<
61
61
  alternatives.
62
62
  map { |el| el.accept(self) }.
@@ -88,13 +88,13 @@ class Parslet::Parser
88
88
  # @todo is constantly filled by the visitor (see #deferred). We
89
89
  # keep going until it is empty.
90
90
  break if @todo.empty?
91
- name, (context, block) = @todo.shift
91
+ name, block = @todo.shift
92
92
 
93
93
  # Track what rules we've already seen. This breaks loops.
94
94
  next if seen.include?(name)
95
95
  seen << name
96
96
 
97
- output << rule(name, context.instance_eval(&block))
97
+ output << rule(name, block.call)
98
98
  end
99
99
 
100
100
  output << "end\n"
@@ -33,7 +33,7 @@ class Parslet::Expression::Treetop
33
33
  rule(:char_class) {
34
34
  (str('[') >>
35
35
  (str('\\') >> any |
36
- str(']').absnt? >> any).repeat(1) >>
36
+ str(']').absent? >> any).repeat(1) >>
37
37
  str(']')).as(:match) >> space?
38
38
  }
39
39
 
@@ -45,7 +45,7 @@ class Parslet::Expression::Treetop
45
45
  str('\'') >>
46
46
  (
47
47
  (str('\\') >> any) |
48
- (str("'").absnt? >> any)
48
+ (str("'").absent? >> any)
49
49
  ).repeat.as(:string) >>
50
50
  str('\'') >> space?
51
51
  }
@@ -12,6 +12,60 @@
12
12
  # pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
13
13
  # # Don't know what to do with bbbb at line 1 char 1.
14
14
  #
15
- class Parslet::Parser
15
+ # Parslet::Parser is also a grammar atom. This means that you can mix full
16
+ # fledged parsers freely with small parts of a different parser.
17
+ #
18
+ # Example:
19
+ # class ParserA < Parslet::Parser
20
+ # root :aaa
21
+ # rule(:aaa) { str('a').repeat(3,3) }
22
+ # end
23
+ # class ParserB < Parslet::Parser
24
+ # root :expression
25
+ # rule(:expression) { str('b') >> ParserA.new >> str('b') }
26
+ # end
27
+ #
28
+ # In the above example, ParserB would parse something like 'baaab'.
29
+ #
30
+ class Parslet::Parser < Parslet::Atoms::Base
16
31
  include Parslet
32
+
33
+ class <<self # class methods
34
+ # Define the parser's #root function. This is the place where you start
35
+ # parsing; if you have a rule for 'file' that describes what should be
36
+ # in a file, this would be your root declaration:
37
+ #
38
+ # class Parser
39
+ # root :file
40
+ # rule(:file) { ... }
41
+ # end
42
+ #
43
+ # #root declares a 'parse' function that works just like the parse
44
+ # function that you can call on a simple parslet, taking a string as input
45
+ # and producing parse output.
46
+ #
47
+ # In a way, #root is a shorthand for:
48
+ #
49
+ # def parse(str)
50
+ # your_parser_root.parse(str)
51
+ # end
52
+ #
53
+ def root(name)
54
+ define_method(:root) do
55
+ self.send(name)
56
+ end
57
+ end
58
+ end
59
+
60
+ def try(source, context) # :nodoc:
61
+ root.try(source, context)
62
+ end
63
+
64
+ def error_tree # :nodoc:
65
+ root.error_tree
66
+ end
67
+
68
+ def to_s_inner(prec) # :nodoc:
69
+ root.to_s(prec)
70
+ end
17
71
  end
@@ -1,24 +1,50 @@
1
- RSpec::Matchers.define(:parse) do |input|
2
- chain(:as) { |as| @as = as }
3
-
1
+ RSpec::Matchers.define(:parse) do |input, opts|
4
2
  match do |parser|
5
3
  begin
6
4
  @result = parser.parse(input)
7
- @as == @result or @as.nil?
5
+ @block ?
6
+ @block.call(@result) :
7
+ (@as == @result || @as.nil?)
8
8
  rescue Parslet::ParseFailed
9
+ @trace = parser.error_tree.ascii_tree if opts && opts[:trace]
9
10
  false
10
11
  end
11
12
  end
12
13
 
13
14
  failure_message_for_should do |is|
14
- "expected " << (@result ?
15
- "output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
16
- "#{is.inspect} to be able to parse #{input.inspect}")
15
+ if @block
16
+ "expected output of parsing #{input.inspect}" <<
17
+ " with #{is.inspect} to meet block conditions, but it didn't"
18
+ else
19
+ "expected " <<
20
+ (@as ?
21
+ "output of parsing #{input.inspect}"<<
22
+ " with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
23
+ "#{is.inspect} to be able to parse #{input.inspect}") <<
24
+ (@trace ?
25
+ "\n"+@trace :
26
+ '')
27
+ end
17
28
  end
18
29
 
19
30
  failure_message_for_should_not do |is|
20
- "expected " << (@as ?
21
- "output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
22
- "#{is.inspect} to not parse #{input.inspect}, but it did")
31
+ if @block
32
+ "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
33
+ else
34
+ "expected " <<
35
+ (@as ?
36
+ "output of parsing #{input.inspect}"<<
37
+ " with #{is.inspect} not to equal #{@as.inspect}" :
38
+
39
+ "#{is.inspect} to not parse #{input.inspect}, but it did")
40
+ end
41
+ end
42
+
43
+ # NOTE: This has a nodoc tag since the rdoc parser puts this into
44
+ # Object, a thing I would never allow.
45
+ def as(expected_output = nil, &block) # :nodoc:
46
+ @as = expected_output
47
+ @block = block
48
+ self
23
49
  end
24
50
  end