parslet 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
@@ -9,12 +9,11 @@
9
9
  # using the structuring method Parslet.rule.
10
10
  #
11
11
  class Parslet::Atoms::Entity < Parslet::Atoms::Base
12
- attr_reader :name, :context, :block
13
- def initialize(name, context, block) # :nodoc:
12
+ attr_reader :name, :block
13
+ def initialize(name, &block) # :nodoc:
14
14
  super()
15
15
 
16
16
  @name = name
17
- @context = context
18
17
  @block = block
19
18
  end
20
19
 
@@ -23,7 +22,7 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
23
22
  end
24
23
 
25
24
  def parslet
26
- @parslet ||= context.instance_eval(&block).tap { |p|
25
+ @parslet ||= @block.call.tap { |p|
27
26
  raise_not_implemented unless p
28
27
  }
29
28
  end
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # Example:
4
4
  #
5
- # str('foo').prsnt? # matches when the input contains 'foo', but leaves it
5
+ # str('foo').present? # matches when the input contains 'foo', but leaves it
6
6
  #
7
7
  class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
8
8
  attr_reader :positive
@@ -12,8 +12,8 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
12
12
  def initialize(match) # :nodoc:
13
13
  super()
14
14
 
15
- @match = match
16
- @re = Regexp.new(match, Regexp::MULTILINE)
15
+ @match = match.to_s
16
+ @re = Regexp.new(self.match, Regexp::MULTILINE)
17
17
  @error_msgs = {
18
18
  :premature => "Premature end of input",
19
19
  :failed => "Failed to match #{match.inspect[1..-2]}"
@@ -9,7 +9,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
9
9
  def initialize(str)
10
10
  super()
11
11
 
12
- @str = str
12
+ @str = str.to_s
13
13
  @error_msgs = {
14
14
  :premature => "Premature end of input",
15
15
  :failed => "Expected #{str.inspect}, but got "
@@ -17,6 +17,9 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
17
17
  end
18
18
 
19
19
  def try(source, context) # :nodoc:
20
+ # NOTE: Even though it doesn't look that way, this is the hotspot, the
21
+ # contents of parslet's inner loop. Changes here affect parslet's speed
22
+ # enormously.
20
23
  error_pos = source.pos
21
24
  s = source.read(str.size)
22
25
 
@@ -26,7 +29,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
26
29
 
27
30
  # Failures:
28
31
  return error(source, @error_msgs[:premature]) unless s && s.size==str.size
29
- return error(source, @error_msgs[:failed]+s.inspect, error_pos)
32
+ return error(source, [@error_msgs[:failed], s], error_pos)
30
33
  end
31
34
 
32
35
  def to_s_inner(prec) # :nodoc:
@@ -0,0 +1,75 @@
1
+
2
+ require 'parslet/atoms/visitor'
3
+
4
+ # A helper class that allows transforming one grammar into another. You can
5
+ # use this class as a base class:
6
+ #
7
+ # Example:
8
+ # class MyTransform < Parslet::Atoms::Transform
9
+ # def visit_str(str)
10
+ # # mangle string here
11
+ # super(str)
12
+ # end
13
+ # end
14
+ #
15
+ # Note that all the methods in a Transform must return parser atoms. The
16
+ # quickest way to do so is to call super with your own arguments. This will
17
+ # just create the same kind of atom that was just visited.
18
+ #
19
+ # In essence, this base class performs what is called an 'identity transform'
20
+ # with one small caveat: It returns a brand new grammar composed of brand new
21
+ # parser atoms. This is like a deep clone of your grammar.
22
+ #
23
+ # But nothing stops you from doing something that is far from a deep clone.
24
+ # You can totally transform the language your grammar accepts. Or maybe
25
+ # turn all repetitions into non-greedy ones? Go wild.
26
+ #
27
+ class Parslet::Atoms::Transform
28
+ # Applies a transformation to a grammar and returns a new grammar that
29
+ # is the result of the transform.
30
+ #
31
+ # Example:
32
+ # Parslet::Atoms::Transform.new.apply(my_grammar) # => deep clone of my_grammar
33
+ #
34
+ def apply(grammar)
35
+ grammar.accept(self)
36
+ end
37
+
38
+ def visit_str(str)
39
+ Parslet.str(str)
40
+ end
41
+
42
+ def visit_sequence(parslets)
43
+ parslets[1..-1].inject(parslets[0]) { |a,p| a >> p.accept(self) }
44
+ end
45
+
46
+ def visit_re(match)
47
+ Parslet.match(match)
48
+ end
49
+
50
+ def visit_alternative(parslets)
51
+ parslets[1..-1].inject(parslets[0]) { |a,p| a | p.accept(self) }
52
+ end
53
+
54
+ def visit_lookahead(positive, parslet)
55
+ Parslet::Atoms::Lookahead.new(positive, parslet.accept(self))
56
+ end
57
+
58
+ def visit_entity(name, block)
59
+ # NOTE: This is kinda tricky. We return a new entity that keeps a reference
60
+ # to the transformer around. Once somebody accesses the parslet in that
61
+ # entity, the original block will produce the original parslet, and then
62
+ # we transform that then and there. It's lazy and futuristic!
63
+ transformer = self
64
+ transformed_block = proc { block.call.accept(transformer) }
65
+ Parslet::Atoms::Entity.new(name, &transformed_block)
66
+ end
67
+
68
+ def visit_named(name, parslet)
69
+ parslet.accept(self).as(name)
70
+ end
71
+
72
+ def visit_repetition(min, max, parslet)
73
+ parslet.accept(self).repeat(min, max)
74
+ end
75
+ end
@@ -5,7 +5,7 @@
5
5
  module Parslet::Atoms
6
6
  class Base
7
7
  def accept(visitor)
8
- raise NotImplementedError, "No visit method on #{self.class.name}."
8
+ raise NotImplementedError, "No #accept method on #{self.class.name}."
9
9
  end
10
10
  end
11
11
 
@@ -13,7 +13,7 @@ module Parslet::Atoms
13
13
  # Call back visitors #str method. See parslet/export for an example.
14
14
  #
15
15
  def accept(visitor)
16
- visitor.str(str)
16
+ visitor.visit_str(str)
17
17
  end
18
18
  end
19
19
 
@@ -21,7 +21,7 @@ module Parslet::Atoms
21
21
  # Call back visitors #entity method. See parslet/export for an example.
22
22
  #
23
23
  def accept(visitor)
24
- visitor.entity(name, context, block)
24
+ visitor.visit_entity(name, block)
25
25
  end
26
26
  end
27
27
 
@@ -29,7 +29,7 @@ module Parslet::Atoms
29
29
  # Call back visitors #named method. See parslet/export for an example.
30
30
  #
31
31
  def accept(visitor)
32
- visitor.named(name, parslet)
32
+ visitor.visit_named(name, parslet)
33
33
  end
34
34
  end
35
35
 
@@ -37,7 +37,7 @@ module Parslet::Atoms
37
37
  # Call back visitors #sequence method. See parslet/export for an example.
38
38
  #
39
39
  def accept(visitor)
40
- visitor.sequence(parslets)
40
+ visitor.visit_sequence(parslets)
41
41
  end
42
42
  end
43
43
 
@@ -45,7 +45,7 @@ module Parslet::Atoms
45
45
  # Call back visitors #repetition method. See parslet/export for an example.
46
46
  #
47
47
  def accept(visitor)
48
- visitor.repetition(min, max, parslet)
48
+ visitor.visit_repetition(min, max, parslet)
49
49
  end
50
50
  end
51
51
 
@@ -53,7 +53,7 @@ module Parslet::Atoms
53
53
  # Call back visitors #alternative method. See parslet/export for an example.
54
54
  #
55
55
  def accept(visitor)
56
- visitor.alternative(alternatives)
56
+ visitor.visit_alternative(alternatives)
57
57
  end
58
58
  end
59
59
 
@@ -61,7 +61,7 @@ module Parslet::Atoms
61
61
  # Call back visitors #lookahead method. See parslet/export for an example.
62
62
  #
63
63
  def accept(visitor)
64
- visitor.lookahead(positive, bound_parslet)
64
+ visitor.visit_lookahead(positive, bound_parslet)
65
65
  end
66
66
  end
67
67
 
@@ -69,7 +69,7 @@ module Parslet::Atoms
69
69
  # Call back visitors #re method. See parslet/export for an example.
70
70
  #
71
71
  def accept(visitor)
72
- visitor.re(match)
72
+ visitor.visit_re(match)
73
73
  end
74
74
  end
75
75
  end
@@ -1,4 +1,4 @@
1
- class Parslet::Parser
1
+ class Parslet::Atoms::Base
2
2
 
3
3
  # Packages the common idiom
4
4
  #
@@ -6,7 +6,7 @@ class Parslet::Parser
6
6
  # tree = parser.parse('something')
7
7
  # rescue Parslet::ParseFailed => error
8
8
  # puts error
9
- # puts parser.root.error_tree
9
+ # puts parser.error_tree
10
10
  # end
11
11
  #
12
12
  # into a convenient method.
@@ -27,7 +27,7 @@ class Parslet::Parser
27
27
  parse str
28
28
  rescue Parslet::ParseFailed => error
29
29
  puts error
30
- puts root.error_tree
30
+ puts error_tree
31
31
  end
32
32
 
33
33
  end
@@ -11,33 +11,33 @@ class Parslet::Parser
11
11
  @context = context
12
12
  end
13
13
 
14
- def str(str)
14
+ def visit_str(str)
15
15
  "\"#{str.inspect[1..-2]}\""
16
16
  end
17
- def re(match)
17
+ def visit_re(match)
18
18
  match.to_s
19
19
  end
20
20
 
21
- def entity(name, ctx, block)
22
- context.deferred(name, [ctx, block])
21
+ def visit_entity(name, block)
22
+ context.deferred(name, block)
23
23
 
24
24
  "(#{context.mangle_name(name)})"
25
25
  end
26
- def named(name, parslet)
26
+ def visit_named(name, parslet)
27
27
  parslet.accept(self)
28
28
  end
29
29
 
30
- def sequence(parslets)
30
+ def visit_sequence(parslets)
31
31
  '(' <<
32
32
  parslets.
33
33
  map { |el| el.accept(self) }.
34
34
  join(' ') <<
35
35
  ')'
36
36
  end
37
- def repetition(min, max, parslet)
37
+ def visit_repetition(min, max, parslet)
38
38
  parslet.accept(self) << "#{min}*#{max}"
39
39
  end
40
- def alternative(alternatives)
40
+ def visit_alternative(alternatives)
41
41
  '(' <<
42
42
  alternatives.
43
43
  map { |el| el.accept(self) }.
@@ -45,18 +45,18 @@ class Parslet::Parser
45
45
  ')'
46
46
  end
47
47
 
48
- def lookahead(positive, bound_parslet)
48
+ def visit_lookahead(positive, bound_parslet)
49
49
  (positive ? '&' : '!') <<
50
50
  bound_parslet.accept(self)
51
51
  end
52
52
  end
53
53
 
54
54
  class Treetop < Citrus
55
- def repetition(min, max, parslet)
55
+ def visit_repetition(min, max, parslet)
56
56
  parslet.accept(self) << "#{min}..#{max}"
57
57
  end
58
58
 
59
- def alternative(alternatives)
59
+ def visit_alternative(alternatives)
60
60
  '(' <<
61
61
  alternatives.
62
62
  map { |el| el.accept(self) }.
@@ -88,13 +88,13 @@ class Parslet::Parser
88
88
  # @todo is constantly filled by the visitor (see #deferred). We
89
89
  # keep going until it is empty.
90
90
  break if @todo.empty?
91
- name, (context, block) = @todo.shift
91
+ name, block = @todo.shift
92
92
 
93
93
  # Track what rules we've already seen. This breaks loops.
94
94
  next if seen.include?(name)
95
95
  seen << name
96
96
 
97
- output << rule(name, context.instance_eval(&block))
97
+ output << rule(name, block.call)
98
98
  end
99
99
 
100
100
  output << "end\n"
@@ -33,7 +33,7 @@ class Parslet::Expression::Treetop
33
33
  rule(:char_class) {
34
34
  (str('[') >>
35
35
  (str('\\') >> any |
36
- str(']').absnt? >> any).repeat(1) >>
36
+ str(']').absent? >> any).repeat(1) >>
37
37
  str(']')).as(:match) >> space?
38
38
  }
39
39
 
@@ -45,7 +45,7 @@ class Parslet::Expression::Treetop
45
45
  str('\'') >>
46
46
  (
47
47
  (str('\\') >> any) |
48
- (str("'").absnt? >> any)
48
+ (str("'").absent? >> any)
49
49
  ).repeat.as(:string) >>
50
50
  str('\'') >> space?
51
51
  }
@@ -12,6 +12,60 @@
12
12
  # pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
13
13
  # # Don't know what to do with bbbb at line 1 char 1.
14
14
  #
15
- class Parslet::Parser
15
+ # Parslet::Parser is also a grammar atom. This means that you can mix full
16
+ # fledged parsers freely with small parts of a different parser.
17
+ #
18
+ # Example:
19
+ # class ParserA < Parslet::Parser
20
+ # root :aaa
21
+ # rule(:aaa) { str('a').repeat(3,3) }
22
+ # end
23
+ # class ParserB < Parslet::Parser
24
+ # root :expression
25
+ # rule(:expression) { str('b') >> ParserA.new >> str('b') }
26
+ # end
27
+ #
28
+ # In the above example, ParserB would parse something like 'baaab'.
29
+ #
30
+ class Parslet::Parser < Parslet::Atoms::Base
16
31
  include Parslet
32
+
33
+ class <<self # class methods
34
+ # Define the parser's #root function. This is the place where you start
35
+ # parsing; if you have a rule for 'file' that describes what should be
36
+ # in a file, this would be your root declaration:
37
+ #
38
+ # class Parser
39
+ # root :file
40
+ # rule(:file) { ... }
41
+ # end
42
+ #
43
+ # #root declares a 'parse' function that works just like the parse
44
+ # function that you can call on a simple parslet, taking a string as input
45
+ # and producing parse output.
46
+ #
47
+ # In a way, #root is a shorthand for:
48
+ #
49
+ # def parse(str)
50
+ # your_parser_root.parse(str)
51
+ # end
52
+ #
53
+ def root(name)
54
+ define_method(:root) do
55
+ self.send(name)
56
+ end
57
+ end
58
+ end
59
+
60
+ def try(source, context) # :nodoc:
61
+ root.try(source, context)
62
+ end
63
+
64
+ def error_tree # :nodoc:
65
+ root.error_tree
66
+ end
67
+
68
+ def to_s_inner(prec) # :nodoc:
69
+ root.to_s(prec)
70
+ end
17
71
  end
@@ -1,24 +1,50 @@
1
- RSpec::Matchers.define(:parse) do |input|
2
- chain(:as) { |as| @as = as }
3
-
1
+ RSpec::Matchers.define(:parse) do |input, opts|
4
2
  match do |parser|
5
3
  begin
6
4
  @result = parser.parse(input)
7
- @as == @result or @as.nil?
5
+ @block ?
6
+ @block.call(@result) :
7
+ (@as == @result || @as.nil?)
8
8
  rescue Parslet::ParseFailed
9
+ @trace = parser.error_tree.ascii_tree if opts && opts[:trace]
9
10
  false
10
11
  end
11
12
  end
12
13
 
13
14
  failure_message_for_should do |is|
14
- "expected " << (@result ?
15
- "output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
16
- "#{is.inspect} to be able to parse #{input.inspect}")
15
+ if @block
16
+ "expected output of parsing #{input.inspect}" <<
17
+ " with #{is.inspect} to meet block conditions, but it didn't"
18
+ else
19
+ "expected " <<
20
+ (@as ?
21
+ "output of parsing #{input.inspect}"<<
22
+ " with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
23
+ "#{is.inspect} to be able to parse #{input.inspect}") <<
24
+ (@trace ?
25
+ "\n"+@trace :
26
+ '')
27
+ end
17
28
  end
18
29
 
19
30
  failure_message_for_should_not do |is|
20
- "expected " << (@as ?
21
- "output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
22
- "#{is.inspect} to not parse #{input.inspect}, but it did")
31
+ if @block
32
+ "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
33
+ else
34
+ "expected " <<
35
+ (@as ?
36
+ "output of parsing #{input.inspect}"<<
37
+ " with #{is.inspect} not to equal #{@as.inspect}" :
38
+
39
+ "#{is.inspect} to not parse #{input.inspect}, but it did")
40
+ end
41
+ end
42
+
43
+ # NOTE: This has a nodoc tag since the rdoc parser puts this into
44
+ # Object, a thing I would never allow.
45
+ def as(expected_output = nil, &block) # :nodoc:
46
+ @as = expected_output
47
+ @block = block
48
+ self
23
49
  end
24
50
  end