parslet 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +29 -0
- data/README +2 -4
- data/Rakefile +18 -4
- data/example/comments.rb +11 -13
- data/example/documentation.rb +1 -1
- data/example/email_parser.rb +5 -5
- data/example/empty.rb +2 -2
- data/example/erb.rb +6 -3
- data/example/ip_address.rb +2 -2
- data/example/local.rb +34 -0
- data/example/minilisp.rb +2 -2
- data/example/output/comments.out +8 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/local.out +3 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +1 -3
- data/example/readme.rb +4 -10
- data/example/seasons.rb +2 -1
- data/example/simple_xml.rb +5 -8
- data/example/string_parser.rb +7 -5
- data/lib/parslet.rb +20 -31
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/base.rb +46 -87
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +3 -4
- data/lib/parslet/atoms/lookahead.rb +1 -1
- data/lib/parslet/atoms/re.rb +2 -2
- data/lib/parslet/atoms/str.rb +5 -2
- data/lib/parslet/atoms/transform.rb +75 -0
- data/lib/parslet/atoms/visitor.rb +9 -9
- data/lib/parslet/convenience.rb +3 -3
- data/lib/parslet/export.rb +13 -13
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +55 -1
- data/lib/parslet/rig/rspec.rb +36 -10
- data/lib/parslet/slice.rb +172 -0
- data/lib/parslet/source.rb +72 -83
- data/lib/parslet/source/line_cache.rb +90 -0
- metadata +22 -20
data/lib/parslet/atoms/entity.rb
CHANGED
@@ -9,12 +9,11 @@
|
|
9
9
|
# using the structuring method Parslet.rule.
|
10
10
|
#
|
11
11
|
class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
12
|
-
attr_reader :name, :
|
13
|
-
def initialize(name,
|
12
|
+
attr_reader :name, :block
|
13
|
+
def initialize(name, &block) # :nodoc:
|
14
14
|
super()
|
15
15
|
|
16
16
|
@name = name
|
17
|
-
@context = context
|
18
17
|
@block = block
|
19
18
|
end
|
20
19
|
|
@@ -23,7 +22,7 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
|
23
22
|
end
|
24
23
|
|
25
24
|
def parslet
|
26
|
-
@parslet ||=
|
25
|
+
@parslet ||= @block.call.tap { |p|
|
27
26
|
raise_not_implemented unless p
|
28
27
|
}
|
29
28
|
end
|
data/lib/parslet/atoms/re.rb
CHANGED
@@ -12,8 +12,8 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
|
|
12
12
|
def initialize(match) # :nodoc:
|
13
13
|
super()
|
14
14
|
|
15
|
-
@match = match
|
16
|
-
@re = Regexp.new(match, Regexp::MULTILINE)
|
15
|
+
@match = match.to_s
|
16
|
+
@re = Regexp.new(self.match, Regexp::MULTILINE)
|
17
17
|
@error_msgs = {
|
18
18
|
:premature => "Premature end of input",
|
19
19
|
:failed => "Failed to match #{match.inspect[1..-2]}"
|
data/lib/parslet/atoms/str.rb
CHANGED
@@ -9,7 +9,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
9
9
|
def initialize(str)
|
10
10
|
super()
|
11
11
|
|
12
|
-
@str = str
|
12
|
+
@str = str.to_s
|
13
13
|
@error_msgs = {
|
14
14
|
:premature => "Premature end of input",
|
15
15
|
:failed => "Expected #{str.inspect}, but got "
|
@@ -17,6 +17,9 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def try(source, context) # :nodoc:
|
20
|
+
# NOTE: Even though it doesn't look that way, this is the hotspot, the
|
21
|
+
# contents of parslet's inner loop. Changes here affect parslet's speed
|
22
|
+
# enormously.
|
20
23
|
error_pos = source.pos
|
21
24
|
s = source.read(str.size)
|
22
25
|
|
@@ -26,7 +29,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
26
29
|
|
27
30
|
# Failures:
|
28
31
|
return error(source, @error_msgs[:premature]) unless s && s.size==str.size
|
29
|
-
return error(source, @error_msgs[:failed]
|
32
|
+
return error(source, [@error_msgs[:failed], s], error_pos)
|
30
33
|
end
|
31
34
|
|
32
35
|
def to_s_inner(prec) # :nodoc:
|
@@ -0,0 +1,75 @@
|
|
1
|
+
|
2
|
+
require 'parslet/atoms/visitor'
|
3
|
+
|
4
|
+
# A helper class that allows transforming one grammar into another. You can
|
5
|
+
# use this class as a base class:
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
# class MyTransform < Parslet::Atoms::Transform
|
9
|
+
# def visit_str(str)
|
10
|
+
# # mangle string here
|
11
|
+
# super(str)
|
12
|
+
# end
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# Note that all the methods in a Transform must return parser atoms. The
|
16
|
+
# quickest way to do so is to call super with your own arguments. This will
|
17
|
+
# just create the same kind of atom that was just visited.
|
18
|
+
#
|
19
|
+
# In essence, this base class performs what is called an 'identity transform'
|
20
|
+
# with one small caveat: It returns a brand new grammar composed of brand new
|
21
|
+
# parser atoms. This is like a deep clone of your grammar.
|
22
|
+
#
|
23
|
+
# But nothing stops you from doing something that is far from a deep clone.
|
24
|
+
# You can totally transform the language your grammar accepts. Or maybe
|
25
|
+
# turn all repetitions into non-greedy ones? Go wild.
|
26
|
+
#
|
27
|
+
class Parslet::Atoms::Transform
|
28
|
+
# Applies a transformation to a grammar and returns a new grammar that
|
29
|
+
# is the result of the transform.
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# Parslet::Atoms::Transform.new.apply(my_grammar) # => deep clone of my_grammar
|
33
|
+
#
|
34
|
+
def apply(grammar)
|
35
|
+
grammar.accept(self)
|
36
|
+
end
|
37
|
+
|
38
|
+
def visit_str(str)
|
39
|
+
Parslet.str(str)
|
40
|
+
end
|
41
|
+
|
42
|
+
def visit_sequence(parslets)
|
43
|
+
parslets[1..-1].inject(parslets[0]) { |a,p| a >> p.accept(self) }
|
44
|
+
end
|
45
|
+
|
46
|
+
def visit_re(match)
|
47
|
+
Parslet.match(match)
|
48
|
+
end
|
49
|
+
|
50
|
+
def visit_alternative(parslets)
|
51
|
+
parslets[1..-1].inject(parslets[0]) { |a,p| a | p.accept(self) }
|
52
|
+
end
|
53
|
+
|
54
|
+
def visit_lookahead(positive, parslet)
|
55
|
+
Parslet::Atoms::Lookahead.new(positive, parslet.accept(self))
|
56
|
+
end
|
57
|
+
|
58
|
+
def visit_entity(name, block)
|
59
|
+
# NOTE: This is kinda tricky. We return a new entity that keeps a reference
|
60
|
+
# to the transformer around. Once somebody accesses the parslet in that
|
61
|
+
# entity, the original block will produce the original parslet, and then
|
62
|
+
# we transform that then and there. It's lazy and futuristic!
|
63
|
+
transformer = self
|
64
|
+
transformed_block = proc { block.call.accept(transformer) }
|
65
|
+
Parslet::Atoms::Entity.new(name, &transformed_block)
|
66
|
+
end
|
67
|
+
|
68
|
+
def visit_named(name, parslet)
|
69
|
+
parslet.accept(self).as(name)
|
70
|
+
end
|
71
|
+
|
72
|
+
def visit_repetition(min, max, parslet)
|
73
|
+
parslet.accept(self).repeat(min, max)
|
74
|
+
end
|
75
|
+
end
|
@@ -5,7 +5,7 @@
|
|
5
5
|
module Parslet::Atoms
|
6
6
|
class Base
|
7
7
|
def accept(visitor)
|
8
|
-
raise NotImplementedError, "No
|
8
|
+
raise NotImplementedError, "No #accept method on #{self.class.name}."
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
@@ -13,7 +13,7 @@ module Parslet::Atoms
|
|
13
13
|
# Call back visitor's #visit_str method. See parslet/export for an example.
|
14
14
|
#
|
15
15
|
def accept(visitor)
|
16
|
-
visitor.
|
16
|
+
visitor.visit_str(str)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
@@ -21,7 +21,7 @@ module Parslet::Atoms
|
|
21
21
|
# Call back visitor's #visit_entity method. See parslet/export for an example.
|
22
22
|
#
|
23
23
|
def accept(visitor)
|
24
|
-
visitor.
|
24
|
+
visitor.visit_entity(name, block)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
@@ -29,7 +29,7 @@ module Parslet::Atoms
|
|
29
29
|
# Call back visitor's #visit_named method. See parslet/export for an example.
|
30
30
|
#
|
31
31
|
def accept(visitor)
|
32
|
-
visitor.
|
32
|
+
visitor.visit_named(name, parslet)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -37,7 +37,7 @@ module Parslet::Atoms
|
|
37
37
|
# Call back visitor's #visit_sequence method. See parslet/export for an example.
|
38
38
|
#
|
39
39
|
def accept(visitor)
|
40
|
-
visitor.
|
40
|
+
visitor.visit_sequence(parslets)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
@@ -45,7 +45,7 @@ module Parslet::Atoms
|
|
45
45
|
# Call back visitor's #visit_repetition method. See parslet/export for an example.
|
46
46
|
#
|
47
47
|
def accept(visitor)
|
48
|
-
visitor.
|
48
|
+
visitor.visit_repetition(min, max, parslet)
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
@@ -53,7 +53,7 @@ module Parslet::Atoms
|
|
53
53
|
# Call back visitor's #visit_alternative method. See parslet/export for an example.
|
54
54
|
#
|
55
55
|
def accept(visitor)
|
56
|
-
visitor.
|
56
|
+
visitor.visit_alternative(alternatives)
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
@@ -61,7 +61,7 @@ module Parslet::Atoms
|
|
61
61
|
# Call back visitor's #visit_lookahead method. See parslet/export for an example.
|
62
62
|
#
|
63
63
|
def accept(visitor)
|
64
|
-
visitor.
|
64
|
+
visitor.visit_lookahead(positive, bound_parslet)
|
65
65
|
end
|
66
66
|
end
|
67
67
|
|
@@ -69,7 +69,7 @@ module Parslet::Atoms
|
|
69
69
|
# Call back visitor's #visit_re method. See parslet/export for an example.
|
70
70
|
#
|
71
71
|
def accept(visitor)
|
72
|
-
visitor.
|
72
|
+
visitor.visit_re(match)
|
73
73
|
end
|
74
74
|
end
|
75
75
|
end
|
data/lib/parslet/convenience.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class Parslet::
|
1
|
+
class Parslet::Atoms::Base
|
2
2
|
|
3
3
|
# Packages the common idiom
|
4
4
|
#
|
@@ -6,7 +6,7 @@ class Parslet::Parser
|
|
6
6
|
# tree = parser.parse('something')
|
7
7
|
# rescue Parslet::ParseFailed => error
|
8
8
|
# puts error
|
9
|
-
# puts parser.
|
9
|
+
# puts parser.error_tree
|
10
10
|
# end
|
11
11
|
#
|
12
12
|
# into a convenient method.
|
@@ -27,7 +27,7 @@ class Parslet::Parser
|
|
27
27
|
parse str
|
28
28
|
rescue Parslet::ParseFailed => error
|
29
29
|
puts error
|
30
|
-
puts
|
30
|
+
puts error_tree
|
31
31
|
end
|
32
32
|
|
33
33
|
end
|
data/lib/parslet/export.rb
CHANGED
@@ -11,33 +11,33 @@ class Parslet::Parser
|
|
11
11
|
@context = context
|
12
12
|
end
|
13
13
|
|
14
|
-
def
|
14
|
+
def visit_str(str)
|
15
15
|
"\"#{str.inspect[1..-2]}\""
|
16
16
|
end
|
17
|
-
def
|
17
|
+
def visit_re(match)
|
18
18
|
match.to_s
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
22
|
-
context.deferred(name,
|
21
|
+
def visit_entity(name, block)
|
22
|
+
context.deferred(name, block)
|
23
23
|
|
24
24
|
"(#{context.mangle_name(name)})"
|
25
25
|
end
|
26
|
-
def
|
26
|
+
def visit_named(name, parslet)
|
27
27
|
parslet.accept(self)
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
def visit_sequence(parslets)
|
31
31
|
'(' <<
|
32
32
|
parslets.
|
33
33
|
map { |el| el.accept(self) }.
|
34
34
|
join(' ') <<
|
35
35
|
')'
|
36
36
|
end
|
37
|
-
def
|
37
|
+
def visit_repetition(min, max, parslet)
|
38
38
|
parslet.accept(self) << "#{min}*#{max}"
|
39
39
|
end
|
40
|
-
def
|
40
|
+
def visit_alternative(alternatives)
|
41
41
|
'(' <<
|
42
42
|
alternatives.
|
43
43
|
map { |el| el.accept(self) }.
|
@@ -45,18 +45,18 @@ class Parslet::Parser
|
|
45
45
|
')'
|
46
46
|
end
|
47
47
|
|
48
|
-
def
|
48
|
+
def visit_lookahead(positive, bound_parslet)
|
49
49
|
(positive ? '&' : '!') <<
|
50
50
|
bound_parslet.accept(self)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
54
|
class Treetop < Citrus
|
55
|
-
def
|
55
|
+
def visit_repetition(min, max, parslet)
|
56
56
|
parslet.accept(self) << "#{min}..#{max}"
|
57
57
|
end
|
58
58
|
|
59
|
-
def
|
59
|
+
def visit_alternative(alternatives)
|
60
60
|
'(' <<
|
61
61
|
alternatives.
|
62
62
|
map { |el| el.accept(self) }.
|
@@ -88,13 +88,13 @@ class Parslet::Parser
|
|
88
88
|
# @todo is constantly filled by the visitor (see #deferred). We
|
89
89
|
# keep going until it is empty.
|
90
90
|
break if @todo.empty?
|
91
|
-
name,
|
91
|
+
name, block = @todo.shift
|
92
92
|
|
93
93
|
# Track what rules we've already seen. This breaks loops.
|
94
94
|
next if seen.include?(name)
|
95
95
|
seen << name
|
96
96
|
|
97
|
-
output << rule(name,
|
97
|
+
output << rule(name, block.call)
|
98
98
|
end
|
99
99
|
|
100
100
|
output << "end\n"
|
@@ -33,7 +33,7 @@ class Parslet::Expression::Treetop
|
|
33
33
|
rule(:char_class) {
|
34
34
|
(str('[') >>
|
35
35
|
(str('\\') >> any |
|
36
|
-
str(']').
|
36
|
+
str(']').absent? >> any).repeat(1) >>
|
37
37
|
str(']')).as(:match) >> space?
|
38
38
|
}
|
39
39
|
|
@@ -45,7 +45,7 @@ class Parslet::Expression::Treetop
|
|
45
45
|
str('\'') >>
|
46
46
|
(
|
47
47
|
(str('\\') >> any) |
|
48
|
-
(str("'").
|
48
|
+
(str("'").absent? >> any)
|
49
49
|
).repeat.as(:string) >>
|
50
50
|
str('\'') >> space?
|
51
51
|
}
|
data/lib/parslet/parser.rb
CHANGED
@@ -12,6 +12,60 @@
|
|
12
12
|
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
13
13
|
# # Don't know what to do with bbbb at line 1 char 1.
|
14
14
|
#
|
15
|
-
|
15
|
+
# Parslet::Parser is also a grammar atom. This means that you can mix full
|
16
|
+
# fledged parsers freely with small parts of a different parser.
|
17
|
+
#
|
18
|
+
# Example:
|
19
|
+
# class ParserA < Parslet::Parser
|
20
|
+
# root :aaa
|
21
|
+
# rule(:aaa) { str('a').repeat(3,3) }
|
22
|
+
# end
|
23
|
+
# class ParserB < Parslet::Parser
|
24
|
+
# root :expression
|
25
|
+
# rule(:expression) { str('b') >> ParserA.new >> str('b') }
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# In the above example, ParserB would parse something like 'baaab'.
|
29
|
+
#
|
30
|
+
class Parslet::Parser < Parslet::Atoms::Base
|
16
31
|
include Parslet
|
32
|
+
|
33
|
+
class <<self # class methods
|
34
|
+
# Define the parser's #root function. This is the place where you start
|
35
|
+
# parsing; if you have a rule for 'file' that describes what should be
|
36
|
+
# in a file, this would be your root declaration:
|
37
|
+
#
|
38
|
+
# class Parser
|
39
|
+
# root :file
|
40
|
+
# rule(:file) { ... }
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# #root declares a 'parse' function that works just like the parse
|
44
|
+
# function that you can call on a simple parslet, taking a string as input
|
45
|
+
# and producing parse output.
|
46
|
+
#
|
47
|
+
# In a way, #root is a shorthand for:
|
48
|
+
#
|
49
|
+
# def parse(str)
|
50
|
+
# your_parser_root.parse(str)
|
51
|
+
# end
|
52
|
+
#
|
53
|
+
def root(name)
|
54
|
+
define_method(:root) do
|
55
|
+
self.send(name)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def try(source, context) # :nodoc:
|
61
|
+
root.try(source, context)
|
62
|
+
end
|
63
|
+
|
64
|
+
def error_tree # :nodoc:
|
65
|
+
root.error_tree
|
66
|
+
end
|
67
|
+
|
68
|
+
def to_s_inner(prec) # :nodoc:
|
69
|
+
root.to_s(prec)
|
70
|
+
end
|
17
71
|
end
|
data/lib/parslet/rig/rspec.rb
CHANGED
@@ -1,24 +1,50 @@
|
|
1
|
-
RSpec::Matchers.define(:parse) do |input|
|
2
|
-
chain(:as) { |as| @as = as }
|
3
|
-
|
1
|
+
RSpec::Matchers.define(:parse) do |input, opts|
|
4
2
|
match do |parser|
|
5
3
|
begin
|
6
4
|
@result = parser.parse(input)
|
7
|
-
@
|
5
|
+
@block ?
|
6
|
+
@block.call(@result) :
|
7
|
+
(@as == @result || @as.nil?)
|
8
8
|
rescue Parslet::ParseFailed
|
9
|
+
@trace = parser.error_tree.ascii_tree if opts && opts[:trace]
|
9
10
|
false
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
13
14
|
failure_message_for_should do |is|
|
14
|
-
|
15
|
-
"output of parsing #{input.inspect}
|
16
|
-
"#{is.inspect} to
|
15
|
+
if @block
|
16
|
+
"expected output of parsing #{input.inspect}" <<
|
17
|
+
" with #{is.inspect} to meet block conditions, but it didn't"
|
18
|
+
else
|
19
|
+
"expected " <<
|
20
|
+
(@as ?
|
21
|
+
"output of parsing #{input.inspect}"<<
|
22
|
+
" with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
|
23
|
+
"#{is.inspect} to be able to parse #{input.inspect}") <<
|
24
|
+
(@trace ?
|
25
|
+
"\n"+@trace :
|
26
|
+
'')
|
27
|
+
end
|
17
28
|
end
|
18
29
|
|
19
30
|
failure_message_for_should_not do |is|
|
20
|
-
|
21
|
-
"output of parsing #{input.inspect} with #{is.inspect} not to
|
22
|
-
|
31
|
+
if @block
|
32
|
+
"expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
|
33
|
+
else
|
34
|
+
"expected " <<
|
35
|
+
(@as ?
|
36
|
+
"output of parsing #{input.inspect}"<<
|
37
|
+
" with #{is.inspect} not to equal #{@as.inspect}" :
|
38
|
+
|
39
|
+
"#{is.inspect} to not parse #{input.inspect}, but it did")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# NOTE: This has a nodoc tag since the rdoc parser puts this into
|
44
|
+
# Object, a thing I would never allow.
|
45
|
+
def as(expected_output = nil, &block) # :nodoc:
|
46
|
+
@as = expected_output
|
47
|
+
@block = block
|
48
|
+
self
|
23
49
|
end
|
24
50
|
end
|