parslet 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +29 -0
- data/README +2 -4
- data/Rakefile +18 -4
- data/example/comments.rb +11 -13
- data/example/documentation.rb +1 -1
- data/example/email_parser.rb +5 -5
- data/example/empty.rb +2 -2
- data/example/erb.rb +6 -3
- data/example/ip_address.rb +2 -2
- data/example/local.rb +34 -0
- data/example/minilisp.rb +2 -2
- data/example/output/comments.out +8 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/local.out +3 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +1 -3
- data/example/readme.rb +4 -10
- data/example/seasons.rb +2 -1
- data/example/simple_xml.rb +5 -8
- data/example/string_parser.rb +7 -5
- data/lib/parslet.rb +20 -31
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/base.rb +46 -87
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +3 -4
- data/lib/parslet/atoms/lookahead.rb +1 -1
- data/lib/parslet/atoms/re.rb +2 -2
- data/lib/parslet/atoms/str.rb +5 -2
- data/lib/parslet/atoms/transform.rb +75 -0
- data/lib/parslet/atoms/visitor.rb +9 -9
- data/lib/parslet/convenience.rb +3 -3
- data/lib/parslet/export.rb +13 -13
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +55 -1
- data/lib/parslet/rig/rspec.rb +36 -10
- data/lib/parslet/slice.rb +172 -0
- data/lib/parslet/source.rb +72 -83
- data/lib/parslet/source/line_cache.rb +90 -0
- metadata +22 -20
data/lib/parslet/atoms/entity.rb
CHANGED
@@ -9,12 +9,11 @@
|
|
9
9
|
# using the structuring method Parslet.rule.
|
10
10
|
#
|
11
11
|
class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
12
|
-
attr_reader :name, :
|
13
|
-
def initialize(name,
|
12
|
+
attr_reader :name, :block
|
13
|
+
def initialize(name, &block) # :nodoc:
|
14
14
|
super()
|
15
15
|
|
16
16
|
@name = name
|
17
|
-
@context = context
|
18
17
|
@block = block
|
19
18
|
end
|
20
19
|
|
@@ -23,7 +22,7 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
|
|
23
22
|
end
|
24
23
|
|
25
24
|
def parslet
|
26
|
-
@parslet ||=
|
25
|
+
@parslet ||= @block.call.tap { |p|
|
27
26
|
raise_not_implemented unless p
|
28
27
|
}
|
29
28
|
end
|
data/lib/parslet/atoms/re.rb
CHANGED
@@ -12,8 +12,8 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
|
|
12
12
|
def initialize(match) # :nodoc:
|
13
13
|
super()
|
14
14
|
|
15
|
-
@match = match
|
16
|
-
@re = Regexp.new(match, Regexp::MULTILINE)
|
15
|
+
@match = match.to_s
|
16
|
+
@re = Regexp.new(self.match, Regexp::MULTILINE)
|
17
17
|
@error_msgs = {
|
18
18
|
:premature => "Premature end of input",
|
19
19
|
:failed => "Failed to match #{match.inspect[1..-2]}"
|
data/lib/parslet/atoms/str.rb
CHANGED
@@ -9,7 +9,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
9
9
|
def initialize(str)
|
10
10
|
super()
|
11
11
|
|
12
|
-
@str = str
|
12
|
+
@str = str.to_s
|
13
13
|
@error_msgs = {
|
14
14
|
:premature => "Premature end of input",
|
15
15
|
:failed => "Expected #{str.inspect}, but got "
|
@@ -17,6 +17,9 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def try(source, context) # :nodoc:
|
20
|
+
# NOTE: Even though it doesn't look that way, this is the hotspot, the
|
21
|
+
# contents of parslet's inner loop. Changes here affect parslet's speed
|
22
|
+
# enormously.
|
20
23
|
error_pos = source.pos
|
21
24
|
s = source.read(str.size)
|
22
25
|
|
@@ -26,7 +29,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
|
|
26
29
|
|
27
30
|
# Failures:
|
28
31
|
return error(source, @error_msgs[:premature]) unless s && s.size==str.size
|
29
|
-
return error(source, @error_msgs[:failed]
|
32
|
+
return error(source, [@error_msgs[:failed], s], error_pos)
|
30
33
|
end
|
31
34
|
|
32
35
|
def to_s_inner(prec) # :nodoc:
|
@@ -0,0 +1,75 @@
|
|
1
|
+
|
2
|
+
require 'parslet/atoms/visitor'
|
3
|
+
|
4
|
+
# A helper class that allows transforming one grammar into another. You can
|
5
|
+
# use this class as a base class:
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
# class MyTransform < Parslet::Atoms::Transform
|
9
|
+
# def visit_str(str)
|
10
|
+
# # mangle string here
|
11
|
+
# super(str)
|
12
|
+
# end
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# Note that all the methods in a Transform must return parser atoms. The
|
16
|
+
# quickest way to do so is to call super with your own arguments. This will
|
17
|
+
# just create the same kind of atom that was just visited.
|
18
|
+
#
|
19
|
+
# In essence, this base class performs what is called an 'identity transform'
|
20
|
+
# with one small caveat: It returns a brand new grammar composed of brand new
|
21
|
+
# parser atoms. This is like a deep clone of your grammar.
|
22
|
+
#
|
23
|
+
# But nothing stops you from doing something that is far from a deep clone.
|
24
|
+
# You can totally transform the language your grammar accepts. Or maybe
|
25
|
+
# turn all repetitions into non-greedy ones? Go wild.
|
26
|
+
#
|
27
|
+
class Parslet::Atoms::Transform
  # Applies a transformation to a grammar and returns a new grammar that
  # is the result of the transform.
  #
  # Example:
  #   Parslet::Atoms::Transform.new.apply(my_grammar) # => deep clone of my_grammar
  #
  def apply(grammar)
    grammar.accept(self)
  end

  # Rebuilds a string atom from the string it matches.
  #
  def visit_str(str)
    Parslet.str(str)
  end

  # Transforms every member of a sequence and chains the results together
  # again with #>>.
  #
  # NOTE: All members are transformed, including the first one. Seeding the
  # fold with the untransformed first member would leave an original atom
  # inside the supposedly brand new grammar and would bypass any overridden
  # visit_* method for it.
  #
  # NOTE(review): presumably never called with an empty list; in that case
  # this returns nil.
  #
  def visit_sequence(parslets)
    parslets.
      map { |p| p.accept(self) }.
      inject { |a, p| a >> p }
  end

  # Rebuilds a regular expression atom from its match string.
  #
  def visit_re(match)
    Parslet.match(match)
  end

  # Transforms every alternative and joins the results together again
  # with #|. As in #visit_sequence, the first alternative is transformed
  # as well.
  #
  def visit_alternative(parslets)
    parslets.
      map { |p| p.accept(self) }.
      inject { |a, p| a | p }
  end

  # Rebuilds a lookahead with the same polarity around the transformed
  # parslet.
  #
  def visit_lookahead(positive, parslet)
    Parslet::Atoms::Lookahead.new(positive, parslet.accept(self))
  end

  # Rebuilds an entity of the same name whose block transforms the original
  # parslet on demand.
  #
  def visit_entity(name, block)
    # NOTE: This is kinda tricky. We return a new entity that keeps a reference
    # to the transformer around. Once somebody accesses the parslet in that
    # entity, the original block will produce the original parslet, and then
    # we transform that then and there. It's lazy and futuristic!
    transformer = self
    transformed_block = proc { block.call.accept(transformer) }
    Parslet::Atoms::Entity.new(name, &transformed_block)
  end

  # Transforms the wrapped parslet and names the result +name+ again.
  #
  def visit_named(name, parslet)
    parslet.accept(self).as(name)
  end

  # Transforms the repeated parslet and reapplies the same min/max bounds.
  #
  def visit_repetition(min, max, parslet)
    parslet.accept(self).repeat(min, max)
  end
end
|
@@ -5,7 +5,7 @@
|
|
5
5
|
module Parslet::Atoms
|
6
6
|
class Base
|
7
7
|
def accept(visitor)
|
8
|
-
raise NotImplementedError, "No
|
8
|
+
raise NotImplementedError, "No #accept method on #{self.class.name}."
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
@@ -13,7 +13,7 @@ module Parslet::Atoms
|
|
13
13
|
# Calls back the visitor's #visit_str method. See parslet/export for an example.
|
14
14
|
#
|
15
15
|
def accept(visitor)
|
16
|
-
visitor.
|
16
|
+
visitor.visit_str(str)
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
@@ -21,7 +21,7 @@ module Parslet::Atoms
|
|
21
21
|
# Calls back the visitor's #visit_entity method. See parslet/export for an example.
|
22
22
|
#
|
23
23
|
def accept(visitor)
|
24
|
-
visitor.
|
24
|
+
visitor.visit_entity(name, block)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
@@ -29,7 +29,7 @@ module Parslet::Atoms
|
|
29
29
|
# Calls back the visitor's #visit_named method. See parslet/export for an example.
|
30
30
|
#
|
31
31
|
def accept(visitor)
|
32
|
-
visitor.
|
32
|
+
visitor.visit_named(name, parslet)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -37,7 +37,7 @@ module Parslet::Atoms
|
|
37
37
|
# Calls back the visitor's #visit_sequence method. See parslet/export for an example.
|
38
38
|
#
|
39
39
|
def accept(visitor)
|
40
|
-
visitor.
|
40
|
+
visitor.visit_sequence(parslets)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
@@ -45,7 +45,7 @@ module Parslet::Atoms
|
|
45
45
|
# Calls back the visitor's #visit_repetition method. See parslet/export for an example.
|
46
46
|
#
|
47
47
|
def accept(visitor)
|
48
|
-
visitor.
|
48
|
+
visitor.visit_repetition(min, max, parslet)
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
@@ -53,7 +53,7 @@ module Parslet::Atoms
|
|
53
53
|
# Calls back the visitor's #visit_alternative method. See parslet/export for an example.
|
54
54
|
#
|
55
55
|
def accept(visitor)
|
56
|
-
visitor.
|
56
|
+
visitor.visit_alternative(alternatives)
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
@@ -61,7 +61,7 @@ module Parslet::Atoms
|
|
61
61
|
# Calls back the visitor's #visit_lookahead method. See parslet/export for an example.
|
62
62
|
#
|
63
63
|
def accept(visitor)
|
64
|
-
visitor.
|
64
|
+
visitor.visit_lookahead(positive, bound_parslet)
|
65
65
|
end
|
66
66
|
end
|
67
67
|
|
@@ -69,7 +69,7 @@ module Parslet::Atoms
|
|
69
69
|
# Calls back the visitor's #visit_re method. See parslet/export for an example.
|
70
70
|
#
|
71
71
|
def accept(visitor)
|
72
|
-
visitor.
|
72
|
+
visitor.visit_re(match)
|
73
73
|
end
|
74
74
|
end
|
75
75
|
end
|
data/lib/parslet/convenience.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class Parslet::
|
1
|
+
class Parslet::Atoms::Base
|
2
2
|
|
3
3
|
# Packages the common idiom
|
4
4
|
#
|
@@ -6,7 +6,7 @@ class Parslet::Parser
|
|
6
6
|
# tree = parser.parse('something')
|
7
7
|
# rescue Parslet::ParseFailed => error
|
8
8
|
# puts error
|
9
|
-
# puts parser.
|
9
|
+
# puts parser.error_tree
|
10
10
|
# end
|
11
11
|
#
|
12
12
|
# into a convenient method.
|
@@ -27,7 +27,7 @@ class Parslet::Parser
|
|
27
27
|
parse str
|
28
28
|
rescue Parslet::ParseFailed => error
|
29
29
|
puts error
|
30
|
-
puts
|
30
|
+
puts error_tree
|
31
31
|
end
|
32
32
|
|
33
33
|
end
|
data/lib/parslet/export.rb
CHANGED
@@ -11,33 +11,33 @@ class Parslet::Parser
|
|
11
11
|
@context = context
|
12
12
|
end
|
13
13
|
|
14
|
-
def
|
14
|
+
def visit_str(str)
|
15
15
|
"\"#{str.inspect[1..-2]}\""
|
16
16
|
end
|
17
|
-
def
|
17
|
+
def visit_re(match)
|
18
18
|
match.to_s
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
22
|
-
context.deferred(name,
|
21
|
+
def visit_entity(name, block)
|
22
|
+
context.deferred(name, block)
|
23
23
|
|
24
24
|
"(#{context.mangle_name(name)})"
|
25
25
|
end
|
26
|
-
def
|
26
|
+
def visit_named(name, parslet)
|
27
27
|
parslet.accept(self)
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
def visit_sequence(parslets)
|
31
31
|
'(' <<
|
32
32
|
parslets.
|
33
33
|
map { |el| el.accept(self) }.
|
34
34
|
join(' ') <<
|
35
35
|
')'
|
36
36
|
end
|
37
|
-
def
|
37
|
+
def visit_repetition(min, max, parslet)
|
38
38
|
parslet.accept(self) << "#{min}*#{max}"
|
39
39
|
end
|
40
|
-
def
|
40
|
+
def visit_alternative(alternatives)
|
41
41
|
'(' <<
|
42
42
|
alternatives.
|
43
43
|
map { |el| el.accept(self) }.
|
@@ -45,18 +45,18 @@ class Parslet::Parser
|
|
45
45
|
')'
|
46
46
|
end
|
47
47
|
|
48
|
-
def
|
48
|
+
def visit_lookahead(positive, bound_parslet)
|
49
49
|
(positive ? '&' : '!') <<
|
50
50
|
bound_parslet.accept(self)
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
54
|
class Treetop < Citrus
|
55
|
-
def
|
55
|
+
def visit_repetition(min, max, parslet)
|
56
56
|
parslet.accept(self) << "#{min}..#{max}"
|
57
57
|
end
|
58
58
|
|
59
|
-
def
|
59
|
+
def visit_alternative(alternatives)
|
60
60
|
'(' <<
|
61
61
|
alternatives.
|
62
62
|
map { |el| el.accept(self) }.
|
@@ -88,13 +88,13 @@ class Parslet::Parser
|
|
88
88
|
# @todo is constantly filled by the visitor (see #deferred). We
|
89
89
|
# keep going until it is empty.
|
90
90
|
break if @todo.empty?
|
91
|
-
name,
|
91
|
+
name, block = @todo.shift
|
92
92
|
|
93
93
|
# Track what rules we've already seen. This breaks loops.
|
94
94
|
next if seen.include?(name)
|
95
95
|
seen << name
|
96
96
|
|
97
|
-
output << rule(name,
|
97
|
+
output << rule(name, block.call)
|
98
98
|
end
|
99
99
|
|
100
100
|
output << "end\n"
|
@@ -33,7 +33,7 @@ class Parslet::Expression::Treetop
|
|
33
33
|
rule(:char_class) {
|
34
34
|
(str('[') >>
|
35
35
|
(str('\\') >> any |
|
36
|
-
str(']').
|
36
|
+
str(']').absent? >> any).repeat(1) >>
|
37
37
|
str(']')).as(:match) >> space?
|
38
38
|
}
|
39
39
|
|
@@ -45,7 +45,7 @@ class Parslet::Expression::Treetop
|
|
45
45
|
str('\'') >>
|
46
46
|
(
|
47
47
|
(str('\\') >> any) |
|
48
|
-
(str("'").
|
48
|
+
(str("'").absent? >> any)
|
49
49
|
).repeat.as(:string) >>
|
50
50
|
str('\'') >> space?
|
51
51
|
}
|
data/lib/parslet/parser.rb
CHANGED
@@ -12,6 +12,60 @@
|
|
12
12
|
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
13
13
|
# # Don't know what to do with bbbb at line 1 char 1.
|
14
14
|
#
|
15
|
-
|
15
|
+
# Parslet::Parser is also a grammar atom. This means that you can mix full
|
16
|
+
# fledged parsers freely with small parts of a different parser.
|
17
|
+
#
|
18
|
+
# Example:
|
19
|
+
# class ParserA < Parslet::Parser
|
20
|
+
# root :aaa
|
21
|
+
# rule(:aaa) { str('a').repeat(3,3) }
|
22
|
+
# end
|
23
|
+
# class ParserB < Parslet::Parser
|
24
|
+
# root :expression
|
25
|
+
# rule(:expression) { str('b') >> ParserA.new >> str('b') }
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# In the above example, ParserB would parse something like 'baaab'.
|
29
|
+
#
|
30
|
+
class Parslet::Parser < Parslet::Atoms::Base
  include Parslet

  # Declares the rule that parsing starts with. If you have a rule for
  # 'file' that describes what should be in a file, this would be your root
  # declaration:
  #
  #   class Parser
  #     root :file
  #     rule(:file) { ... }
  #   end
  #
  # Calling this defines an instance method #root that invokes the method
  # called +name+ on the parser and returns its result. The other methods of
  # this class hand their work to whatever #root returns.
  #
  def self.root(name)
    define_method(:root) { send(name) }
  end

  # Parsing is handed to the root parslet.
  #
  def try(source, context) # :nodoc:
    root.try(source, context)
  end

  # The error tree is the one of the root parslet.
  #
  def error_tree # :nodoc:
    root.error_tree
  end

  # The string form is the one of the root parslet, rendered at the
  # precedence +prec+.
  #
  def to_s_inner(prec) # :nodoc:
    root.to_s(prec)
  end
end
|
data/lib/parslet/rig/rspec.rb
CHANGED
@@ -1,24 +1,50 @@
|
|
1
|
-
RSpec::Matchers.define(:parse) do |input|
|
2
|
-
chain(:as) { |as| @as = as }
|
3
|
-
|
1
|
+
RSpec::Matchers.define(:parse) do |input, opts|
|
4
2
|
match do |parser|
|
5
3
|
begin
|
6
4
|
@result = parser.parse(input)
|
7
|
-
@
|
5
|
+
@block ?
|
6
|
+
@block.call(@result) :
|
7
|
+
(@as == @result || @as.nil?)
|
8
8
|
rescue Parslet::ParseFailed
|
9
|
+
@trace = parser.error_tree.ascii_tree if opts && opts[:trace]
|
9
10
|
false
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
13
14
|
failure_message_for_should do |is|
|
14
|
-
|
15
|
-
"output of parsing #{input.inspect}
|
16
|
-
"#{is.inspect} to
|
15
|
+
if @block
|
16
|
+
"expected output of parsing #{input.inspect}" <<
|
17
|
+
" with #{is.inspect} to meet block conditions, but it didn't"
|
18
|
+
else
|
19
|
+
"expected " <<
|
20
|
+
(@as ?
|
21
|
+
"output of parsing #{input.inspect}"<<
|
22
|
+
" with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
|
23
|
+
"#{is.inspect} to be able to parse #{input.inspect}") <<
|
24
|
+
(@trace ?
|
25
|
+
"\n"+@trace :
|
26
|
+
'')
|
27
|
+
end
|
17
28
|
end
|
18
29
|
|
19
30
|
failure_message_for_should_not do |is|
|
20
|
-
|
21
|
-
"output of parsing #{input.inspect} with #{is.inspect} not to
|
22
|
-
|
31
|
+
if @block
|
32
|
+
"expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
|
33
|
+
else
|
34
|
+
"expected " <<
|
35
|
+
(@as ?
|
36
|
+
"output of parsing #{input.inspect}"<<
|
37
|
+
" with #{is.inspect} not to equal #{@as.inspect}" :
|
38
|
+
|
39
|
+
"#{is.inspect} to not parse #{input.inspect}, but it did")
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# NOTE: This has a nodoc tag since the rdoc parser puts this into
|
44
|
+
# Object, a thing I would never allow.
|
45
|
+
def as(expected_output = nil, &block) # :nodoc:
|
46
|
+
@as = expected_output
|
47
|
+
@block = block
|
48
|
+
self
|
23
49
|
end
|
24
50
|
end
|