parslet 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +14 -0
- data/README +1 -3
- data/Rakefile +2 -2
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/ip_address.rb +125 -0
- data/example/minilisp.rb +101 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +36 -0
- data/example/seasons.rb +45 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +57 -0
- data/example/string_parser.rb +75 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +2 -3
- data/lib/parslet/atoms/base.rb +56 -19
- data/lib/parslet/atoms/lookahead.rb +12 -9
- data/lib/parslet/atoms/re.rb +3 -3
- data/lib/parslet/atoms/repetition.rb +15 -16
- data/lib/parslet/atoms/sequence.rb +7 -5
- data/lib/parslet/pattern.rb +2 -1
- data/lib/parslet/rig/rspec.rb +24 -0
- metadata +16 -3
data/HISTORY.txt
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
= 1.1.0 / ???
|
2
|
+
|
3
|
+
+ Uses throw/catch internally for an order of magnitude increase in execution
|
4
|
+
speed.
|
5
|
+
|
6
|
+
= 1.0.1 / 17Jan2011
|
7
|
+
|
8
|
+
A happy new year!
|
9
|
+
|
10
|
+
! FIX: Parslet::Transform was wrongly fixed earlier - it now won't mangle
|
11
|
+
hashes anymore. (Blake Sweeney)
|
12
|
+
|
13
|
+
+ parslet/rig/rspec.rb contains useful rspec matchers. (R. Konstantin Haase)
|
14
|
+
|
1
15
|
= 1.0.0 / 29Dez2010
|
2
16
|
|
3
17
|
- #each_match was removed. There was some duplication of code that even
|
data/README
CHANGED
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ spec = Gem::Specification.new do |s|
|
|
18
18
|
|
19
19
|
# Change these as appropriate
|
20
20
|
s.name = "parslet"
|
21
|
-
s.version = "1.0.0"
|
21
|
+
s.version = "1.0.1"
|
22
22
|
s.summary = "Parser construction library with great error reporting in Ruby."
|
23
23
|
s.author = "Kaspar Schiess"
|
24
24
|
s.email = "kaspar.schiess@absurd.li"
|
@@ -29,7 +29,7 @@ spec = Gem::Specification.new do |s|
|
|
29
29
|
s.rdoc_options = %w(--main README)
|
30
30
|
|
31
31
|
# Add any extra files to include in the gem
|
32
|
-
s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{
|
32
|
+
s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
|
33
33
|
s.require_paths = ["lib"]
|
34
34
|
|
35
35
|
# If you want to depend on other gems, add them here, along with any
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# A small example that shows a really small parser and what happens on parser
|
2
|
+
# errors.
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
|
9
|
+
class MyParser < Parslet::Parser
|
10
|
+
rule(:a) { str('a').repeat }
|
11
|
+
|
12
|
+
def parse(str)
|
13
|
+
a.parse(str)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
pp MyParser.new.parse('aaaa')
|
18
|
+
pp MyParser.new.parse('bbbb')
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Example contributed by Hal Brodigan (postmodern). Thanks!
|
4
|
+
|
5
|
+
require 'parslet'
|
6
|
+
|
7
|
+
class EmailParser < Parslet::Parser
|
8
|
+
rule(:space) { match('\s').repeat(1) }
|
9
|
+
rule(:space?) { space.maybe }
|
10
|
+
rule(:dash?) { match['_-'].maybe }
|
11
|
+
|
12
|
+
rule(:at) {
|
13
|
+
str('@') |
|
14
|
+
(dash? >> (str('at') | str('AT')) >> dash?)
|
15
|
+
}
|
16
|
+
rule(:dot) {
|
17
|
+
str('.') |
|
18
|
+
(dash? >> (str('dot') | str('DOT')) >> dash?)
|
19
|
+
}
|
20
|
+
|
21
|
+
rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
|
22
|
+
rule(:separator) { space? >> dot.as(:dot) >> space? | space }
|
23
|
+
rule(:words) { word >> (separator >> word).repeat }
|
24
|
+
|
25
|
+
rule(:email) {
|
26
|
+
(words >> space? >> at.as(:at) >> space? >> words).as(:email)
|
27
|
+
}
|
28
|
+
|
29
|
+
root(:email)
|
30
|
+
end
|
31
|
+
|
32
|
+
class EmailSanitizer < Parslet::Transform
|
33
|
+
rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
|
34
|
+
rule(:at => simple(:at)) { '@' }
|
35
|
+
rule(:word => simple(:word)) { word }
|
36
|
+
rule(:email => sequence(:email)) { email.join }
|
37
|
+
end
|
38
|
+
|
39
|
+
parser = EmailParser.new
|
40
|
+
sanitizer = EmailSanitizer.new
|
41
|
+
|
42
|
+
unless ARGV[0]
|
43
|
+
STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
|
44
|
+
exit -1
|
45
|
+
end
|
46
|
+
|
47
|
+
begin
|
48
|
+
puts sanitizer.apply(parser.parse(ARGV[0]))
|
49
|
+
rescue Parslet::ParseFailed => error
|
50
|
+
puts error
|
51
|
+
puts parser.root.error_tree
|
52
|
+
end
|
data/example/empty.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Basically just demonstrates that you can leave rules empty and get a nice
|
2
|
+
# NotImplementedError. A way to quickly spec out your parser rules?
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'parslet'
|
7
|
+
|
8
|
+
class Parser < Parslet::Parser
|
9
|
+
rule(:empty) { }
|
10
|
+
end
|
11
|
+
|
12
|
+
|
13
|
+
Parser.new.empty.parslet
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# This example is heavily inspired by citrus' ip.citrus. Have a look at both
|
2
|
+
# of these to get some choice!
|
3
|
+
|
4
|
+
# The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
|
5
|
+
# Uniform Resource Identifier (URI): Generic Syntax.
|
6
|
+
#
|
7
|
+
# See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
|
8
|
+
|
9
|
+
$:.unshift '../lib'
|
10
|
+
|
11
|
+
require 'pp'
|
12
|
+
require 'parslet'
|
13
|
+
|
14
|
+
module IPv4
|
15
|
+
include Parslet
|
16
|
+
|
17
|
+
# A host identified by an IPv4 literal address is represented in
|
18
|
+
# dotted-decimal notation (a sequence of four decimal numbers in the range 0
|
19
|
+
# to 255, separated by "."), as described in [RFC1123] by reference to
|
20
|
+
# [RFC0952]. Note that other forms of dotted notation may be interpreted on
|
21
|
+
# some platforms, as described in Section 7.4, but only the dotted-decimal
|
22
|
+
# form of four octets is allowed by this grammar.
|
23
|
+
rule(:ipv4) {
|
24
|
+
(dec_octet >> str('.') >> dec_octet >> str('.') >>
|
25
|
+
dec_octet >> str('.') >> dec_octet).as(:ipv4)
|
26
|
+
}
|
27
|
+
|
28
|
+
rule(:dec_octet) {
|
29
|
+
str('25') >> match("[0-5]") |
|
30
|
+
str('2') >> match("[0-4]") >> digit |
|
31
|
+
str('1') >> digit >> digit |
|
32
|
+
match('[1-9]') >> digit |
|
33
|
+
digit
|
34
|
+
}
|
35
|
+
|
36
|
+
rule(:digit) {
|
37
|
+
match('[0-9]')
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
# Must be used in concert with IPv4
|
42
|
+
module IPv6
|
43
|
+
include Parslet
|
44
|
+
|
45
|
+
rule(:colon) { str(':') }
|
46
|
+
rule(:dcolon) { colon >> colon }
|
47
|
+
|
48
|
+
# h16 :
|
49
|
+
def h16r(times)
|
50
|
+
(h16 >> colon).repeat(times, times)
|
51
|
+
end
|
52
|
+
|
53
|
+
# : h16
|
54
|
+
def h16l(times)
|
55
|
+
(colon >> h16).repeat(0,times)
|
56
|
+
end
|
57
|
+
|
58
|
+
# A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is
|
59
|
+
# represented numerically in case-insensitive hexadecimal, using one to four
|
60
|
+
# hexadecimal digits (leading zeroes are permitted). The eight encoded
|
61
|
+
# pieces are given most-significant first, separated by colon characters.
|
62
|
+
# Optionally, the least-significant two pieces may instead be represented in
|
63
|
+
# IPv4 address textual format. A sequence of one or more consecutive
|
64
|
+
# zero-valued 16-bit pieces within the address may be elided, omitting all
|
65
|
+
# their digits and leaving exactly two consecutive colons in their place to
|
66
|
+
# mark the elision.
|
67
|
+
rule(:ipv6) {
|
68
|
+
(
|
69
|
+
(
|
70
|
+
h16r(6) |
|
71
|
+
dcolon >> h16r(5) |
|
72
|
+
h16.maybe >> dcolon >> h16r(4) |
|
73
|
+
(h16 >> h16l(1)).maybe >> dcolon >> h16r(3) |
|
74
|
+
(h16 >> h16l(2)).maybe >> dcolon >> h16r(2) |
|
75
|
+
(h16 >> h16l(3)).maybe >> dcolon >> h16r(1) |
|
76
|
+
(h16 >> h16l(4)).maybe >> dcolon
|
77
|
+
) >> ls32 |
|
78
|
+
(h16 >> h16l(5)).maybe >> dcolon >> h16 |
|
79
|
+
(h16 >> h16l(6)).maybe >> dcolon
|
80
|
+
).as(:ipv6)
|
81
|
+
}
|
82
|
+
|
83
|
+
rule(:h16) {
|
84
|
+
hexdigit.repeat(1,4)
|
85
|
+
}
|
86
|
+
|
87
|
+
rule(:ls32) {
|
88
|
+
(h16 >> colon >> h16) |
|
89
|
+
ipv4
|
90
|
+
}
|
91
|
+
|
92
|
+
rule(:hexdigit) {
|
93
|
+
digit | match("[a-fA-F]")
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
class Parser
|
98
|
+
include IPv4
|
99
|
+
include IPv6
|
100
|
+
|
101
|
+
def parse(str)
|
102
|
+
(ipv4 | ipv6).parse(str)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
%W(
|
107
|
+
0.0.0.0
|
108
|
+
255.255.255.255
|
109
|
+
255.255.255
|
110
|
+
1:2:3:4:5:6:7:8
|
111
|
+
12AD:34FC:A453:1922::
|
112
|
+
12AD::34FC
|
113
|
+
12AD::
|
114
|
+
::
|
115
|
+
1:2
|
116
|
+
).each do |address|
|
117
|
+
parser = Parser.new
|
118
|
+
printf "%30s -> ", address
|
119
|
+
begin
|
120
|
+
result = parser.parse(address)
|
121
|
+
puts result.inspect
|
122
|
+
rescue Parslet::ParseFailed => m
|
123
|
+
puts "Failed: #{m}"
|
124
|
+
end
|
125
|
+
end
|
data/example/minilisp.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# Reproduces [1] using parslet.
|
2
|
+
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
|
9
|
+
module MiniLisp
|
10
|
+
class Parser < Parslet::Parser
|
11
|
+
root :expression
|
12
|
+
rule(:expression) {
|
13
|
+
space? >> str('(') >> space? >> body >> str(')')
|
14
|
+
}
|
15
|
+
|
16
|
+
rule(:body) {
|
17
|
+
(expression | identifier | float | integer | string).repeat.as(:exp)
|
18
|
+
}
|
19
|
+
|
20
|
+
rule(:space) {
|
21
|
+
match('\s').repeat(1)
|
22
|
+
}
|
23
|
+
rule(:space?) {
|
24
|
+
space.maybe
|
25
|
+
}
|
26
|
+
|
27
|
+
rule(:identifier) {
|
28
|
+
(match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
|
29
|
+
}
|
30
|
+
|
31
|
+
rule(:float) {
|
32
|
+
(
|
33
|
+
integer >> (
|
34
|
+
str('.') >> match('[0-9]').repeat(1) |
|
35
|
+
str('e') >> match('[0-9]').repeat(1)
|
36
|
+
).as(:e)
|
37
|
+
).as(:float) >> space?
|
38
|
+
}
|
39
|
+
|
40
|
+
rule(:integer) {
|
41
|
+
((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
|
42
|
+
}
|
43
|
+
|
44
|
+
rule(:string) {
|
45
|
+
str('"') >> (
|
46
|
+
str('\\') >> any |
|
47
|
+
str('"').absnt? >> any
|
48
|
+
).repeat.as(:string) >> str('"') >> space?
|
49
|
+
}
|
50
|
+
end
|
51
|
+
|
52
|
+
class Transform
|
53
|
+
include Parslet
|
54
|
+
|
55
|
+
attr_reader :t
|
56
|
+
def initialize
|
57
|
+
@t = Parslet::Transform.new
|
58
|
+
|
59
|
+
# To understand these, take a look at what comes out of the parser.
|
60
|
+
t.rule(:identifier => simple(:ident)) { ident.to_sym }
|
61
|
+
|
62
|
+
t.rule(:string => simple(:str)) { str }
|
63
|
+
|
64
|
+
t.rule(:integer => simple(:int)) { Integer(int) }
|
65
|
+
|
66
|
+
t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
|
67
|
+
|
68
|
+
t.rule(:exp => subtree(:exp)) { exp }
|
69
|
+
end
|
70
|
+
|
71
|
+
def do(tree)
|
72
|
+
t.apply(tree)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
parser = MiniLisp::Parser.new
|
78
|
+
transform = MiniLisp::Transform.new
|
79
|
+
|
80
|
+
# Parse stage
|
81
|
+
begin
|
82
|
+
result = parser.parse %Q{
|
83
|
+
(define test (lambda ()
|
84
|
+
(begin
|
85
|
+
(display "something")
|
86
|
+
(display 1)
|
87
|
+
(display 3.08))))
|
88
|
+
(test)
|
89
|
+
}
|
90
|
+
rescue Parslet::ParseFailed => failure
|
91
|
+
puts failure
|
92
|
+
puts parser.root.error_tree if parser.root.cause
|
93
|
+
exit
|
94
|
+
end
|
95
|
+
|
96
|
+
# Transform the result
|
97
|
+
pp transform.do(result)
|
98
|
+
|
99
|
+
# Thereby reducing it to the earlier problem:
|
100
|
+
# http://github.com/kschiess/toylisp
|
101
|
+
|
data/example/parens.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# A small example that demonstrates the power of tree pattern matching. Also
|
2
|
+
# uses '.as(:name)' to construct a tree that can reliably be matched
|
3
|
+
# afterwards.
|
4
|
+
|
5
|
+
$:.unshift '../lib'
|
6
|
+
|
7
|
+
require 'pp'
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
module LISP # as in 'lots of insipid and stupid parenthesis'
|
11
|
+
class Parser < Parslet::Parser
|
12
|
+
rule(:balanced) {
|
13
|
+
str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
|
14
|
+
}
|
15
|
+
|
16
|
+
root(:balanced)
|
17
|
+
end
|
18
|
+
|
19
|
+
class Transform < Parslet::Transform
|
20
|
+
rule(:l => '(', :m => simple(:x), :r => ')') {
|
21
|
+
# innermost :m will contain nil
|
22
|
+
x.nil? ? 1 : x+1
|
23
|
+
}
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
parser = LISP::Parser.new
|
28
|
+
transform = LISP::Transform.new
|
29
|
+
%w!
|
30
|
+
()
|
31
|
+
(())
|
32
|
+
((((()))))
|
33
|
+
((())
|
34
|
+
!.each do |pexp|
|
35
|
+
begin
|
36
|
+
result = parser.parse(pexp)
|
37
|
+
puts "#{"%20s"%pexp}: #{result.inspect} (#{transform.apply(result)} parens)"
|
38
|
+
rescue Parslet::ParseFailed => m
|
39
|
+
puts "#{"%20s"%pexp}: #{m}"
|
40
|
+
end
|
41
|
+
puts
|
42
|
+
end
|
data/example/readme.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# The example from the readme. With this, I am making sure that the readme
|
2
|
+
# 'works'. Is this too messy?
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
include Parslet
|
9
|
+
|
10
|
+
require 'parslet'
|
11
|
+
include Parslet
|
12
|
+
|
13
|
+
# Constructs a parser using a Parsing Expression Grammar-like DSL:
|
14
|
+
parser = str('"') >>
|
15
|
+
(
|
16
|
+
str('\\') >> any |
|
17
|
+
str('"').absnt? >> any
|
18
|
+
).repeat.as(:string) >>
|
19
|
+
str('"')
|
20
|
+
|
21
|
+
# Parse the string and capture parts of the interpretation (:string above)
|
22
|
+
tree = parser.parse(%Q{
|
23
|
+
"This is a \\"String\\" in which you can escape stuff"
|
24
|
+
}.strip)
|
25
|
+
|
26
|
+
tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
|
27
|
+
|
28
|
+
# Here's how you can grab results from that tree:
|
29
|
+
|
30
|
+
# 1)
|
31
|
+
transform = Parslet::Transform.new do
|
32
|
+
rule(:string => simple(:x)) {
|
33
|
+
puts "String contents: #{x}" }
|
34
|
+
end
|
35
|
+
transform.apply(tree)
|
36
|
+
|
data/example/seasons.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
$:.unshift '../lib/'
|
2
|
+
require 'parslet'
|
3
|
+
require 'pp'
|
4
|
+
|
5
|
+
tree = {:bud => {:stem => []}}
|
6
|
+
|
7
|
+
class Spring < Parslet::Transform
|
8
|
+
rule(:stem => sequence(:branches)) {
|
9
|
+
{:stem => (branches + [{:branch => :leaf}])}
|
10
|
+
}
|
11
|
+
end
|
12
|
+
class Summer < Parslet::Transform
|
13
|
+
rule(:stem => subtree(:branches)) {
|
14
|
+
new_branches = branches.map { |b| {:branch => [:leaf, :flower]} }
|
15
|
+
{:stem => new_branches}
|
16
|
+
}
|
17
|
+
end
|
18
|
+
class Fall < Parslet::Transform
|
19
|
+
rule(:branch => sequence(:x)) {
|
20
|
+
x.each { |e| puts "Fruit!" if e==:flower }
|
21
|
+
x.each { |e| puts "Falling Leaves!" if e==:leaf }
|
22
|
+
{:branch => []}
|
23
|
+
}
|
24
|
+
end
|
25
|
+
class Winter < Parslet::Transform
|
26
|
+
rule(:stem => subtree(:x)) {
|
27
|
+
{:stem => []}
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
def do_seasons(tree)
|
32
|
+
[Spring, Summer, Fall, Winter].each do |season|
|
33
|
+
p "And when #{season} comes"
|
34
|
+
tree = season.new.apply(tree)
|
35
|
+
pp tree
|
36
|
+
puts
|
37
|
+
end
|
38
|
+
tree
|
39
|
+
end
|
40
|
+
|
41
|
+
# What marvel of life!
|
42
|
+
tree = do_seasons(tree)
|
43
|
+
tree = do_seasons(tree)
|
44
|
+
|
45
|
+
|
data/example/simple.lit
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# A simple xml parser. It is simple in the respect that it doesn't address
|
2
|
+
# any of the complexities of XML. This is ruby 1.9.
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
|
9
|
+
module XML
|
10
|
+
include Parslet
|
11
|
+
|
12
|
+
root :document
|
13
|
+
|
14
|
+
rule(:document) {
|
15
|
+
tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c) |
|
16
|
+
text
|
17
|
+
}
|
18
|
+
|
19
|
+
# Perhaps we could have some syntax sugar to make this more easy?
|
20
|
+
#
|
21
|
+
def tag(opts={})
|
22
|
+
close = opts[:close] || false
|
23
|
+
|
24
|
+
parslet = str('<')
|
25
|
+
parslet = parslet >> str('/') if close
|
26
|
+
parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
|
27
|
+
parslet = parslet >> str('>')
|
28
|
+
|
29
|
+
parslet
|
30
|
+
end
|
31
|
+
|
32
|
+
rule(:text) {
|
33
|
+
match('[^<>]').repeat(0)
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
def check(xml)
|
38
|
+
include XML
|
39
|
+
r=parse(xml)
|
40
|
+
|
41
|
+
# We'll validate the tree by reducing valid pairs of tags into simply the
|
42
|
+
# string "verified". If the transformation ends on a string, then the
|
43
|
+
# document was 'valid'.
|
44
|
+
#
|
45
|
+
t = Parslet::Transform.new do
|
46
|
+
rule(
|
47
|
+
o: {name: simple(:tag)},
|
48
|
+
c: {name: simple(:tag)},
|
49
|
+
i: simple(:t)
|
50
|
+
) { 'verified' }
|
51
|
+
end
|
52
|
+
|
53
|
+
t.apply(r)
|
54
|
+
end
|
55
|
+
|
56
|
+
pp check("<a><b>some text in the tags</b></a>")
|
57
|
+
pp check("<b><b>some text in the tags</b></a>")
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# A more complex parser that illustrates how a compiler might be constructed.
|
2
|
+
# The parser recognizes strings and integer literals and constructs almost a
|
3
|
+
# useful AST from the file contents.
|
4
|
+
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
$:.unshift '../lib/'
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
include Parslet
|
11
|
+
|
12
|
+
class LiteralsParser < Parslet::Parser
|
13
|
+
rule :space do
|
14
|
+
(match '[ ]').repeat(1)
|
15
|
+
end
|
16
|
+
|
17
|
+
rule :literals do
|
18
|
+
(literal >> eol).repeat
|
19
|
+
end
|
20
|
+
|
21
|
+
rule :literal do
|
22
|
+
(integer | string).as(:literal) >> space.maybe
|
23
|
+
end
|
24
|
+
|
25
|
+
rule :string do
|
26
|
+
str('"') >>
|
27
|
+
(
|
28
|
+
(str('\\') >> any) |
|
29
|
+
(str('"').absnt? >> any)
|
30
|
+
).repeat.as(:string) >>
|
31
|
+
str('"')
|
32
|
+
end
|
33
|
+
|
34
|
+
rule :integer do
|
35
|
+
match('[0-9]').repeat(1).as(:integer)
|
36
|
+
end
|
37
|
+
|
38
|
+
rule :eol do
|
39
|
+
line_end.repeat(1)
|
40
|
+
end
|
41
|
+
|
42
|
+
rule :line_end do
|
43
|
+
crlf >> space.maybe
|
44
|
+
end
|
45
|
+
|
46
|
+
rule :crlf do
|
47
|
+
match('[\r\n]').repeat(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
root :literals
|
51
|
+
end
|
52
|
+
|
53
|
+
parsetree = LiteralsParser.new.parse(
|
54
|
+
File.read('simple.lit'))
|
55
|
+
|
56
|
+
class Lit < Struct.new(:text)
|
57
|
+
def to_s
|
58
|
+
text.inspect
|
59
|
+
end
|
60
|
+
end
|
61
|
+
class StringLit < Lit
|
62
|
+
end
|
63
|
+
class IntLit < Lit
|
64
|
+
def to_s
|
65
|
+
text
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
transform = Parslet::Transform.new do
|
70
|
+
rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
|
71
|
+
rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
|
72
|
+
end
|
73
|
+
|
74
|
+
ast = transform.apply(parsetree)
|
75
|
+
pp ast
|
data/example/test.lit
ADDED
data/lib/parslet.rb
CHANGED
@@ -30,10 +30,9 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
|
|
30
30
|
|
31
31
|
def try(io) # :nodoc:
|
32
32
|
alternatives.each { |a|
|
33
|
-
|
33
|
+
catch(:error) {
|
34
34
|
return a.apply(io)
|
35
|
-
|
36
|
-
end
|
35
|
+
}
|
37
36
|
}
|
38
37
|
# If we reach this point, all alternatives have failed.
|
39
38
|
error(io, "Expected one of #{alternatives.inspect}.")
|
data/lib/parslet/atoms/base.rb
CHANGED
@@ -13,7 +13,18 @@ class Parslet::Atoms::Base
|
|
13
13
|
io = StringIO.new(io)
|
14
14
|
end
|
15
15
|
|
16
|
-
result =
|
16
|
+
result = nil
|
17
|
+
error_message_or_success = catch(:error) {
|
18
|
+
result = apply(io)
|
19
|
+
:success
|
20
|
+
}
|
21
|
+
|
22
|
+
# If we didn't succeed the parse, raise an exception for the user.
|
23
|
+
# Stack trace will be off, but the error tree should explain the reason
|
24
|
+
# it failed.
|
25
|
+
if error_message_or_success != :success
|
26
|
+
raise Parslet::ParseFailed, error_message_or_success
|
27
|
+
end
|
17
28
|
|
18
29
|
# If we haven't consumed the input, then the pattern doesn't match. Try
|
19
30
|
# to provide a good error message (even asking down below)
|
@@ -22,30 +33,49 @@ class Parslet::Atoms::Base
|
|
22
33
|
# error to fail with. Otherwise just report that we cannot consume the
|
23
34
|
# input.
|
24
35
|
if cause
|
25
|
-
|
36
|
+
# Don't garnish the real cause; but the exception is different anyway.
|
37
|
+
raise Parslet::ParseFailed,
|
38
|
+
"Unconsumed input, maybe because of this: #{cause}"
|
26
39
|
else
|
27
|
-
|
40
|
+
parse_failed(
|
41
|
+
format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
|
28
42
|
end
|
29
43
|
end
|
30
44
|
|
31
45
|
return flatten(result)
|
32
46
|
end
|
33
47
|
|
48
|
+
#---
|
49
|
+
# Calls the #try method of this parslet. In case of a parse error, apply
|
50
|
+
# leaves the io in the state it was before the attempt.
|
51
|
+
#+++
|
34
52
|
def apply(io) # :nodoc:
|
35
53
|
# p [:start, self, io.string[io.pos, 10]]
|
36
54
|
|
37
55
|
old_pos = io.pos
|
38
56
|
|
39
57
|
# p [:try, self, io.string[io.pos, 20]]
|
40
|
-
|
58
|
+
message = catch(:error) {
|
41
59
|
r = try(io)
|
42
60
|
# p [:return_from, self, r, flatten(r)]
|
61
|
+
|
62
|
+
# This has just succeeded, so last_cause must be empty
|
43
63
|
@last_cause = nil
|
44
64
|
return r
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
65
|
+
}
|
66
|
+
|
67
|
+
# We only reach this point if the parse has failed. message is not nil.
|
68
|
+
# p [:failing, self, io.string[io.pos, 20]]
|
69
|
+
|
70
|
+
io.pos = old_pos
|
71
|
+
throw :error, message
|
72
|
+
end
|
73
|
+
|
74
|
+
# Override this in your Atoms::Base subclasses to implement parsing
|
75
|
+
# behaviour.
|
76
|
+
#
|
77
|
+
def try(io)
|
78
|
+
raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
|
49
79
|
end
|
50
80
|
|
51
81
|
# Construct a new atom that repeats the current atom min times at least and
|
@@ -126,6 +156,9 @@ class Parslet::Atoms::Base
|
|
126
156
|
Parslet::Atoms::Named.new(self, name)
|
127
157
|
end
|
128
158
|
|
159
|
+
# Takes a mixed value coming out of a parslet and converts it to a return
|
160
|
+
# value for the user by dropping things and merging hashes.
|
161
|
+
#
|
129
162
|
def flatten(value) # :nodoc:
|
130
163
|
# Passes through everything that isn't an array of things
|
131
164
|
return value unless value.instance_of? Array
|
@@ -225,31 +258,35 @@ class Parslet::Atoms::Base
|
|
225
258
|
# with #cause.
|
226
259
|
#
|
227
260
|
def error_tree
|
228
|
-
Parslet::ErrorTree.new(self)
|
261
|
+
Parslet::ErrorTree.new(self)
|
229
262
|
end
|
230
263
|
def cause? # :nodoc:
|
231
264
|
not @last_cause.nil?
|
232
265
|
end
|
233
266
|
private
|
267
|
+
# TODO comments!!!
|
234
268
|
# Report/raise a parse error with the given message, printing the current
|
235
269
|
# position as well. Appends 'at line X char Y.' to the message you give.
|
236
270
|
# If +pos+ is given, it is used as the real position the error happened,
|
237
271
|
# correcting the io's current position.
|
238
272
|
#
|
239
273
|
def error(io, str, pos=nil)
|
274
|
+
@last_cause = format_cause(io, str, pos)
|
275
|
+
throw :error, @last_cause
|
276
|
+
end
|
277
|
+
def parse_failed(str)
|
278
|
+
@last_cause = str
|
279
|
+
raise Parslet::ParseFailed,
|
280
|
+
@last_cause
|
281
|
+
end
|
282
|
+
def format_cause(io, str, pos=nil)
|
240
283
|
pre = io.string[0..(pos||io.pos)]
|
241
284
|
lines = Array(pre.lines)
|
242
285
|
|
243
|
-
if lines.empty?
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
formatted_cause = "#{str} at line #{lines.count} char #{pos}."
|
248
|
-
end
|
249
|
-
|
250
|
-
@last_cause = formatted_cause
|
251
|
-
|
252
|
-
raise Parslet::ParseFailed, formatted_cause, nil
|
286
|
+
return str if lines.empty?
|
287
|
+
|
288
|
+
pos = lines.last.length
|
289
|
+
return "#{str} at line #{lines.count} char #{pos}."
|
253
290
|
end
|
254
291
|
def warn_about_duplicate_keys(h1, h2)
|
255
292
|
d = h1.keys & h2.keys
|
@@ -16,27 +16,30 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
16
16
|
|
17
17
|
def try(io) # :nodoc:
|
18
18
|
pos = io.pos
|
19
|
-
|
19
|
+
|
20
|
+
failed = true
|
21
|
+
catch(:error) {
|
20
22
|
bound_parslet.apply(io)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
failed = false
|
24
|
+
}
|
25
|
+
return failed ? fail(io) : success(io)
|
26
|
+
|
27
|
+
ensure
|
28
|
+
io.pos = pos
|
27
29
|
end
|
28
30
|
|
31
|
+
# TODO Both of these will produce results that could be reduced easily.
|
32
|
+
# Maybe do some shortcut reducing here?
|
29
33
|
def fail(io) # :nodoc:
|
30
34
|
if positive
|
31
35
|
error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
|
32
36
|
else
|
33
|
-
# TODO: Squash this down to nothing? Return value handling here...
|
34
37
|
return nil
|
35
38
|
end
|
36
39
|
end
|
37
40
|
def success(io) # :nodoc:
|
38
41
|
if positive
|
39
|
-
return nil
|
42
|
+
return nil
|
40
43
|
else
|
41
44
|
error(
|
42
45
|
io,
|
data/lib/parslet/atoms/re.rb
CHANGED
@@ -8,16 +8,16 @@
|
|
8
8
|
# match('\s') # like regexps: matches space characters
|
9
9
|
#
|
10
10
|
class Parslet::Atoms::Re < Parslet::Atoms::Base
|
11
|
-
attr_reader :match
|
11
|
+
attr_reader :match, :re
|
12
12
|
def initialize(match) # :nodoc:
|
13
13
|
@match = match
|
14
|
+
@re = Regexp.new(match, Regexp::MULTILINE)
|
14
15
|
end
|
15
16
|
|
16
17
|
def try(io) # :nodoc:
|
17
|
-
r = Regexp.new(match, Regexp::MULTILINE)
|
18
18
|
s = io.read(1)
|
19
19
|
error(io, "Premature end of input") unless s
|
20
|
-
error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(
|
20
|
+
error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(re)
|
21
21
|
return s
|
22
22
|
end
|
23
23
|
|
@@ -17,22 +17,21 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
|
|
17
17
|
def try(io) # :nodoc:
|
18
18
|
occ = 0
|
19
19
|
result = [@tag] # initialize the result array with the tag (for flattening)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
end
|
20
|
+
catch(:error) {
|
21
|
+
result << parslet.apply(io)
|
22
|
+
occ += 1
|
23
|
+
|
24
|
+
# If we're not greedy (max is defined), check if that has been
|
25
|
+
# reached.
|
26
|
+
return result if max && occ>=max
|
27
|
+
redo
|
28
|
+
}
|
29
|
+
|
30
|
+
# Greedy matcher has produced a failure. Check if occ (which will
|
31
|
+
contain the number of successes) is in {min, max}.
|
32
|
+
# p [:repetition, occ, min, max]
|
33
|
+
error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
|
34
|
+
return result
|
36
35
|
end
|
37
36
|
|
38
37
|
precedence REPETITION
|
@@ -16,12 +16,14 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def try(io) # :nodoc:
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
catch(:error) {
|
20
|
+
return [:sequence]+parslets.map { |p|
|
21
|
+
# Save each parslet as potentially offending (raising an error).
|
22
|
+
@offending_parslet = p
|
23
|
+
p.apply(io)
|
24
|
+
}
|
23
25
|
}
|
24
|
-
|
26
|
+
|
25
27
|
error(io, "Failed to match sequence (#{self.inspect})")
|
26
28
|
end
|
27
29
|
|
data/lib/parslet/pattern.rb
CHANGED
@@ -99,7 +99,8 @@ class Parslet::Pattern
|
|
99
99
|
end
|
100
100
|
|
101
101
|
def element_match_hash(tree, exp, bindings)
|
102
|
-
#
|
102
|
+
# Early failure when not all of the hash keys are matched.
|
103
|
+
return false unless exp.keys == tree.keys
|
103
104
|
|
104
105
|
# We iterate over expected pattern, since we demand that the keys that
|
105
106
|
# are there should be in tree as well.
|
@@ -0,0 +1,24 @@
|
|
1
|
+
RSpec::Matchers.define(:parse) do |input|
|
2
|
+
chain(:as) { |as| @as = as }
|
3
|
+
|
4
|
+
match do |parser|
|
5
|
+
begin
|
6
|
+
@result = parser.parse(input)
|
7
|
+
@as == @result or @as.nil?
|
8
|
+
rescue Parslet::ParseFailed
|
9
|
+
false
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
failure_message_for_should do |is|
|
14
|
+
"expected " << (@result ?
|
15
|
+
"output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
|
16
|
+
"expected #{is.inspect} to be able to parse #{input.inspect}")
|
17
|
+
end
|
18
|
+
|
19
|
+
failure_message_for_should_not do |is|
|
20
|
+
"expected " << (@as ?
|
21
|
+
"output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
|
22
|
+
"expected #{is.inspect} to be able to parse #{input.inspect}")
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 1.0.0
|
8
|
+
- 1
|
9
|
+
version: 1.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Kaspar Schiess
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date:
|
17
|
+
date: 2011-01-17 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -90,8 +90,21 @@ files:
|
|
90
90
|
- lib/parslet/pattern/binding.rb
|
91
91
|
- lib/parslet/pattern/context.rb
|
92
92
|
- lib/parslet/pattern.rb
|
93
|
+
- lib/parslet/rig/rspec.rb
|
93
94
|
- lib/parslet/transform.rb
|
94
95
|
- lib/parslet.rb
|
96
|
+
- example/documentation.rb
|
97
|
+
- example/email_parser.rb
|
98
|
+
- example/empty.rb
|
99
|
+
- example/ip_address.rb
|
100
|
+
- example/minilisp.rb
|
101
|
+
- example/parens.rb
|
102
|
+
- example/readme.rb
|
103
|
+
- example/seasons.rb
|
104
|
+
- example/simple.lit
|
105
|
+
- example/simple_xml.rb
|
106
|
+
- example/string_parser.rb
|
107
|
+
- example/test.lit
|
95
108
|
has_rdoc: true
|
96
109
|
homepage: http://kschiess.github.com/parslet
|
97
110
|
licenses: []
|