parslet 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +14 -0
- data/README +1 -3
- data/Rakefile +2 -2
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/ip_address.rb +125 -0
- data/example/minilisp.rb +101 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +36 -0
- data/example/seasons.rb +45 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +57 -0
- data/example/string_parser.rb +75 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +2 -3
- data/lib/parslet/atoms/base.rb +56 -19
- data/lib/parslet/atoms/lookahead.rb +12 -9
- data/lib/parslet/atoms/re.rb +3 -3
- data/lib/parslet/atoms/repetition.rb +15 -16
- data/lib/parslet/atoms/sequence.rb +7 -5
- data/lib/parslet/pattern.rb +2 -1
- data/lib/parslet/rig/rspec.rb +24 -0
- metadata +16 -3
data/HISTORY.txt
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
= 1.1.0 / ???
|
2
|
+
|
3
|
+
+ Uses throw/catch internally for an order of magnitude increase in execution
|
4
|
+
speed.
|
5
|
+
|
6
|
+
= 1.0.1 / 17Jan2011
|
7
|
+
|
8
|
+
A happy new year!
|
9
|
+
|
10
|
+
! FIX: Parslet::Transform was wrongly fixed earlier - it now wont mangle
|
11
|
+
hashes anymore. (Blake Sweeney)
|
12
|
+
|
13
|
+
+ parslet/rig/rspec.rb contains useful rspec matchers. (R. Konstantin Haase)
|
14
|
+
|
1
15
|
= 1.0.0 / 29Dez2010
|
2
16
|
|
3
17
|
- #each_match was removed. There was some duplication of code that even
|
data/README
CHANGED
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ spec = Gem::Specification.new do |s|
|
|
18
18
|
|
19
19
|
# Change these as appropriate
|
20
20
|
s.name = "parslet"
|
21
|
-
s.version = "1.0.0"
|
21
|
+
s.version = "1.0.1"
|
22
22
|
s.summary = "Parser construction library with great error reporting in Ruby."
|
23
23
|
s.author = "Kaspar Schiess"
|
24
24
|
s.email = "kaspar.schiess@absurd.li"
|
@@ -29,7 +29,7 @@ spec = Gem::Specification.new do |s|
|
|
29
29
|
s.rdoc_options = %w(--main README)
|
30
30
|
|
31
31
|
# Add any extra files to include in the gem
|
32
|
-
s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{
|
32
|
+
s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
|
33
33
|
s.require_paths = ["lib"]
|
34
34
|
|
35
35
|
# If you want to depend on other gems, add them here, along with any
|
data/example/documentation.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# A small example that shows a really small parser and what happens on parser
|
2
|
+
# errors.
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
|
9
|
+
class MyParser < Parslet::Parser
|
10
|
+
rule(:a) { str('a').repeat }
|
11
|
+
|
12
|
+
def parse(str)
|
13
|
+
a.parse(str)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
pp MyParser.new.parse('aaaa')
|
18
|
+
pp MyParser.new.parse('bbbb')
|
data/example/email_parser.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Example contributed by Hal Brodigan (postmodern). Thanks!
|
4
|
+
|
5
|
+
require 'parslet'
|
6
|
+
|
7
|
+
class EmailParser < Parslet::Parser
|
8
|
+
rule(:space) { match('\s').repeat(1) }
|
9
|
+
rule(:space?) { space.maybe }
|
10
|
+
rule(:dash?) { match['_-'].maybe }
|
11
|
+
|
12
|
+
rule(:at) {
|
13
|
+
str('@') |
|
14
|
+
(dash? >> (str('at') | str('AT')) >> dash?)
|
15
|
+
}
|
16
|
+
rule(:dot) {
|
17
|
+
str('.') |
|
18
|
+
(dash? >> (str('dot') | str('DOT')) >> dash?)
|
19
|
+
}
|
20
|
+
|
21
|
+
rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
|
22
|
+
rule(:separator) { space? >> dot.as(:dot) >> space? | space }
|
23
|
+
rule(:words) { word >> (separator >> word).repeat }
|
24
|
+
|
25
|
+
rule(:email) {
|
26
|
+
(words >> space? >> at.as(:at) >> space? >> words).as(:email)
|
27
|
+
}
|
28
|
+
|
29
|
+
root(:email)
|
30
|
+
end
|
31
|
+
|
32
|
+
class EmailSanitizer < Parslet::Transform
|
33
|
+
rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
|
34
|
+
rule(:at => simple(:at)) { '@' }
|
35
|
+
rule(:word => simple(:word)) { word }
|
36
|
+
rule(:email => sequence(:email)) { email.join }
|
37
|
+
end
|
38
|
+
|
39
|
+
parser = EmailParser.new
|
40
|
+
sanitizer = EmailSanitizer.new
|
41
|
+
|
42
|
+
unless ARGV[0]
|
43
|
+
STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
|
44
|
+
exit -1
|
45
|
+
end
|
46
|
+
|
47
|
+
begin
|
48
|
+
puts sanitizer.apply(parser.parse(ARGV[0]))
|
49
|
+
rescue Parslet::ParseFailed => error
|
50
|
+
puts error
|
51
|
+
puts parser.root.error_tree
|
52
|
+
end
|
data/example/empty.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Basically just demonstrates that you can leave rules empty and get a nice
|
2
|
+
# NotImplementedError. A way to quickly spec out your parser rules?
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'parslet'
|
7
|
+
|
8
|
+
class Parser < Parslet::Parser
|
9
|
+
rule(:empty) { }
|
10
|
+
end
|
11
|
+
|
12
|
+
|
13
|
+
Parser.new.empty.parslet
|
data/example/ip_address.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
# This example is heavily inspired by citrus' ip.citrus. Have a look at both
|
2
|
+
# of these to get some choice!
|
3
|
+
|
4
|
+
# The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
|
5
|
+
# Uniform Resource Identifier (URI): Generic Syntax.
|
6
|
+
#
|
7
|
+
# See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
|
8
|
+
|
9
|
+
$:.unshift '../lib'
|
10
|
+
|
11
|
+
require 'pp'
|
12
|
+
require 'parslet'
|
13
|
+
|
14
|
+
module IPv4
|
15
|
+
include Parslet
|
16
|
+
|
17
|
+
# A host identified by an IPv4 literal address is represented in
|
18
|
+
# dotted-decimal notation (a sequence of four decimal numbers in the range 0
|
19
|
+
# to 255, separated by "."), as described in [RFC1123] by reference to
|
20
|
+
# [RFC0952]. Note that other forms of dotted notation may be interpreted on
|
21
|
+
# some platforms, as described in Section 7.4, but only the dotted-decimal
|
22
|
+
# form of four octets is allowed by this grammar.
|
23
|
+
rule(:ipv4) {
|
24
|
+
(dec_octet >> str('.') >> dec_octet >> str('.') >>
|
25
|
+
dec_octet >> str('.') >> dec_octet).as(:ipv4)
|
26
|
+
}
|
27
|
+
|
28
|
+
rule(:dec_octet) {
|
29
|
+
str('25') >> match("[0-5]") |
|
30
|
+
str('2') >> match("[0-4]") >> digit |
|
31
|
+
str('1') >> digit >> digit |
|
32
|
+
match('[1-9]') >> digit |
|
33
|
+
digit
|
34
|
+
}
|
35
|
+
|
36
|
+
rule(:digit) {
|
37
|
+
match('[0-9]')
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
# Must be used in concert with IPv4
|
42
|
+
module IPv6
|
43
|
+
include Parslet
|
44
|
+
|
45
|
+
rule(:colon) { str(':') }
|
46
|
+
rule(:dcolon) { colon >> colon }
|
47
|
+
|
48
|
+
# h16 :
|
49
|
+
def h16r(times)
|
50
|
+
(h16 >> colon).repeat(times, times)
|
51
|
+
end
|
52
|
+
|
53
|
+
# : h16
|
54
|
+
def h16l(times)
|
55
|
+
(colon >> h16).repeat(0,times)
|
56
|
+
end
|
57
|
+
|
58
|
+
# A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is
|
59
|
+
# represented numerically in case-insensitive hexadecimal, using one to four
|
60
|
+
# hexadecimal digits (leading zeroes are permitted). The eight encoded
|
61
|
+
# pieces are given most-significant first, separated by colon characters.
|
62
|
+
# Optionally, the least-significant two pieces may instead be represented in
|
63
|
+
# IPv4 address textual format. A sequence of one or more consecutive
|
64
|
+
# zero-valued 16-bit pieces within the address may be elided, omitting all
|
65
|
+
# their digits and leaving exactly two consecutive colons in their place to
|
66
|
+
# mark the elision.
|
67
|
+
rule(:ipv6) {
|
68
|
+
(
|
69
|
+
(
|
70
|
+
h16r(6) |
|
71
|
+
dcolon >> h16r(5) |
|
72
|
+
h16.maybe >> dcolon >> h16r(4) |
|
73
|
+
(h16 >> h16l(1)).maybe >> dcolon >> h16r(3) |
|
74
|
+
(h16 >> h16l(2)).maybe >> dcolon >> h16r(2) |
|
75
|
+
(h16 >> h16l(3)).maybe >> dcolon >> h16r(1) |
|
76
|
+
(h16 >> h16l(4)).maybe >> dcolon
|
77
|
+
) >> ls32 |
|
78
|
+
(h16 >> h16l(5)).maybe >> dcolon >> h16 |
|
79
|
+
(h16 >> h16l(6)).maybe >> dcolon
|
80
|
+
).as(:ipv6)
|
81
|
+
}
|
82
|
+
|
83
|
+
rule(:h16) {
|
84
|
+
hexdigit.repeat(1,4)
|
85
|
+
}
|
86
|
+
|
87
|
+
rule(:ls32) {
|
88
|
+
(h16 >> colon >> h16) |
|
89
|
+
ipv4
|
90
|
+
}
|
91
|
+
|
92
|
+
rule(:hexdigit) {
|
93
|
+
digit | match("[a-fA-F]")
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
class Parser
|
98
|
+
include IPv4
|
99
|
+
include IPv6
|
100
|
+
|
101
|
+
def parse(str)
|
102
|
+
(ipv4 | ipv6).parse(str)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
%W(
|
107
|
+
0.0.0.0
|
108
|
+
255.255.255.255
|
109
|
+
255.255.255
|
110
|
+
1:2:3:4:5:6:7:8
|
111
|
+
12AD:34FC:A453:1922::
|
112
|
+
12AD::34FC
|
113
|
+
12AD::
|
114
|
+
::
|
115
|
+
1:2
|
116
|
+
).each do |address|
|
117
|
+
parser = Parser.new
|
118
|
+
printf "%30s -> ", address
|
119
|
+
begin
|
120
|
+
result = parser.parse(address)
|
121
|
+
puts result.inspect
|
122
|
+
rescue Parslet::ParseFailed => m
|
123
|
+
puts "Failed: #{m}"
|
124
|
+
end
|
125
|
+
end
|
data/example/minilisp.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# Reproduces [1] using parslet.
|
2
|
+
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
|
9
|
+
module MiniLisp
|
10
|
+
class Parser < Parslet::Parser
|
11
|
+
root :expression
|
12
|
+
rule(:expression) {
|
13
|
+
space? >> str('(') >> space? >> body >> str(')')
|
14
|
+
}
|
15
|
+
|
16
|
+
rule(:body) {
|
17
|
+
(expression | identifier | float | integer | string).repeat.as(:exp)
|
18
|
+
}
|
19
|
+
|
20
|
+
rule(:space) {
|
21
|
+
match('\s').repeat(1)
|
22
|
+
}
|
23
|
+
rule(:space?) {
|
24
|
+
space.maybe
|
25
|
+
}
|
26
|
+
|
27
|
+
rule(:identifier) {
|
28
|
+
(match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
|
29
|
+
}
|
30
|
+
|
31
|
+
rule(:float) {
|
32
|
+
(
|
33
|
+
integer >> (
|
34
|
+
str('.') >> match('[0-9]').repeat(1) |
|
35
|
+
str('e') >> match('[0-9]').repeat(1)
|
36
|
+
).as(:e)
|
37
|
+
).as(:float) >> space?
|
38
|
+
}
|
39
|
+
|
40
|
+
rule(:integer) {
|
41
|
+
((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
|
42
|
+
}
|
43
|
+
|
44
|
+
rule(:string) {
|
45
|
+
str('"') >> (
|
46
|
+
str('\\') >> any |
|
47
|
+
str('"').absnt? >> any
|
48
|
+
).repeat.as(:string) >> str('"') >> space?
|
49
|
+
}
|
50
|
+
end
|
51
|
+
|
52
|
+
class Transform
|
53
|
+
include Parslet
|
54
|
+
|
55
|
+
attr_reader :t
|
56
|
+
def initialize
|
57
|
+
@t = Parslet::Transform.new
|
58
|
+
|
59
|
+
# To understand these, take a look at what comes out of the parser.
|
60
|
+
t.rule(:identifier => simple(:ident)) { ident.to_sym }
|
61
|
+
|
62
|
+
t.rule(:string => simple(:str)) { str }
|
63
|
+
|
64
|
+
t.rule(:integer => simple(:int)) { Integer(int) }
|
65
|
+
|
66
|
+
t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
|
67
|
+
|
68
|
+
t.rule(:exp => subtree(:exp)) { exp }
|
69
|
+
end
|
70
|
+
|
71
|
+
def do(tree)
|
72
|
+
t.apply(tree)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
parser = MiniLisp::Parser.new
|
78
|
+
transform = MiniLisp::Transform.new
|
79
|
+
|
80
|
+
# Parse stage
|
81
|
+
begin
|
82
|
+
result = parser.parse %Q{
|
83
|
+
(define test (lambda ()
|
84
|
+
(begin
|
85
|
+
(display "something")
|
86
|
+
(display 1)
|
87
|
+
(display 3.08))))
|
88
|
+
(test)
|
89
|
+
}
|
90
|
+
rescue Parslet::ParseFailed => failure
|
91
|
+
puts failure
|
92
|
+
puts parser.root.error_tree if parser.root.cause
|
93
|
+
exit
|
94
|
+
end
|
95
|
+
|
96
|
+
# Transform the result
|
97
|
+
pp transform.do(result)
|
98
|
+
|
99
|
+
# Thereby reducing it to the earlier problem:
|
100
|
+
# http://github.com/kschiess/toylisp
|
101
|
+
|
data/example/parens.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# A small example that demonstrates the power of tree pattern matching. Also
|
2
|
+
# uses '.as(:name)' to construct a tree that can reliably be matched
|
3
|
+
# afterwards.
|
4
|
+
|
5
|
+
$:.unshift '../lib'
|
6
|
+
|
7
|
+
require 'pp'
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
module LISP # as in 'lots of insipid and stupid parenthesis'
|
11
|
+
class Parser < Parslet::Parser
|
12
|
+
rule(:balanced) {
|
13
|
+
str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
|
14
|
+
}
|
15
|
+
|
16
|
+
root(:balanced)
|
17
|
+
end
|
18
|
+
|
19
|
+
class Transform < Parslet::Transform
|
20
|
+
rule(:l => '(', :m => simple(:x), :r => ')') {
|
21
|
+
# innermost :m will contain nil
|
22
|
+
x.nil? ? 1 : x+1
|
23
|
+
}
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
parser = LISP::Parser.new
|
28
|
+
transform = LISP::Transform.new
|
29
|
+
%w!
|
30
|
+
()
|
31
|
+
(())
|
32
|
+
((((()))))
|
33
|
+
((())
|
34
|
+
!.each do |pexp|
|
35
|
+
begin
|
36
|
+
result = parser.parse(pexp)
|
37
|
+
puts "#{"%20s"%pexp}: #{result.inspect} (#{transform.apply(result)} parens)"
|
38
|
+
rescue Parslet::ParseFailed => m
|
39
|
+
puts "#{"%20s"%pexp}: #{m}"
|
40
|
+
end
|
41
|
+
puts
|
42
|
+
end
|
data/example/readme.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# The example from the readme. With this, I am making sure that the readme
|
2
|
+
# 'works'. Is this too messy?
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
include Parslet
|
9
|
+
|
10
|
+
require 'parslet'
|
11
|
+
include Parslet
|
12
|
+
|
13
|
+
# Constructs a parser using a Parser Expression Grammar like DSL:
|
14
|
+
parser = str('"') >>
|
15
|
+
(
|
16
|
+
str('\\') >> any |
|
17
|
+
str('"').absnt? >> any
|
18
|
+
).repeat.as(:string) >>
|
19
|
+
str('"')
|
20
|
+
|
21
|
+
# Parse the string and capture parts of the interpretation (:string above)
|
22
|
+
tree = parser.parse(%Q{
|
23
|
+
"This is a \\"String\\" in which you can escape stuff"
|
24
|
+
}.strip)
|
25
|
+
|
26
|
+
tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
|
27
|
+
|
28
|
+
# Here's how you can grab results from that tree:
|
29
|
+
|
30
|
+
# 1)
|
31
|
+
transform = Parslet::Transform.new do
|
32
|
+
rule(:string => simple(:x)) {
|
33
|
+
puts "String contents: #{x}" }
|
34
|
+
end
|
35
|
+
transform.apply(tree)
|
36
|
+
|
data/example/seasons.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
$:.unshift '../lib/'
|
2
|
+
require 'parslet'
|
3
|
+
require 'pp'
|
4
|
+
|
5
|
+
tree = {:bud => {:stem => []}}
|
6
|
+
|
7
|
+
class Spring < Parslet::Transform
|
8
|
+
rule(:stem => sequence(:branches)) {
|
9
|
+
{:stem => (branches + [{:branch => :leaf}])}
|
10
|
+
}
|
11
|
+
end
|
12
|
+
class Summer < Parslet::Transform
|
13
|
+
rule(:stem => subtree(:branches)) {
|
14
|
+
new_branches = branches.map { |b| {:branch => [:leaf, :flower]} }
|
15
|
+
{:stem => new_branches}
|
16
|
+
}
|
17
|
+
end
|
18
|
+
class Fall < Parslet::Transform
|
19
|
+
rule(:branch => sequence(:x)) {
|
20
|
+
x.each { |e| puts "Fruit!" if e==:flower }
|
21
|
+
x.each { |e| puts "Falling Leaves!" if e==:leaf }
|
22
|
+
{:branch => []}
|
23
|
+
}
|
24
|
+
end
|
25
|
+
class Winter < Parslet::Transform
|
26
|
+
rule(:stem => subtree(:x)) {
|
27
|
+
{:stem => []}
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
def do_seasons(tree)
|
32
|
+
[Spring, Summer, Fall, Winter].each do |season|
|
33
|
+
p "And when #{season} comes"
|
34
|
+
tree = season.new.apply(tree)
|
35
|
+
pp tree
|
36
|
+
puts
|
37
|
+
end
|
38
|
+
tree
|
39
|
+
end
|
40
|
+
|
41
|
+
# What marvel of life!
|
42
|
+
tree = do_seasons(tree)
|
43
|
+
tree = do_seasons(tree)
|
44
|
+
|
45
|
+
|
data/example/simple.lit
ADDED
data/example/simple_xml.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# A simple xml parser. It is simple in the respect as that it doesn't address
|
2
|
+
# any of the complexities of XML. This is ruby 1.9.
|
3
|
+
|
4
|
+
$:.unshift '../lib'
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
|
9
|
+
module XML
|
10
|
+
include Parslet
|
11
|
+
|
12
|
+
root :document
|
13
|
+
|
14
|
+
rule(:document) {
|
15
|
+
tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c) |
|
16
|
+
text
|
17
|
+
}
|
18
|
+
|
19
|
+
# Perhaps we could have some syntax sugar to make this more easy?
|
20
|
+
#
|
21
|
+
def tag(opts={})
|
22
|
+
close = opts[:close] || false
|
23
|
+
|
24
|
+
parslet = str('<')
|
25
|
+
parslet = parslet >> str('/') if close
|
26
|
+
parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
|
27
|
+
parslet = parslet >> str('>')
|
28
|
+
|
29
|
+
parslet
|
30
|
+
end
|
31
|
+
|
32
|
+
rule(:text) {
|
33
|
+
match('[^<>]').repeat(0)
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
def check(xml)
|
38
|
+
include XML
|
39
|
+
r=parse(xml)
|
40
|
+
|
41
|
+
# We'll validate the tree by reducing valid pairs of tags into simply the
|
42
|
+
# string "verified". If the transformation ends on a string, then the
|
43
|
+
# document was 'valid'.
|
44
|
+
#
|
45
|
+
t = Parslet::Transform.new do
|
46
|
+
rule(
|
47
|
+
o: {name: simple(:tag)},
|
48
|
+
c: {name: simple(:tag)},
|
49
|
+
i: simple(:t)
|
50
|
+
) { 'verified' }
|
51
|
+
end
|
52
|
+
|
53
|
+
t.apply(r)
|
54
|
+
end
|
55
|
+
|
56
|
+
pp check("<a><b>some text in the tags</b></a>")
|
57
|
+
pp check("<b><b>some text in the tags</b></a>")
|
data/example/string_parser.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# A more complex parser that illustrates how a compiler might be constructed.
|
2
|
+
# The parser recognizes strings and integer literals and constructs almost a
|
3
|
+
# useful AST from the file contents.
|
4
|
+
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
$:.unshift '../lib/'
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
include Parslet
|
11
|
+
|
12
|
+
class LiteralsParser < Parslet::Parser
|
13
|
+
rule :space do
|
14
|
+
(match '[ ]').repeat(1)
|
15
|
+
end
|
16
|
+
|
17
|
+
rule :literals do
|
18
|
+
(literal >> eol).repeat
|
19
|
+
end
|
20
|
+
|
21
|
+
rule :literal do
|
22
|
+
(integer | string).as(:literal) >> space.maybe
|
23
|
+
end
|
24
|
+
|
25
|
+
rule :string do
|
26
|
+
str('"') >>
|
27
|
+
(
|
28
|
+
(str('\\') >> any) |
|
29
|
+
(str('"').absnt? >> any)
|
30
|
+
).repeat.as(:string) >>
|
31
|
+
str('"')
|
32
|
+
end
|
33
|
+
|
34
|
+
rule :integer do
|
35
|
+
match('[0-9]').repeat(1).as(:integer)
|
36
|
+
end
|
37
|
+
|
38
|
+
rule :eol do
|
39
|
+
line_end.repeat(1)
|
40
|
+
end
|
41
|
+
|
42
|
+
rule :line_end do
|
43
|
+
crlf >> space.maybe
|
44
|
+
end
|
45
|
+
|
46
|
+
rule :crlf do
|
47
|
+
match('[\r\n]').repeat(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
root :literals
|
51
|
+
end
|
52
|
+
|
53
|
+
parsetree = LiteralsParser.new.parse(
|
54
|
+
File.read('simple.lit'))
|
55
|
+
|
56
|
+
class Lit < Struct.new(:text)
|
57
|
+
def to_s
|
58
|
+
text.inspect
|
59
|
+
end
|
60
|
+
end
|
61
|
+
class StringLit < Lit
|
62
|
+
end
|
63
|
+
class IntLit < Lit
|
64
|
+
def to_s
|
65
|
+
text
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
transform = Parslet::Transform.new do
|
70
|
+
rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
|
71
|
+
rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
|
72
|
+
end
|
73
|
+
|
74
|
+
ast = transform.apply(parsetree)
|
75
|
+
pp ast
|
data/example/test.lit
ADDED
data/lib/parslet.rb
CHANGED
data/lib/parslet/atoms/alternative.rb
CHANGED
@@ -30,10 +30,9 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
|
|
30
30
|
|
31
31
|
def try(io) # :nodoc:
|
32
32
|
alternatives.each { |a|
|
33
|
-
|
33
|
+
catch(:error) {
|
34
34
|
return a.apply(io)
|
35
|
-
|
36
|
-
end
|
35
|
+
}
|
37
36
|
}
|
38
37
|
# If we reach this point, all alternatives have failed.
|
39
38
|
error(io, "Expected one of #{alternatives.inspect}.")
|
data/lib/parslet/atoms/base.rb
CHANGED
@@ -13,7 +13,18 @@ class Parslet::Atoms::Base
|
|
13
13
|
io = StringIO.new(io)
|
14
14
|
end
|
15
15
|
|
16
|
-
result = apply(io)
|
16
|
+
result = nil
|
17
|
+
error_message_or_success = catch(:error) {
|
18
|
+
result = apply(io)
|
19
|
+
:success
|
20
|
+
}
|
21
|
+
|
22
|
+
# If we didn't succeed the parse, raise an exception for the user.
|
23
|
+
# Stack trace will be off, but the error tree should explain the reason
|
24
|
+
# it failed.
|
25
|
+
if error_message_or_success != :success
|
26
|
+
raise Parslet::ParseFailed, error_message_or_success
|
27
|
+
end
|
17
28
|
|
18
29
|
# If we haven't consumed the input, then the pattern doesn't match. Try
|
19
30
|
# to provide a good error message (even asking down below)
|
@@ -22,30 +33,49 @@ class Parslet::Atoms::Base
|
|
22
33
|
# error to fail with. Otherwise just report that we cannot consume the
|
23
34
|
# input.
|
24
35
|
if cause
|
25
|
-
|
36
|
+
# Don't garnish the real cause; but the exception is different anyway.
|
37
|
+
raise Parslet::ParseFailed,
|
38
|
+
"Unconsumed input, maybe because of this: #{cause}"
|
26
39
|
else
|
27
|
-
|
40
|
+
parse_failed(
|
41
|
+
format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
|
28
42
|
end
|
29
43
|
end
|
30
44
|
|
31
45
|
return flatten(result)
|
32
46
|
end
|
33
47
|
|
48
|
+
#---
|
49
|
+
# Calls the #try method of this parslet. In case of a parse error, apply
|
50
|
+
# leaves the io in the state it was before the attempt.
|
51
|
+
#+++
|
34
52
|
def apply(io) # :nodoc:
|
35
53
|
# p [:start, self, io.string[io.pos, 10]]
|
36
54
|
|
37
55
|
old_pos = io.pos
|
38
56
|
|
39
57
|
# p [:try, self, io.string[io.pos, 20]]
|
40
|
-
|
58
|
+
message = catch(:error) {
|
41
59
|
r = try(io)
|
42
60
|
# p [:return_from, self, r, flatten(r)]
|
61
|
+
|
62
|
+
# This has just succeeded, so last_cause must be empty
|
43
63
|
@last_cause = nil
|
44
64
|
return r
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
65
|
+
}
|
66
|
+
|
67
|
+
# We only reach this point if the parse has failed. message is not nil.
|
68
|
+
# p [:failing, self, io.string[io.pos, 20]]
|
69
|
+
|
70
|
+
io.pos = old_pos
|
71
|
+
throw :error, message
|
72
|
+
end
|
73
|
+
|
74
|
+
# Override this in your Atoms::Base subclasses to implement parsing
|
75
|
+
# behaviour.
|
76
|
+
#
|
77
|
+
def try(io)
|
78
|
+
raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
|
49
79
|
end
|
50
80
|
|
51
81
|
# Construct a new atom that repeats the current atom min times at least and
|
@@ -126,6 +156,9 @@ class Parslet::Atoms::Base
|
|
126
156
|
Parslet::Atoms::Named.new(self, name)
|
127
157
|
end
|
128
158
|
|
159
|
+
# Takes a mixed value coming out of a parslet and converts it to a return
|
160
|
+
# value for the user by dropping things and merging hashes.
|
161
|
+
#
|
129
162
|
def flatten(value) # :nodoc:
|
130
163
|
# Passes through everything that isn't an array of things
|
131
164
|
return value unless value.instance_of? Array
|
@@ -225,31 +258,35 @@ class Parslet::Atoms::Base
|
|
225
258
|
# with #cause.
|
226
259
|
#
|
227
260
|
def error_tree
|
228
|
-
Parslet::ErrorTree.new(self)
|
261
|
+
Parslet::ErrorTree.new(self)
|
229
262
|
end
|
230
263
|
def cause? # :nodoc:
|
231
264
|
not @last_cause.nil?
|
232
265
|
end
|
233
266
|
private
|
267
|
+
# TODO comments!!!
|
234
268
|
# Report/raise a parse error with the given message, printing the current
|
235
269
|
# position as well. Appends 'at line X char Y.' to the message you give.
|
236
270
|
# If +pos+ is given, it is used as the real position the error happened,
|
237
271
|
# correcting the io's current position.
|
238
272
|
#
|
239
273
|
def error(io, str, pos=nil)
|
274
|
+
@last_cause = format_cause(io, str, pos)
|
275
|
+
throw :error, @last_cause
|
276
|
+
end
|
277
|
+
def parse_failed(str)
|
278
|
+
@last_cause = str
|
279
|
+
raise Parslet::ParseFailed,
|
280
|
+
@last_cause
|
281
|
+
end
|
282
|
+
def format_cause(io, str, pos=nil)
|
240
283
|
pre = io.string[0..(pos||io.pos)]
|
241
284
|
lines = Array(pre.lines)
|
242
285
|
|
243
|
-
if lines.empty?
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
formatted_cause = "#{str} at line #{lines.count} char #{pos}."
|
248
|
-
end
|
249
|
-
|
250
|
-
@last_cause = formatted_cause
|
251
|
-
|
252
|
-
raise Parslet::ParseFailed, formatted_cause, nil
|
286
|
+
return str if lines.empty?
|
287
|
+
|
288
|
+
pos = lines.last.length
|
289
|
+
return "#{str} at line #{lines.count} char #{pos}."
|
253
290
|
end
|
254
291
|
def warn_about_duplicate_keys(h1, h2)
|
255
292
|
d = h1.keys & h2.keys
|
data/lib/parslet/atoms/lookahead.rb
CHANGED
@@ -16,27 +16,30 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
|
|
16
16
|
|
17
17
|
def try(io) # :nodoc:
|
18
18
|
pos = io.pos
|
19
|
-
|
19
|
+
|
20
|
+
failed = true
|
21
|
+
catch(:error) {
|
20
22
|
bound_parslet.apply(io)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
failed = false
|
24
|
+
}
|
25
|
+
return failed ? fail(io) : success(io)
|
26
|
+
|
27
|
+
ensure
|
28
|
+
io.pos = pos
|
27
29
|
end
|
28
30
|
|
31
|
+
# TODO Both of these will produce results that could be reduced easily.
|
32
|
+
# Maybe do some shortcut reducing here?
|
29
33
|
def fail(io) # :nodoc:
|
30
34
|
if positive
|
31
35
|
error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
|
32
36
|
else
|
33
|
-
# TODO: Squash this down to nothing? Return value handling here...
|
34
37
|
return nil
|
35
38
|
end
|
36
39
|
end
|
37
40
|
def success(io) # :nodoc:
|
38
41
|
if positive
|
39
|
-
return nil
|
42
|
+
return nil
|
40
43
|
else
|
41
44
|
error(
|
42
45
|
io,
|
data/lib/parslet/atoms/re.rb
CHANGED
@@ -8,16 +8,16 @@
|
|
8
8
|
# match('\s') # like regexps: matches space characters
|
9
9
|
#
|
10
10
|
class Parslet::Atoms::Re < Parslet::Atoms::Base
|
11
|
-
attr_reader :match
|
11
|
+
attr_reader :match, :re
|
12
12
|
def initialize(match) # :nodoc:
|
13
13
|
@match = match
|
14
|
+
@re = Regexp.new(match, Regexp::MULTILINE)
|
14
15
|
end
|
15
16
|
|
16
17
|
def try(io) # :nodoc:
|
17
|
-
r = Regexp.new(match, Regexp::MULTILINE)
|
18
18
|
s = io.read(1)
|
19
19
|
error(io, "Premature end of input") unless s
|
20
|
-
error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
|
20
|
+
error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(re)
|
21
21
|
return s
|
22
22
|
end
|
23
23
|
|
data/lib/parslet/atoms/repetition.rb
CHANGED
@@ -17,22 +17,21 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
|
|
17
17
|
def try(io) # :nodoc:
|
18
18
|
occ = 0
|
19
19
|
result = [@tag] # initialize the result array with the tag (for flattening)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
end
|
20
|
+
catch(:error) {
|
21
|
+
result << parslet.apply(io)
|
22
|
+
occ += 1
|
23
|
+
|
24
|
+
# If we're not greedy (max is defined), check if that has been
|
25
|
+
# reached.
|
26
|
+
return result if max && occ>=max
|
27
|
+
redo
|
28
|
+
}
|
29
|
+
|
30
|
+
# Greedy matcher has produced a failure. Check if occ (which will
|
31
|
+
# contain the number of sucesses) is in {min, max}.
|
32
|
+
# p [:repetition, occ, min, max]
|
33
|
+
error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
|
34
|
+
return result
|
36
35
|
end
|
37
36
|
|
38
37
|
precedence REPETITION
|
data/lib/parslet/atoms/sequence.rb
CHANGED
@@ -16,12 +16,14 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def try(io) # :nodoc:
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
catch(:error) {
|
20
|
+
return [:sequence]+parslets.map { |p|
|
21
|
+
# Save each parslet as potentially offending (raising an error).
|
22
|
+
@offending_parslet = p
|
23
|
+
p.apply(io)
|
24
|
+
}
|
23
25
|
}
|
24
|
-
|
26
|
+
|
25
27
|
error(io, "Failed to match sequence (#{self.inspect})")
|
26
28
|
end
|
27
29
|
|
data/lib/parslet/pattern.rb
CHANGED
@@ -99,7 +99,8 @@ class Parslet::Pattern
|
|
99
99
|
end
|
100
100
|
|
101
101
|
def element_match_hash(tree, exp, bindings)
|
102
|
-
#
|
102
|
+
# Early failure when not all of the hash keys are matched.
|
103
|
+
return false unless exp.keys == tree.keys
|
103
104
|
|
104
105
|
# We iterate over expected pattern, since we demand that the keys that
|
105
106
|
# are there should be in tree as well.
|
data/lib/parslet/rig/rspec.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
RSpec::Matchers.define(:parse) do |input|
|
2
|
+
chain(:as) { |as| @as = as }
|
3
|
+
|
4
|
+
match do |parser|
|
5
|
+
begin
|
6
|
+
@result = parser.parse(input)
|
7
|
+
@as == @result or @as.nil?
|
8
|
+
rescue Parslet::ParseFailed
|
9
|
+
false
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
failure_message_for_should do |is|
|
14
|
+
"expected " << (@result ?
|
15
|
+
"output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
|
16
|
+
"expected #{is.inspect} to be able to parse #{input.inspect}")
|
17
|
+
end
|
18
|
+
|
19
|
+
failure_message_for_should_not do |is|
|
20
|
+
"expected " << (@as ?
|
21
|
+
"output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
|
22
|
+
"expected #{is.inspect} to be able to parse #{input.inspect}")
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
- 0
|
9
|
-
version: 1.0.0
|
8
|
+
- 1
|
9
|
+
version: 1.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Kaspar Schiess
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date:
|
17
|
+
date: 2011-01-17 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -90,8 +90,21 @@ files:
|
|
90
90
|
- lib/parslet/pattern/binding.rb
|
91
91
|
- lib/parslet/pattern/context.rb
|
92
92
|
- lib/parslet/pattern.rb
|
93
|
+
- lib/parslet/rig/rspec.rb
|
93
94
|
- lib/parslet/transform.rb
|
94
95
|
- lib/parslet.rb
|
96
|
+
- example/documentation.rb
|
97
|
+
- example/email_parser.rb
|
98
|
+
- example/empty.rb
|
99
|
+
- example/ip_address.rb
|
100
|
+
- example/minilisp.rb
|
101
|
+
- example/parens.rb
|
102
|
+
- example/readme.rb
|
103
|
+
- example/seasons.rb
|
104
|
+
- example/simple.lit
|
105
|
+
- example/simple_xml.rb
|
106
|
+
- example/string_parser.rb
|
107
|
+
- example/test.lit
|
95
108
|
has_rdoc: true
|
96
109
|
homepage: http://kschiess.github.com/parslet
|
97
110
|
licenses: []
|