parslet 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +29 -0
- data/README +2 -4
- data/Rakefile +18 -4
- data/example/comments.rb +11 -13
- data/example/documentation.rb +1 -1
- data/example/email_parser.rb +5 -5
- data/example/empty.rb +2 -2
- data/example/erb.rb +6 -3
- data/example/ip_address.rb +2 -2
- data/example/local.rb +34 -0
- data/example/minilisp.rb +2 -2
- data/example/output/comments.out +8 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/local.out +3 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +1 -3
- data/example/readme.rb +4 -10
- data/example/seasons.rb +2 -1
- data/example/simple_xml.rb +5 -8
- data/example/string_parser.rb +7 -5
- data/lib/parslet.rb +20 -31
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/base.rb +46 -87
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +3 -4
- data/lib/parslet/atoms/lookahead.rb +1 -1
- data/lib/parslet/atoms/re.rb +2 -2
- data/lib/parslet/atoms/str.rb +5 -2
- data/lib/parslet/atoms/transform.rb +75 -0
- data/lib/parslet/atoms/visitor.rb +9 -9
- data/lib/parslet/convenience.rb +3 -3
- data/lib/parslet/export.rb +13 -13
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +55 -1
- data/lib/parslet/rig/rspec.rb +36 -10
- data/lib/parslet/slice.rb +172 -0
- data/lib/parslet/source.rb +72 -83
- data/lib/parslet/source/line_cache.rb +90 -0
- metadata +22 -20
data/HISTORY.txt
CHANGED
@@ -1,3 +1,32 @@
|
|
1
|
+
= 2.0 / ?? (future release changes, like a reminder to self)
|
2
|
+
|
3
|
+
- prsnt? and absnt? are now finally banished into oblivion. Wasting vowels for
|
4
|
+
the win.
|
5
|
+
|
6
|
+
= 1.2.0 / ???
|
7
|
+
|
8
|
+
+ Parslet::Parser is now also a grammar atom, it can be composed freely with
|
9
|
+
other atoms. (str('f') >> MiniLispParser.new >> str('b'))
|
10
|
+
|
11
|
+
+ No strings, only slices are returned as part of the parser result.
|
12
|
+
Parslet::Slice is almost a string class, but one that remembers the
|
13
|
+
source offset. This has also bought us a slight speedup.
|
14
|
+
|
15
|
+
+ require 'parslet/convenience' now brings #parse_with_debug to all parslets.
|
16
|
+
This is a consequence of the above change.
|
17
|
+
|
18
|
+
+ Deprecates prsnt? and absnt? in favor of the more readable absent? and
|
19
|
+
present?. Uses 3 bytes more RAM. The old variants will exist until we release
|
20
|
+
2.0.
|
21
|
+
|
22
|
+
INTERNALLY
|
23
|
+
|
24
|
+
+ Visitors now should have methods that all begin with 'visit_*'. #str
|
25
|
+
becomes #visit_str.
|
26
|
+
|
27
|
+
+ Parslet::Atoms::Entity now takes only a block argument instead of context
|
28
|
+
and block.
|
29
|
+
|
1
30
|
= 1.1.1 / 4Feb2011
|
2
31
|
|
3
32
|
! FIX: Line counting was broken by performance optimisations.
|
data/README
CHANGED
@@ -27,9 +27,7 @@ SYNOPSIS
|
|
27
27
|
str('"')
|
28
28
|
|
29
29
|
# Parse the string and capture parts of the interpretation (:string above)
|
30
|
-
tree = parser.parse(
|
31
|
-
"This is a \\"String\\" in which you can escape stuff"
|
32
|
-
}.strip)
|
30
|
+
tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
|
33
31
|
|
34
32
|
tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
|
35
33
|
|
@@ -37,7 +35,7 @@ SYNOPSIS
|
|
37
35
|
|
38
36
|
transform = Parslet::Transform.new do
|
39
37
|
rule(:string => simple(:x)) {
|
40
|
-
puts "String contents
|
38
|
+
puts "String contents: #{x}" }
|
41
39
|
end
|
42
40
|
transform.apply(tree)
|
43
41
|
|
data/Rakefile
CHANGED
@@ -1,13 +1,18 @@
|
|
1
|
-
|
2
1
|
require "rubygems"
|
3
2
|
require "rake/rdoctask"
|
4
3
|
require 'rspec/core/rake_task'
|
5
4
|
require "rake/gempackagetask"
|
6
5
|
|
7
|
-
|
8
|
-
desc "Run all examples"
|
6
|
+
desc "Run all tests: Exhaustive."
|
9
7
|
RSpec::Core::RakeTask.new
|
10
8
|
|
9
|
+
namespace :spec do
|
10
|
+
desc "Only run unit tests: Fast. "
|
11
|
+
RSpec::Core::RakeTask.new(:unit) do |task|
|
12
|
+
task.pattern = "spec/parslet/**/*_spec.rb"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
11
16
|
task :default => :spec
|
12
17
|
|
13
18
|
require 'sdoc'
|
@@ -27,10 +32,19 @@ desc 'Clear out RDoc'
|
|
27
32
|
task :clean => [:clobber_rdoc, :clobber_package]
|
28
33
|
|
29
34
|
# This task actually builds the gem.
|
35
|
+
task :gem => :spec
|
30
36
|
spec = eval(File.read('parslet.gemspec'))
|
37
|
+
|
31
38
|
desc "Generate the gem package."
|
32
39
|
Rake::GemPackageTask.new(spec) do |pkg|
|
33
40
|
pkg.gem_spec = spec
|
34
41
|
end
|
35
42
|
|
36
|
-
|
43
|
+
desc "Prints LOC stats"
|
44
|
+
task :stat do
|
45
|
+
%w(lib spec example).each do |dir|
|
46
|
+
loc = %x(find #{dir} -name "*.rb" | xargs wc -l | grep 'total').split.first.to_i
|
47
|
+
printf("%20s %d\n", dir, loc)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
data/example/comments.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
# A small example on how to parse common types of comments. The example
|
2
2
|
# started out with parser code from Stephen Waits.
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'pp'
|
7
7
|
require 'parslet'
|
8
8
|
require 'parslet/convenience'
|
9
9
|
|
10
10
|
class ALanguage < Parslet::Parser
|
11
|
-
root(:
|
11
|
+
root(:lines)
|
12
12
|
|
13
|
-
rule(:
|
14
|
-
rule(:line) {
|
15
|
-
rule(:
|
13
|
+
rule(:lines) { line.repeat }
|
14
|
+
rule(:line) { spaces >> expression.repeat >> newline }
|
15
|
+
rule(:newline) { str("\n") >> str("\r").maybe }
|
16
16
|
|
17
|
-
rule(:
|
17
|
+
rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
|
18
18
|
|
19
|
-
rule(:
|
20
|
-
rule(:space) { multiline_comment
|
21
|
-
|
22
|
-
rule(:line_comment) { str('//') >> (
|
23
|
-
rule(:multiline_comment) { str('/*') >> (str('*/').
|
19
|
+
rule(:spaces) { space.repeat }
|
20
|
+
rule(:space) { multiline_comment | line_comment | str(' ') }
|
21
|
+
|
22
|
+
rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) }
|
23
|
+
rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) }
|
24
24
|
end
|
25
25
|
|
26
26
|
code = %q(
|
@@ -33,5 +33,3 @@ code = %q(
|
|
33
33
|
)
|
34
34
|
|
35
35
|
pp ALanguage.new.parse_with_debug(code)
|
36
|
-
|
37
|
-
|
data/example/documentation.rb
CHANGED
data/example/email_parser.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Example contributed by Hal Brodigan (postmodern). Thanks!
|
4
4
|
|
5
|
-
$:.unshift
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
6
|
require 'parslet'
|
7
7
|
|
8
8
|
class EmailParser < Parslet::Parser
|
@@ -35,7 +35,7 @@ class EmailSanitizer < Parslet::Transform
|
|
35
35
|
rule(:word => simple(:word)) { word }
|
36
36
|
|
37
37
|
rule(:username => sequence(:username)) { username.join + "@" }
|
38
|
-
rule(:username => simple(:username)) { username + "@" }
|
38
|
+
rule(:username => simple(:username)) { username.to_s + "@" }
|
39
39
|
|
40
40
|
rule(:email => sequence(:email)) { email.join }
|
41
41
|
end
|
@@ -45,12 +45,12 @@ sanitizer = EmailSanitizer.new
|
|
45
45
|
|
46
46
|
unless ARGV[0]
|
47
47
|
STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
|
48
|
-
|
48
|
+
STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
|
49
49
|
end
|
50
50
|
|
51
51
|
begin
|
52
|
-
p sanitizer.apply(parser.parse(ARGV[0]))
|
52
|
+
p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
|
53
53
|
rescue Parslet::ParseFailed => error
|
54
54
|
puts error
|
55
|
-
puts parser.
|
55
|
+
puts parser.error_tree
|
56
56
|
end
|
data/example/empty.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Basically just demonstrates that you can leave rules empty and get a nice
|
2
2
|
# NotImplementedError. A way to quickly spec out your parser rules?
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'parslet'
|
7
7
|
|
@@ -10,4 +10,4 @@ class Parser < Parslet::Parser
|
|
10
10
|
end
|
11
11
|
|
12
12
|
|
13
|
-
Parser.new.empty.parslet
|
13
|
+
Parser.new.empty.parslet
|
data/example/erb.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
# Example that demonstrates how a simple erb-like parser could be constructed.
|
2
|
+
|
1
3
|
$:.unshift File.dirname(__FILE__) + "/../lib"
|
4
|
+
|
2
5
|
require 'parslet'
|
3
6
|
|
4
7
|
class ErbParser < Parslet::Parser
|
5
|
-
rule(:ruby) { (str('%>').
|
8
|
+
rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
|
6
9
|
|
7
10
|
rule(:expression) { (str('=') >> ruby).as(:expression) }
|
8
11
|
rule(:comment) { (str('#') >> ruby).as(:comment) }
|
@@ -10,7 +13,7 @@ class ErbParser < Parslet::Parser
|
|
10
13
|
rule(:erb) { expression | comment | code }
|
11
14
|
|
12
15
|
rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
|
13
|
-
rule(:text) { (str('<%').
|
16
|
+
rule(:text) { (str('<%').absent? >> any).repeat(1) }
|
14
17
|
|
15
18
|
rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
|
16
19
|
root(:text_with_ruby)
|
@@ -41,4 +44,4 @@ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a be
|
|
41
44
|
The <%= 'nicely' %> printed result.
|
42
45
|
The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
|
43
46
|
ERB
|
44
|
-
))
|
47
|
+
))
|
data/example/ip_address.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
#
|
7
7
|
# See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
|
8
8
|
|
9
|
-
$:.unshift
|
9
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
10
10
|
|
11
11
|
require 'pp'
|
12
12
|
require 'parslet'
|
@@ -122,4 +122,4 @@ end
|
|
122
122
|
rescue Parslet::ParseFailed => m
|
123
123
|
puts "Failed: #{m}"
|
124
124
|
end
|
125
|
-
end
|
125
|
+
end
|
data/example/local.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
# An exploration of two ideas:
|
3
|
+
# a) Constructing a whole parser inline, without the artificial class around
|
4
|
+
# it.
|
5
|
+
# and:
|
6
|
+
# b) Constructing non-greedy or non-blind parsers by transforming the
|
7
|
+
# grammar.
|
8
|
+
|
9
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
10
|
+
|
11
|
+
require 'parslet'
|
12
|
+
include Parslet
|
13
|
+
|
14
|
+
a = str('a').repeat >> str('aa')
|
15
|
+
|
16
|
+
# E1% E2
|
17
|
+
#
|
18
|
+
# S = E2 | E1 S
|
19
|
+
|
20
|
+
def this(name, &block); return Parslet::Atoms::Entity.new(name, &block) end
|
21
|
+
def epsilon; any.absent? end
|
22
|
+
|
23
|
+
# Traditional repetition will try as long as the pattern can be matched and
|
24
|
+
# then give up. This is greedy and blind.
|
25
|
+
a = str('a').as(:e) >> this('a') { a }.as(:rec) | epsilon
|
26
|
+
|
27
|
+
# Here's a pattern match that is greedy and non-blind. The first pattern
|
28
|
+
# 'a'* will be tried as many times as possible, while still matching the
|
29
|
+
# end pattern 'aa'.
|
30
|
+
b = str('aa').as(:e2) >> epsilon | str('a').as(:e1) >> this('b') { b }.as(:rec)
|
31
|
+
|
32
|
+
p a.parse('aaaa')
|
33
|
+
p b
|
34
|
+
p b.parse('aaaa')
|
data/example/minilisp.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Reproduces [1] using parslet.
|
2
2
|
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'pp'
|
7
7
|
require 'parslet'
|
@@ -45,7 +45,7 @@ module MiniLisp
|
|
45
45
|
rule(:string) {
|
46
46
|
str('"') >> (
|
47
47
|
str('\\') >> any |
|
48
|
-
str('"').
|
48
|
+
str('"').absent? >> any
|
49
49
|
).repeat.as(:string) >> str('"') >> space?
|
50
50
|
}
|
51
51
|
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
[{:exp=>{:a=>"a"@3}},
|
2
|
+
{:line=>"// line comment"@7},
|
3
|
+
{:exp=>{:a=>"a"@25}},
|
4
|
+
{:exp=>{:a=>"a"@27}},
|
5
|
+
{:exp=>[{:a=>"a"@29}, {:line=>"// line comment"@31}]},
|
6
|
+
{:exp=>[{:a=>"a"@49}, {:multi=>"/* inline comment */"@51}]},
|
7
|
+
{:exp=>{:a=>"a"@72}},
|
8
|
+
{:multi=>"/* multiline\n comment */"@77}]
|
@@ -0,0 +1,4 @@
|
|
1
|
+
/Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:326:in `parse_failed': Don't know what to do with bbbb at line 1 char 1. (Parslet::ParseFailed)
|
2
|
+
from /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:55:in `parse'
|
3
|
+
from example/documentation.rb:13:in `parse'
|
4
|
+
from example/documentation.rb:18:in `<main>'
|
@@ -0,0 +1 @@
|
|
1
|
+
"aaaa"@0
|
@@ -0,0 +1 @@
|
|
1
|
+
example/empty.rb:13:in `<main>': rule(:empty) { ... } returns nil. Still not implemented, but already used? (NotImplementedError)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{:text=>[{:text=>"The value of x is "@0}, {:expression=>{:ruby=>" x "@21}}, {:text=>"."@26}]}
|
2
|
+
{:text=>[{:code=>{:ruby=>" 1 + 2 "@2}}]}
|
3
|
+
{:text=>[{:comment=>{:ruby=>" commented "@3}}]}
|
4
|
+
The not printed result of "a = 2".
|
5
|
+
The not printed non-evaluated comment "a = 1", see the value of a below.
|
6
|
+
The nicely printed result.
|
7
|
+
The value of a is 2, and b is 3.
|
@@ -0,0 +1,9 @@
|
|
1
|
+
0.0.0.0 -> {:ipv4=>"0.0.0.0"@0}
|
2
|
+
255.255.255.255 -> {:ipv4=>"255.255.255.255"@0}
|
3
|
+
255.255.255 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
|
4
|
+
1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0}
|
5
|
+
12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0}
|
6
|
+
12AD::34FC -> {:ipv6=>"12AD::34FC"@0}
|
7
|
+
12AD:: -> {:ipv6=>"12AD::"@0}
|
8
|
+
:: -> {:ipv6=>"::"@0}
|
9
|
+
1:2 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
|
@@ -0,0 +1,8 @@
|
|
1
|
+
(): {:l=>"("@0, :m=>nil, :r=>")"@1} (1 parens)
|
2
|
+
|
3
|
+
(()): {:l=>"("@0, :m=>{:l=>"("@1, :m=>nil, :r=>")"@2}, :r=>")"@3} (2 parens)
|
4
|
+
|
5
|
+
((((())))): {:l=>"("@0, :m=>{:l=>"("@1, :m=>{:l=>"("@2, :m=>{:l=>"("@3, :m=>{:l=>"("@4, :m=>nil, :r=>")"@5}, :r=>")"@6}, :r=>")"@7}, :r=>")"@8}, :r=>")"@9} (5 parens)
|
6
|
+
|
7
|
+
((()): Failed to match sequence (l:'(' m:(BALANCED?)) at line 1 char 6.
|
8
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
String contents: This is a \"String\" in which you can escape stuff
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"And when Spring comes"
|
2
|
+
{:bud=>{:stem=>[{:branch=>:leaf}]}}
|
3
|
+
|
4
|
+
"And when Summer comes"
|
5
|
+
{:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
|
6
|
+
|
7
|
+
"And when Fall comes"
|
8
|
+
Fruit!
|
9
|
+
Falling Leaves!
|
10
|
+
{:bud=>{:stem=>[{:branch=>[]}]}}
|
11
|
+
|
12
|
+
"And when Winter comes"
|
13
|
+
{:bud=>{:stem=>[]}}
|
14
|
+
|
15
|
+
"And when Spring comes"
|
16
|
+
{:bud=>{:stem=>[{:branch=>:leaf}]}}
|
17
|
+
|
18
|
+
"And when Summer comes"
|
19
|
+
{:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
|
20
|
+
|
21
|
+
"And when Fall comes"
|
22
|
+
Fruit!
|
23
|
+
Falling Leaves!
|
24
|
+
{:bud=>{:stem=>[{:branch=>[]}]}}
|
25
|
+
|
26
|
+
"And when Winter comes"
|
27
|
+
{:bud=>{:stem=>[]}}
|
28
|
+
|
data/example/parens.rb
CHANGED
data/example/readme.rb
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
# The example from the readme. With this, I am making sure that the readme
|
2
2
|
# 'works'. Is this too messy?
|
3
3
|
|
4
|
-
$:.unshift
|
5
|
-
|
6
|
-
require 'pp'
|
7
|
-
require 'parslet'
|
8
|
-
include Parslet
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
9
5
|
|
6
|
+
# cut here -------------------------------------------------------------------
|
10
7
|
require 'parslet'
|
11
8
|
include Parslet
|
12
9
|
|
@@ -14,20 +11,17 @@ include Parslet
|
|
14
11
|
parser = str('"') >>
|
15
12
|
(
|
16
13
|
str('\\') >> any |
|
17
|
-
str('"').
|
14
|
+
str('"').absent? >> any
|
18
15
|
).repeat.as(:string) >>
|
19
16
|
str('"')
|
20
17
|
|
21
18
|
# Parse the string and capture parts of the interpretation (:string above)
|
22
|
-
tree = parser.parse(
|
23
|
-
"This is a \\"String\\" in which you can escape stuff"
|
24
|
-
}.strip)
|
19
|
+
tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
|
25
20
|
|
26
21
|
tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
|
27
22
|
|
28
23
|
# Here's how you can grab results from that tree:
|
29
24
|
|
30
|
-
# 1)
|
31
25
|
transform = Parslet::Transform.new do
|
32
26
|
rule(:string => simple(:x)) {
|
33
27
|
puts "String contents: #{x}" }
|