parslet 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +29 -0
- data/README +2 -4
- data/Rakefile +18 -4
- data/example/comments.rb +11 -13
- data/example/documentation.rb +1 -1
- data/example/email_parser.rb +5 -5
- data/example/empty.rb +2 -2
- data/example/erb.rb +6 -3
- data/example/ip_address.rb +2 -2
- data/example/local.rb +34 -0
- data/example/minilisp.rb +2 -2
- data/example/output/comments.out +8 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/local.out +3 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +1 -3
- data/example/readme.rb +4 -10
- data/example/seasons.rb +2 -1
- data/example/simple_xml.rb +5 -8
- data/example/string_parser.rb +7 -5
- data/lib/parslet.rb +20 -31
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/base.rb +46 -87
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +3 -4
- data/lib/parslet/atoms/lookahead.rb +1 -1
- data/lib/parslet/atoms/re.rb +2 -2
- data/lib/parslet/atoms/str.rb +5 -2
- data/lib/parslet/atoms/transform.rb +75 -0
- data/lib/parslet/atoms/visitor.rb +9 -9
- data/lib/parslet/convenience.rb +3 -3
- data/lib/parslet/export.rb +13 -13
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +55 -1
- data/lib/parslet/rig/rspec.rb +36 -10
- data/lib/parslet/slice.rb +172 -0
- data/lib/parslet/source.rb +72 -83
- data/lib/parslet/source/line_cache.rb +90 -0
- metadata +22 -20
data/HISTORY.txt
CHANGED
@@ -1,3 +1,32 @@
|
|
1
|
+
= 2.0 / ?? (future release changes, like a reminder to self)
|
2
|
+
|
3
|
+
- prsnt? and absnt? are now finally banished into oblivion. Wasting vowels for
|
4
|
+
the win.
|
5
|
+
|
6
|
+
= 1.2.0 / ???
|
7
|
+
|
8
|
+
+ Parslet::Parser is now also a grammar atom, it can be composed freely with
|
9
|
+
other atoms. (str('f') >> MiniLispParser.new >> str('b'))
|
10
|
+
|
11
|
+
+ No strings, only slices are returned as part of the parser result.
|
12
|
+
Parslet::Slice is almost a string class, but one that remembers the
|
13
|
+
source offset. This has also bought us a slight speedup.
|
14
|
+
|
15
|
+
+ require 'parslet/convenience' now brings #parse_with_debug to all parslets.
|
16
|
+
This is a consequence of the above change.
|
17
|
+
|
18
|
+
+ Deprecates prsnt? and absnt? in favor of the more readable absent? and
|
19
|
+
present?. Uses 3 bytes more RAM. The old variants will exist until we release
|
20
|
+
2.0.
|
21
|
+
|
22
|
+
INTERNALLY
|
23
|
+
|
24
|
+
+ Visitors now should have methods that all begin with 'visit_*'. #str
|
25
|
+
becomes #visit_str.
|
26
|
+
|
27
|
+
+ Parslet::Atoms::Entity now takes only a block argument instead of context
|
28
|
+
and block.
|
29
|
+
|
1
30
|
= 1.1.1 / 4Feb2011
|
2
31
|
|
3
32
|
! FIX: Line counting was broken by performance optimisations.
|
data/README
CHANGED
@@ -27,9 +27,7 @@ SYNOPSIS
|
|
27
27
|
str('"')
|
28
28
|
|
29
29
|
# Parse the string and capture parts of the interpretation (:string above)
|
30
|
-
tree = parser.parse(
|
31
|
-
"This is a \\"String\\" in which you can escape stuff"
|
32
|
-
}.strip)
|
30
|
+
tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
|
33
31
|
|
34
32
|
tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
|
35
33
|
|
@@ -37,7 +35,7 @@ SYNOPSIS
|
|
37
35
|
|
38
36
|
transform = Parslet::Transform.new do
|
39
37
|
rule(:string => simple(:x)) {
|
40
|
-
puts "String contents
|
38
|
+
puts "String contents: #{x}" }
|
41
39
|
end
|
42
40
|
transform.apply(tree)
|
43
41
|
|
data/Rakefile
CHANGED
@@ -1,13 +1,18 @@
|
|
1
|
-
|
2
1
|
require "rubygems"
|
3
2
|
require "rake/rdoctask"
|
4
3
|
require 'rspec/core/rake_task'
|
5
4
|
require "rake/gempackagetask"
|
6
5
|
|
7
|
-
|
8
|
-
desc "Run all examples"
|
6
|
+
desc "Run all tests: Exhaustive."
|
9
7
|
RSpec::Core::RakeTask.new
|
10
8
|
|
9
|
+
namespace :spec do
|
10
|
+
desc "Only run unit tests: Fast. "
|
11
|
+
RSpec::Core::RakeTask.new(:unit) do |task|
|
12
|
+
task.pattern = "spec/parslet/**/*_spec.rb"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
11
16
|
task :default => :spec
|
12
17
|
|
13
18
|
require 'sdoc'
|
@@ -27,10 +32,19 @@ desc 'Clear out RDoc'
|
|
27
32
|
task :clean => [:clobber_rdoc, :clobber_package]
|
28
33
|
|
29
34
|
# This task actually builds the gem.
|
35
|
+
task :gem => :spec
|
30
36
|
spec = eval(File.read('parslet.gemspec'))
|
37
|
+
|
31
38
|
desc "Generate the gem package."
|
32
39
|
Rake::GemPackageTask.new(spec) do |pkg|
|
33
40
|
pkg.gem_spec = spec
|
34
41
|
end
|
35
42
|
|
36
|
-
|
43
|
+
desc "Prints LOC stats"
|
44
|
+
task :stat do
|
45
|
+
%w(lib spec example).each do |dir|
|
46
|
+
loc = %x(find #{dir} -name "*.rb" | xargs wc -l | grep 'total').split.first.to_i
|
47
|
+
printf("%20s %d\n", dir, loc)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
data/example/comments.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
1
|
# A small example on how to parse common types of comments. The example
|
2
2
|
# started out with parser code from Stephen Waits.
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'pp'
|
7
7
|
require 'parslet'
|
8
8
|
require 'parslet/convenience'
|
9
9
|
|
10
10
|
class ALanguage < Parslet::Parser
|
11
|
-
root(:
|
11
|
+
root(:lines)
|
12
12
|
|
13
|
-
rule(:
|
14
|
-
rule(:line) {
|
15
|
-
rule(:
|
13
|
+
rule(:lines) { line.repeat }
|
14
|
+
rule(:line) { spaces >> expression.repeat >> newline }
|
15
|
+
rule(:newline) { str("\n") >> str("\r").maybe }
|
16
16
|
|
17
|
-
rule(:
|
17
|
+
rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
|
18
18
|
|
19
|
-
rule(:
|
20
|
-
rule(:space) { multiline_comment
|
21
|
-
|
22
|
-
rule(:line_comment) { str('//') >> (
|
23
|
-
rule(:multiline_comment) { str('/*') >> (str('*/').
|
19
|
+
rule(:spaces) { space.repeat }
|
20
|
+
rule(:space) { multiline_comment | line_comment | str(' ') }
|
21
|
+
|
22
|
+
rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) }
|
23
|
+
rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) }
|
24
24
|
end
|
25
25
|
|
26
26
|
code = %q(
|
@@ -33,5 +33,3 @@ code = %q(
|
|
33
33
|
)
|
34
34
|
|
35
35
|
pp ALanguage.new.parse_with_debug(code)
|
36
|
-
|
37
|
-
|
data/example/documentation.rb
CHANGED
data/example/email_parser.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Example contributed by Hal Brodigan (postmodern). Thanks!
|
4
4
|
|
5
|
-
$:.unshift
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
6
|
require 'parslet'
|
7
7
|
|
8
8
|
class EmailParser < Parslet::Parser
|
@@ -35,7 +35,7 @@ class EmailSanitizer < Parslet::Transform
|
|
35
35
|
rule(:word => simple(:word)) { word }
|
36
36
|
|
37
37
|
rule(:username => sequence(:username)) { username.join + "@" }
|
38
|
-
rule(:username => simple(:username)) { username + "@" }
|
38
|
+
rule(:username => simple(:username)) { username.to_s + "@" }
|
39
39
|
|
40
40
|
rule(:email => sequence(:email)) { email.join }
|
41
41
|
end
|
@@ -45,12 +45,12 @@ sanitizer = EmailSanitizer.new
|
|
45
45
|
|
46
46
|
unless ARGV[0]
|
47
47
|
STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
|
48
|
-
|
48
|
+
STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
|
49
49
|
end
|
50
50
|
|
51
51
|
begin
|
52
|
-
p sanitizer.apply(parser.parse(ARGV[0]))
|
52
|
+
p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
|
53
53
|
rescue Parslet::ParseFailed => error
|
54
54
|
puts error
|
55
|
-
puts parser.
|
55
|
+
puts parser.error_tree
|
56
56
|
end
|
data/example/empty.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Basically just demonstrates that you can leave rules empty and get a nice
|
2
2
|
# NotImplementedError. A way to quickly spec out your parser rules?
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'parslet'
|
7
7
|
|
@@ -10,4 +10,4 @@ class Parser < Parslet::Parser
|
|
10
10
|
end
|
11
11
|
|
12
12
|
|
13
|
-
Parser.new.empty.parslet
|
13
|
+
Parser.new.empty.parslet
|
data/example/erb.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
# Example that demonstrates how a simple erb-like parser could be constructed.
|
2
|
+
|
1
3
|
$:.unshift File.dirname(__FILE__) + "/../lib"
|
4
|
+
|
2
5
|
require 'parslet'
|
3
6
|
|
4
7
|
class ErbParser < Parslet::Parser
|
5
|
-
rule(:ruby) { (str('%>').
|
8
|
+
rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
|
6
9
|
|
7
10
|
rule(:expression) { (str('=') >> ruby).as(:expression) }
|
8
11
|
rule(:comment) { (str('#') >> ruby).as(:comment) }
|
@@ -10,7 +13,7 @@ class ErbParser < Parslet::Parser
|
|
10
13
|
rule(:erb) { expression | comment | code }
|
11
14
|
|
12
15
|
rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
|
13
|
-
rule(:text) { (str('<%').
|
16
|
+
rule(:text) { (str('<%').absent? >> any).repeat(1) }
|
14
17
|
|
15
18
|
rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
|
16
19
|
root(:text_with_ruby)
|
@@ -41,4 +44,4 @@ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a be
|
|
41
44
|
The <%= 'nicely' %> printed result.
|
42
45
|
The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
|
43
46
|
ERB
|
44
|
-
))
|
47
|
+
))
|
data/example/ip_address.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
#
|
7
7
|
# See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
|
8
8
|
|
9
|
-
$:.unshift
|
9
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
10
10
|
|
11
11
|
require 'pp'
|
12
12
|
require 'parslet'
|
@@ -122,4 +122,4 @@ end
|
|
122
122
|
rescue Parslet::ParseFailed => m
|
123
123
|
puts "Failed: #{m}"
|
124
124
|
end
|
125
|
-
end
|
125
|
+
end
|
data/example/local.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
# An exploration of two ideas:
|
3
|
+
# a) Constructing a whole parser inline, without the artificial class around
|
4
|
+
# it.
|
5
|
+
# and:
|
6
|
+
# b) Constructing non-greedy or non-blind parsers by transforming the
|
7
|
+
# grammar.
|
8
|
+
|
9
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
10
|
+
|
11
|
+
require 'parslet'
|
12
|
+
include Parslet
|
13
|
+
|
14
|
+
a = str('a').repeat >> str('aa')
|
15
|
+
|
16
|
+
# E1% E2
|
17
|
+
#
|
18
|
+
# S = E2 | E1 S
|
19
|
+
|
20
|
+
def this(name, &block); return Parslet::Atoms::Entity.new(name, &block) end
|
21
|
+
def epsilon; any.absent? end
|
22
|
+
|
23
|
+
# Traditional repetition will try as long as the pattern can be matched and
|
24
|
+
# then give up. This is greedy and blind.
|
25
|
+
a = str('a').as(:e) >> this('a') { a }.as(:rec) | epsilon
|
26
|
+
|
27
|
+
# Here's a pattern match that is greedy and non-blind. The first pattern
|
28
|
+
# 'a'* will be tried as many times as possible, while still matching the
|
29
|
+
# end pattern 'aa'.
|
30
|
+
b = str('aa').as(:e2) >> epsilon | str('a').as(:e1) >> this('b') { b }.as(:rec)
|
31
|
+
|
32
|
+
p a.parse('aaaa')
|
33
|
+
p b
|
34
|
+
p b.parse('aaaa')
|
data/example/minilisp.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Reproduces [1] using parslet.
|
2
2
|
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'pp'
|
7
7
|
require 'parslet'
|
@@ -45,7 +45,7 @@ module MiniLisp
|
|
45
45
|
rule(:string) {
|
46
46
|
str('"') >> (
|
47
47
|
str('\\') >> any |
|
48
|
-
str('"').
|
48
|
+
str('"').absent? >> any
|
49
49
|
).repeat.as(:string) >> str('"') >> space?
|
50
50
|
}
|
51
51
|
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
[{:exp=>{:a=>"a"@3}},
|
2
|
+
{:line=>"// line comment"@7},
|
3
|
+
{:exp=>{:a=>"a"@25}},
|
4
|
+
{:exp=>{:a=>"a"@27}},
|
5
|
+
{:exp=>[{:a=>"a"@29}, {:line=>"// line comment"@31}]},
|
6
|
+
{:exp=>[{:a=>"a"@49}, {:multi=>"/* inline comment */"@51}]},
|
7
|
+
{:exp=>{:a=>"a"@72}},
|
8
|
+
{:multi=>"/* multiline\n comment */"@77}]
|
@@ -0,0 +1,4 @@
|
|
1
|
+
/Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:326:in `parse_failed': Don't know what to do with bbbb at line 1 char 1. (Parslet::ParseFailed)
|
2
|
+
from /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:55:in `parse'
|
3
|
+
from example/documentation.rb:13:in `parse'
|
4
|
+
from example/documentation.rb:18:in `<main>'
|
@@ -0,0 +1 @@
|
|
1
|
+
"aaaa"@0
|
@@ -0,0 +1 @@
|
|
1
|
+
example/empty.rb:13:in `<main>': rule(:empty) { ... } returns nil. Still not implemented, but already used? (NotImplementedError)
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{:text=>[{:text=>"The value of x is "@0}, {:expression=>{:ruby=>" x "@21}}, {:text=>"."@26}]}
|
2
|
+
{:text=>[{:code=>{:ruby=>" 1 + 2 "@2}}]}
|
3
|
+
{:text=>[{:comment=>{:ruby=>" commented "@3}}]}
|
4
|
+
The not printed result of "a = 2".
|
5
|
+
The not printed non-evaluated comment "a = 1", see the value of a below.
|
6
|
+
The nicely printed result.
|
7
|
+
The value of a is 2, and b is 3.
|
@@ -0,0 +1,9 @@
|
|
1
|
+
0.0.0.0 -> {:ipv4=>"0.0.0.0"@0}
|
2
|
+
255.255.255.255 -> {:ipv4=>"255.255.255.255"@0}
|
3
|
+
255.255.255 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
|
4
|
+
1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0}
|
5
|
+
12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0}
|
6
|
+
12AD::34FC -> {:ipv6=>"12AD::34FC"@0}
|
7
|
+
12AD:: -> {:ipv6=>"12AD::"@0}
|
8
|
+
:: -> {:ipv6=>"::"@0}
|
9
|
+
1:2 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
|
@@ -0,0 +1,8 @@
|
|
1
|
+
(): {:l=>"("@0, :m=>nil, :r=>")"@1} (1 parens)
|
2
|
+
|
3
|
+
(()): {:l=>"("@0, :m=>{:l=>"("@1, :m=>nil, :r=>")"@2}, :r=>")"@3} (2 parens)
|
4
|
+
|
5
|
+
((((())))): {:l=>"("@0, :m=>{:l=>"("@1, :m=>{:l=>"("@2, :m=>{:l=>"("@3, :m=>{:l=>"("@4, :m=>nil, :r=>")"@5}, :r=>")"@6}, :r=>")"@7}, :r=>")"@8}, :r=>")"@9} (5 parens)
|
6
|
+
|
7
|
+
((()): Failed to match sequence (l:'(' m:(BALANCED?)) at line 1 char 6.
|
8
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
String contents: This is a \"String\" in which you can escape stuff
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"And when Spring comes"
|
2
|
+
{:bud=>{:stem=>[{:branch=>:leaf}]}}
|
3
|
+
|
4
|
+
"And when Summer comes"
|
5
|
+
{:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
|
6
|
+
|
7
|
+
"And when Fall comes"
|
8
|
+
Fruit!
|
9
|
+
Falling Leaves!
|
10
|
+
{:bud=>{:stem=>[{:branch=>[]}]}}
|
11
|
+
|
12
|
+
"And when Winter comes"
|
13
|
+
{:bud=>{:stem=>[]}}
|
14
|
+
|
15
|
+
"And when Spring comes"
|
16
|
+
{:bud=>{:stem=>[{:branch=>:leaf}]}}
|
17
|
+
|
18
|
+
"And when Summer comes"
|
19
|
+
{:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
|
20
|
+
|
21
|
+
"And when Fall comes"
|
22
|
+
Fruit!
|
23
|
+
Falling Leaves!
|
24
|
+
{:bud=>{:stem=>[{:branch=>[]}]}}
|
25
|
+
|
26
|
+
"And when Winter comes"
|
27
|
+
{:bud=>{:stem=>[]}}
|
28
|
+
|
data/example/parens.rb
CHANGED
data/example/readme.rb
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
# The example from the readme. With this, I am making sure that the readme
|
2
2
|
# 'works'. Is this too messy?
|
3
3
|
|
4
|
-
$:.unshift
|
5
|
-
|
6
|
-
require 'pp'
|
7
|
-
require 'parslet'
|
8
|
-
include Parslet
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
9
5
|
|
6
|
+
# cut here -------------------------------------------------------------------
|
10
7
|
require 'parslet'
|
11
8
|
include Parslet
|
12
9
|
|
@@ -14,20 +11,17 @@ include Parslet
|
|
14
11
|
parser = str('"') >>
|
15
12
|
(
|
16
13
|
str('\\') >> any |
|
17
|
-
str('"').
|
14
|
+
str('"').absent? >> any
|
18
15
|
).repeat.as(:string) >>
|
19
16
|
str('"')
|
20
17
|
|
21
18
|
# Parse the string and capture parts of the interpretation (:string above)
|
22
|
-
tree = parser.parse(
|
23
|
-
"This is a \\"String\\" in which you can escape stuff"
|
24
|
-
}.strip)
|
19
|
+
tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
|
25
20
|
|
26
21
|
tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
|
27
22
|
|
28
23
|
# Here's how you can grab results from that tree:
|
29
24
|
|
30
|
-
# 1)
|
31
25
|
transform = Parslet::Transform.new do
|
32
26
|
rule(:string => simple(:x)) {
|
33
27
|
puts "String contents: #{x}" }
|