parslet 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
data/HISTORY.txt CHANGED
@@ -1,3 +1,32 @@
1
+ = 2.0 / ?? (future release changes, like a reminder to self)
2
+
3
+ - prsnt? and absnt? are now finally banished into oblivion. Wasting vowels for
4
+ the win.
5
+
6
+ = 1.2.0 / ???
7
+
8
+ + Parslet::Parser is now also a grammar atom, it can be composed freely with
9
+ other atoms. (str('f') >> MiniLispParser.new >> str('b'))
10
+
11
+ + No strings, only slices are returned as part of the parser result.
12
+ Parslet::Slice is almost a string class, but one that remembers the
13
+ source offset. This has also bought us a slight speedup.
14
+
15
+ + require 'parslet/convenience' now brings #parse_with_debug to all parslets.
16
+ This is a consequence of the above change.
17
+
18
+ + Deprecates prsnt? and absnt? in favor of the more readable absent? and
19
+ present?. Uses 3 bytes more RAM. The old variants will exist until we release
20
+ 2.0.
21
+
22
+ INTERNALLY
23
+
24
+ + Visitors now should have methods that all begin with 'visit_*'. #str
25
+ becomes #visit_str.
26
+
27
+ + Parslet::Atoms::Entity now takes only a block argument instead of context
28
+ and block.
29
+
1
30
  = 1.1.1 / 4Feb2011
2
31
 
3
32
  ! FIX: Line counting was broken by performance optimisations.
data/README CHANGED
@@ -27,9 +27,7 @@ SYNOPSIS
27
27
  str('"')
28
28
 
29
29
  # Parse the string and capture parts of the interpretation (:string above)
30
- tree = parser.parse(%Q{
31
- "This is a \\"String\\" in which you can escape stuff"
32
- }.strip)
30
+ tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
33
31
 
34
32
  tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
35
33
 
@@ -37,7 +35,7 @@ SYNOPSIS
37
35
 
38
36
  transform = Parslet::Transform.new do
39
37
  rule(:string => simple(:x)) {
40
- puts "String contents (method 2): #{x}" }
38
+ puts "String contents: #{x}" }
41
39
  end
42
40
  transform.apply(tree)
43
41
 
data/Rakefile CHANGED
@@ -1,13 +1,18 @@
1
-
2
1
  require "rubygems"
3
2
  require "rake/rdoctask"
4
3
  require 'rspec/core/rake_task'
5
4
  require "rake/gempackagetask"
6
5
 
7
-
8
- desc "Run all examples"
6
+ desc "Run all tests: Exhaustive."
9
7
  RSpec::Core::RakeTask.new
10
8
 
9
+ namespace :spec do
10
+ desc "Only run unit tests: Fast. "
11
+ RSpec::Core::RakeTask.new(:unit) do |task|
12
+ task.pattern = "spec/parslet/**/*_spec.rb"
13
+ end
14
+ end
15
+
11
16
  task :default => :spec
12
17
 
13
18
  require 'sdoc'
@@ -27,10 +32,19 @@ desc 'Clear out RDoc'
27
32
  task :clean => [:clobber_rdoc, :clobber_package]
28
33
 
29
34
  # This task actually builds the gem.
35
+ task :gem => :spec
30
36
  spec = eval(File.read('parslet.gemspec'))
37
+
31
38
  desc "Generate the gem package."
32
39
  Rake::GemPackageTask.new(spec) do |pkg|
33
40
  pkg.gem_spec = spec
34
41
  end
35
42
 
36
- task :gem => :spec
43
+ desc "Prints LOC stats"
44
+ task :stat do
45
+ %w(lib spec example).each do |dir|
46
+ loc = %x(find #{dir} -name "*.rb" | xargs wc -l | grep 'total').split.first.to_i
47
+ printf("%20s %d\n", dir, loc)
48
+ end
49
+ end
50
+
data/example/comments.rb CHANGED
@@ -1,26 +1,26 @@
1
1
  # A small example on how to parse common types of comments. The example
2
2
  # started out with parser code from Stephen Waits.
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
8
8
  require 'parslet/convenience'
9
9
 
10
10
  class ALanguage < Parslet::Parser
11
- root(:expressions)
11
+ root(:lines)
12
12
 
13
- rule(:expressions) { (line >> eol).repeat(1) | line }
14
- rule(:line) { space? >> an_expression.as(:exp).repeat }
15
- rule(:an_expression) { str('a').as(:a) >> space? }
13
+ rule(:lines) { line.repeat }
14
+ rule(:line) { spaces >> expression.repeat >> newline }
15
+ rule(:newline) { str("\n") >> str("\r").maybe }
16
16
 
17
- rule(:eol) { space? >> match["\n\r"].repeat(1) >> space? }
17
+ rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
18
18
 
19
- rule(:space?) { space.repeat }
20
- rule(:space) { multiline_comment.as(:multi) | line_comment.as(:line) | str(' ') }
21
-
22
- rule(:line_comment) { str('//') >> (match["\n\r"].absnt? >> any).repeat }
23
- rule(:multiline_comment) { str('/*') >> (str('*/').absnt? >> any).repeat >> str('*/') }
19
+ rule(:spaces) { space.repeat }
20
+ rule(:space) { multiline_comment | line_comment | str(' ') }
21
+
22
+ rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) }
23
+ rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) }
24
24
  end
25
25
 
26
26
  code = %q(
@@ -33,5 +33,3 @@ code = %q(
33
33
  )
34
34
 
35
35
  pp ALanguage.new.parse_with_debug(code)
36
-
37
-
@@ -1,7 +1,7 @@
1
1
  # A small example that shows a really small parser and what happens on parser
2
2
  # errors.
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Example contributed by Hal Brodigan (postmodern). Thanks!
4
4
 
5
- $:.unshift '../lib'
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
6
  require 'parslet'
7
7
 
8
8
  class EmailParser < Parslet::Parser
@@ -35,7 +35,7 @@ class EmailSanitizer < Parslet::Transform
35
35
  rule(:word => simple(:word)) { word }
36
36
 
37
37
  rule(:username => sequence(:username)) { username.join + "@" }
38
- rule(:username => simple(:username)) { username + "@" }
38
+ rule(:username => simple(:username)) { username.to_s + "@" }
39
39
 
40
40
  rule(:email => sequence(:email)) { email.join }
41
41
  end
@@ -45,12 +45,12 @@ sanitizer = EmailSanitizer.new
45
45
 
46
46
  unless ARGV[0]
47
47
  STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
48
- exit -1
48
+ STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
49
49
  end
50
50
 
51
51
  begin
52
- p sanitizer.apply(parser.parse(ARGV[0]))
52
+ p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
53
53
  rescue Parslet::ParseFailed => error
54
54
  puts error
55
- puts parser.root.error_tree
55
+ puts parser.error_tree
56
56
  end
data/example/empty.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # Basically just demonstrates that you can leave rules empty and get a nice
2
2
  # NotImplementedError. A way to quickly spec out your parser rules?
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'parslet'
7
7
 
@@ -10,4 +10,4 @@ class Parser < Parslet::Parser
10
10
  end
11
11
 
12
12
 
13
- Parser.new.empty.parslet
13
+ Parser.new.empty.parslet
data/example/erb.rb CHANGED
@@ -1,8 +1,11 @@
1
+ # Example that demonstrates how a simple erb-like parser could be constructed.
2
+
1
3
  $:.unshift File.dirname(__FILE__) + "/../lib"
4
+
2
5
  require 'parslet'
3
6
 
4
7
  class ErbParser < Parslet::Parser
5
- rule(:ruby) { (str('%>').absnt? >> any).repeat.as(:ruby) }
8
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
6
9
 
7
10
  rule(:expression) { (str('=') >> ruby).as(:expression) }
8
11
  rule(:comment) { (str('#') >> ruby).as(:comment) }
@@ -10,7 +13,7 @@ class ErbParser < Parslet::Parser
10
13
  rule(:erb) { expression | comment | code }
11
14
 
12
15
  rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
13
- rule(:text) { (str('<%').absnt? >> any).repeat(1) }
16
+ rule(:text) { (str('<%').absent? >> any).repeat(1) }
14
17
 
15
18
  rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
16
19
  root(:text_with_ruby)
@@ -41,4 +44,4 @@ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a be
41
44
  The <%= 'nicely' %> printed result.
42
45
  The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
43
46
  ERB
44
- ))
47
+ ))
@@ -6,7 +6,7 @@
6
6
  #
7
7
  # See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
8
8
 
9
- $:.unshift '../lib'
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
10
 
11
11
  require 'pp'
12
12
  require 'parslet'
@@ -122,4 +122,4 @@ end
122
122
  rescue Parslet::ParseFailed => m
123
123
  puts "Failed: #{m}"
124
124
  end
125
- end
125
+ end
data/example/local.rb ADDED
@@ -0,0 +1,34 @@
1
+
2
+ # An exploration of two ideas:
3
+ # a) Constructing a whole parser inline, without the artificial class around
4
+ # it.
5
+ # and:
6
+ # b) Constructing non-greedy or non-blind parsers by transforming the
7
+ # grammar.
8
+
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
+
11
+ require 'parslet'
12
+ include Parslet
13
+
14
+ a = str('a').repeat >> str('aa')
15
+
16
+ # E1% E2
17
+ #
18
+ # S = E2 | E1 S
19
+
20
+ def this(name, &block); return Parslet::Atoms::Entity.new(name, &block) end
21
+ def epsilon; any.absent? end
22
+
23
+ # Traditional repetition will try as long as the pattern can be matched and
24
+ # then give up. This is greedy and blind.
25
+ a = str('a').as(:e) >> this('a') { a }.as(:rec) | epsilon
26
+
27
+ # Here's a pattern match that is greedy and non-blind. The first pattern
28
+ # 'a'* will be tried as many times as possible, while still matching the
29
+ # end pattern 'aa'.
30
+ b = str('aa').as(:e2) >> epsilon | str('a').as(:e1) >> this('b') { b }.as(:rec)
31
+
32
+ p a.parse('aaaa')
33
+ p b
34
+ p b.parse('aaaa')
data/example/minilisp.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # Reproduces [1] using parslet.
2
2
  # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
@@ -45,7 +45,7 @@ module MiniLisp
45
45
  rule(:string) {
46
46
  str('"') >> (
47
47
  str('\\') >> any |
48
- str('"').absnt? >> any
48
+ str('"').absent? >> any
49
49
  ).repeat.as(:string) >> str('"') >> space?
50
50
  }
51
51
  end
@@ -0,0 +1,8 @@
1
+ [{:exp=>{:a=>"a"@3}},
2
+ {:line=>"// line comment"@7},
3
+ {:exp=>{:a=>"a"@25}},
4
+ {:exp=>{:a=>"a"@27}},
5
+ {:exp=>[{:a=>"a"@29}, {:line=>"// line comment"@31}]},
6
+ {:exp=>[{:a=>"a"@49}, {:multi=>"/* inline comment */"@51}]},
7
+ {:exp=>{:a=>"a"@72}},
8
+ {:multi=>"/* multiline\n comment */"@77}]
@@ -0,0 +1,4 @@
1
+ /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:326:in `parse_failed': Don't know what to do with bbbb at line 1 char 1. (Parslet::ParseFailed)
2
+ from /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:55:in `parse'
3
+ from example/documentation.rb:13:in `parse'
4
+ from example/documentation.rb:18:in `<main>'
@@ -0,0 +1 @@
1
+ "aaaa"@0
@@ -0,0 +1,2 @@
1
+ since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com
2
+ "a.b.c.d@gmail.com"
@@ -0,0 +1 @@
1
+ example/empty.rb:13:in `<main>': rule(:empty) { ... } returns nil. Still not implemented, but already used? (NotImplementedError)
@@ -0,0 +1,7 @@
1
+ {:text=>[{:text=>"The value of x is "@0}, {:expression=>{:ruby=>" x "@21}}, {:text=>"."@26}]}
2
+ {:text=>[{:code=>{:ruby=>" 1 + 2 "@2}}]}
3
+ {:text=>[{:comment=>{:ruby=>" commented "@3}}]}
4
+ The not printed result of "a = 2".
5
+ The not printed non-evaluated comment "a = 1", see the value of a below.
6
+ The nicely printed result.
7
+ The value of a is 2, and b is 3.
@@ -0,0 +1,9 @@
1
+ 0.0.0.0 -> {:ipv4=>"0.0.0.0"@0}
2
+ 255.255.255.255 -> {:ipv4=>"255.255.255.255"@0}
3
+ 255.255.255 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
4
+ 1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0}
5
+ 12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0}
6
+ 12AD::34FC -> {:ipv6=>"12AD::34FC"@0}
7
+ 12AD:: -> {:ipv6=>"12AD::"@0}
8
+ :: -> {:ipv6=>"::"@0}
9
+ 1:2 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
@@ -0,0 +1,3 @@
1
+ {:e=>"a"@0, :rec=>{:e=>"a"@1, :rec=>{:e=>"a"@2, :rec=>{:e=>"a"@3, :rec=>nil}}}}
2
+ e2:'aa' !. / e1:'a' rec:B
3
+ {:e1=>"a"@0, :rec=>{:e1=>"a"@1, :rec=>{:e2=>"aa"@2}}}
@@ -0,0 +1,5 @@
1
+ [:define,
2
+ :test,
3
+ [:lambda,
4
+ [],
5
+ [:begin, [:display, "something"@54], [:display, 1], [:display, 3.08]]]]
@@ -0,0 +1,8 @@
1
+ (): {:l=>"("@0, :m=>nil, :r=>")"@1} (1 parens)
2
+
3
+ (()): {:l=>"("@0, :m=>{:l=>"("@1, :m=>nil, :r=>")"@2}, :r=>")"@3} (2 parens)
4
+
5
+ ((((())))): {:l=>"("@0, :m=>{:l=>"("@1, :m=>{:l=>"("@2, :m=>{:l=>"("@3, :m=>{:l=>"("@4, :m=>nil, :r=>")"@5}, :r=>")"@6}, :r=>")"@7}, :r=>")"@8}, :r=>")"@9} (5 parens)
6
+
7
+ ((()): Failed to match sequence (l:'(' m:(BALANCED?)) at line 1 char 6.
8
+
@@ -0,0 +1 @@
1
+ String contents: This is a \"String\" in which you can escape stuff
@@ -0,0 +1,28 @@
1
+ "And when Spring comes"
2
+ {:bud=>{:stem=>[{:branch=>:leaf}]}}
3
+
4
+ "And when Summer comes"
5
+ {:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
6
+
7
+ "And when Fall comes"
8
+ Fruit!
9
+ Falling Leaves!
10
+ {:bud=>{:stem=>[{:branch=>[]}]}}
11
+
12
+ "And when Winter comes"
13
+ {:bud=>{:stem=>[]}}
14
+
15
+ "And when Spring comes"
16
+ {:bud=>{:stem=>[{:branch=>:leaf}]}}
17
+
18
+ "And when Summer comes"
19
+ {:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
20
+
21
+ "And when Fall comes"
22
+ Fruit!
23
+ Falling Leaves!
24
+ {:bud=>{:stem=>[{:branch=>[]}]}}
25
+
26
+ "And when Winter comes"
27
+ {:bud=>{:stem=>[]}}
28
+
@@ -0,0 +1,2 @@
1
+ "verified"
2
+ {:o=>{:name=>"b"@1}, :i=>"verified", :c=>{:name=>"a"@33}}
@@ -0,0 +1,3 @@
1
+ [#<struct IntLit text="123"@0>,
2
+ #<struct IntLit text="12345"@4>,
3
+ #<struct StringLit text=" Some String with \\\"escapes\\\""@11>]
data/example/parens.rb CHANGED
@@ -2,9 +2,7 @@
2
2
  # uses '.as(:name)' to construct a tree that can reliably be matched
3
3
  # afterwards.
4
4
 
5
- $:.unshift File.join(
6
- File.dirname(__FILE__),
7
- '/../lib')
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
8
6
 
9
7
  require 'pp'
10
8
  require 'parslet'
data/example/readme.rb CHANGED
@@ -1,12 +1,9 @@
1
1
  # The example from the readme. With this, I am making sure that the readme
2
2
  # 'works'. Is this too messy?
3
3
 
4
- $:.unshift '../lib'
5
-
6
- require 'pp'
7
- require 'parslet'
8
- include Parslet
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
9
5
 
6
+ # cut here -------------------------------------------------------------------
10
7
  require 'parslet'
11
8
  include Parslet
12
9
 
@@ -14,20 +11,17 @@ include Parslet
14
11
  parser = str('"') >>
15
12
  (
16
13
  str('\\') >> any |
17
- str('"').absnt? >> any
14
+ str('"').absent? >> any
18
15
  ).repeat.as(:string) >>
19
16
  str('"')
20
17
 
21
18
  # Parse the string and capture parts of the interpretation (:string above)
22
- tree = parser.parse(%Q{
23
- "This is a \\"String\\" in which you can escape stuff"
24
- }.strip)
19
+ tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
25
20
 
26
21
  tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
27
22
 
28
23
  # Here's how you can grab results from that tree:
29
24
 
30
- # 1)
31
25
  transform = Parslet::Transform.new do
32
26
  rule(:string => simple(:x)) {
33
27
  puts "String contents: #{x}" }