parslet 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
data/HISTORY.txt CHANGED
@@ -1,3 +1,32 @@
1
+ = 2.0 / ?? (future release changes, like a reminder to self)
2
+
3
+ - prsnt? and absnt? are now finally banned into oblivion. Wasting vowels for
4
+ the win.
5
+
6
+ = 1.2.0 / ???
7
+
8
+ + Parslet::Parser is now also a grammar atom, it can be composed freely with
9
+ other atoms. (str('f') >> MiniLispParser.new >> str('b'))
10
+
11
+ + No strings, only slices are returned as part of the parser result.
12
+ Parslet::Slice is almost a string class, but one that remembers the
13
+ source offset. This has also bought us a slight speedup.
14
+
15
+ + require 'parslet/convenience' now brings #parse_with_debug to all parslets.
16
+ This is a consequence of the above change.
17
+
18
+ + Deprecates prsnt? and absnt? in favor of the more readable absent? and
19
+ present?. Uses 3 bytes more RAM. The old variants will exist until we release
20
+ 2.0.
21
+
22
+ INTERNALLY
23
+
24
+ + Visitors now should have methods that all begin with 'visit_*'. #str
25
+ becomes #visit_str.
26
+
27
+ + Parslet::Atoms::Entity now takes only a block argument instead of context
28
+ and block.
29
+
1
30
  = 1.1.1 / 4Feb2011
2
31
 
3
32
  ! FIX: Line counting was broken by performance optimisations.
data/README CHANGED
@@ -27,9 +27,7 @@ SYNOPSIS
27
27
  str('"')
28
28
 
29
29
  # Parse the string and capture parts of the interpretation (:string above)
30
- tree = parser.parse(%Q{
31
- "This is a \\"String\\" in which you can escape stuff"
32
- }.strip)
30
+ tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
33
31
 
34
32
  tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
35
33
 
@@ -37,7 +35,7 @@ SYNOPSIS
37
35
 
38
36
  transform = Parslet::Transform.new do
39
37
  rule(:string => simple(:x)) {
40
- puts "String contents (method 2): #{x}" }
38
+ puts "String contents: #{x}" }
41
39
  end
42
40
  transform.apply(tree)
43
41
 
data/Rakefile CHANGED
@@ -1,13 +1,18 @@
1
-
2
1
  require "rubygems"
3
2
  require "rake/rdoctask"
4
3
  require 'rspec/core/rake_task'
5
4
  require "rake/gempackagetask"
6
5
 
7
-
8
- desc "Run all examples"
6
+ desc "Run all tests: Exhaustive."
9
7
  RSpec::Core::RakeTask.new
10
8
 
9
+ namespace :spec do
10
+ desc "Only run unit tests: Fast. "
11
+ RSpec::Core::RakeTask.new(:unit) do |task|
12
+ task.pattern = "spec/parslet/**/*_spec.rb"
13
+ end
14
+ end
15
+
11
16
  task :default => :spec
12
17
 
13
18
  require 'sdoc'
@@ -27,10 +32,19 @@ desc 'Clear out RDoc'
27
32
  task :clean => [:clobber_rdoc, :clobber_package]
28
33
 
29
34
  # This task actually builds the gem.
35
+ task :gem => :spec
30
36
  spec = eval(File.read('parslet.gemspec'))
37
+
31
38
  desc "Generate the gem package."
32
39
  Rake::GemPackageTask.new(spec) do |pkg|
33
40
  pkg.gem_spec = spec
34
41
  end
35
42
 
36
- task :gem => :spec
43
+ desc "Prints LOC stats"
44
+ task :stat do
45
+ %w(lib spec example).each do |dir|
46
+ loc = %x(find #{dir} -name "*.rb" | xargs wc -l | grep 'total').split.first.to_i
47
+ printf("%20s %d\n", dir, loc)
48
+ end
49
+ end
50
+
data/example/comments.rb CHANGED
@@ -1,26 +1,26 @@
1
1
  # A small example on how to parse common types of comments. The example
2
2
  # started out with parser code from Stephen Waits.
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
8
8
  require 'parslet/convenience'
9
9
 
10
10
  class ALanguage < Parslet::Parser
11
- root(:expressions)
11
+ root(:lines)
12
12
 
13
- rule(:expressions) { (line >> eol).repeat(1) | line }
14
- rule(:line) { space? >> an_expression.as(:exp).repeat }
15
- rule(:an_expression) { str('a').as(:a) >> space? }
13
+ rule(:lines) { line.repeat }
14
+ rule(:line) { spaces >> expression.repeat >> newline }
15
+ rule(:newline) { str("\n") >> str("\r").maybe }
16
16
 
17
- rule(:eol) { space? >> match["\n\r"].repeat(1) >> space? }
17
+ rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
18
18
 
19
- rule(:space?) { space.repeat }
20
- rule(:space) { multiline_comment.as(:multi) | line_comment.as(:line) | str(' ') }
21
-
22
- rule(:line_comment) { str('//') >> (match["\n\r"].absnt? >> any).repeat }
23
- rule(:multiline_comment) { str('/*') >> (str('*/').absnt? >> any).repeat >> str('*/') }
19
+ rule(:spaces) { space.repeat }
20
+ rule(:space) { multiline_comment | line_comment | str(' ') }
21
+
22
+ rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) }
23
+ rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) }
24
24
  end
25
25
 
26
26
  code = %q(
@@ -33,5 +33,3 @@ code = %q(
33
33
  )
34
34
 
35
35
  pp ALanguage.new.parse_with_debug(code)
36
-
37
-
@@ -1,7 +1,7 @@
1
1
  # A small example that shows a really small parser and what happens on parser
2
2
  # errors.
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Example contributed by Hal Brodigan (postmodern). Thanks!
4
4
 
5
- $:.unshift '../lib'
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
6
  require 'parslet'
7
7
 
8
8
  class EmailParser < Parslet::Parser
@@ -35,7 +35,7 @@ class EmailSanitizer < Parslet::Transform
35
35
  rule(:word => simple(:word)) { word }
36
36
 
37
37
  rule(:username => sequence(:username)) { username.join + "@" }
38
- rule(:username => simple(:username)) { username + "@" }
38
+ rule(:username => simple(:username)) { username.to_s + "@" }
39
39
 
40
40
  rule(:email => sequence(:email)) { email.join }
41
41
  end
@@ -45,12 +45,12 @@ sanitizer = EmailSanitizer.new
45
45
 
46
46
  unless ARGV[0]
47
47
  STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
48
- exit -1
48
+ STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
49
49
  end
50
50
 
51
51
  begin
52
- p sanitizer.apply(parser.parse(ARGV[0]))
52
+ p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
53
53
  rescue Parslet::ParseFailed => error
54
54
  puts error
55
- puts parser.root.error_tree
55
+ puts parser.error_tree
56
56
  end
data/example/empty.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # Basically just demonstrates that you can leave rules empty and get a nice
2
2
  # NotImplementedError. A way to quickly spec out your parser rules?
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'parslet'
7
7
 
@@ -10,4 +10,4 @@ class Parser < Parslet::Parser
10
10
  end
11
11
 
12
12
 
13
- Parser.new.empty.parslet
13
+ Parser.new.empty.parslet
data/example/erb.rb CHANGED
@@ -1,8 +1,11 @@
1
+ # Example that demonstrates how a simple erb-like parser could be constructed.
2
+
1
3
  $:.unshift File.dirname(__FILE__) + "/../lib"
4
+
2
5
  require 'parslet'
3
6
 
4
7
  class ErbParser < Parslet::Parser
5
- rule(:ruby) { (str('%>').absnt? >> any).repeat.as(:ruby) }
8
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
6
9
 
7
10
  rule(:expression) { (str('=') >> ruby).as(:expression) }
8
11
  rule(:comment) { (str('#') >> ruby).as(:comment) }
@@ -10,7 +13,7 @@ class ErbParser < Parslet::Parser
10
13
  rule(:erb) { expression | comment | code }
11
14
 
12
15
  rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
13
- rule(:text) { (str('<%').absnt? >> any).repeat(1) }
16
+ rule(:text) { (str('<%').absent? >> any).repeat(1) }
14
17
 
15
18
  rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
16
19
  root(:text_with_ruby)
@@ -41,4 +44,4 @@ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a be
41
44
  The <%= 'nicely' %> printed result.
42
45
  The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
43
46
  ERB
44
- ))
47
+ ))
@@ -6,7 +6,7 @@
6
6
  #
7
7
  # See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
8
8
 
9
- $:.unshift '../lib'
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
10
 
11
11
  require 'pp'
12
12
  require 'parslet'
@@ -122,4 +122,4 @@ end
122
122
  rescue Parslet::ParseFailed => m
123
123
  puts "Failed: #{m}"
124
124
  end
125
- end
125
+ end
data/example/local.rb ADDED
@@ -0,0 +1,34 @@
1
+
2
+ # An exploration of two ideas:
3
+ # a) Constructing a whole parser inline, without the artificial class around
4
+ # it.
5
+ # and:
6
+ # b) Constructing non-greedy or non-blind parsers by transforming the
7
+ # grammar.
8
+
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
+
11
+ require 'parslet'
12
+ include Parslet
13
+
14
+ a = str('a').repeat >> str('aa')
15
+
16
+ # E1% E2
17
+ #
18
+ # S = E2 | E1 S
19
+
20
+ def this(name, &block); return Parslet::Atoms::Entity.new(name, &block) end
21
+ def epsilon; any.absent? end
22
+
23
+ # Traditional repetition will try as long as the pattern can be matched and
24
+ # then give up. This is greedy and blind.
25
+ a = str('a').as(:e) >> this('a') { a }.as(:rec) | epsilon
26
+
27
+ # Here's a pattern match that is greedy and non-blind. The first pattern
28
+ # 'a'* will be tried as many times as possible, while still matching the
29
+ # end pattern 'aa'.
30
+ b = str('aa').as(:e2) >> epsilon | str('a').as(:e1) >> this('b') { b }.as(:rec)
31
+
32
+ p a.parse('aaaa')
33
+ p b
34
+ p b.parse('aaaa')
data/example/minilisp.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # Reproduces [1] using parslet.
2
2
  # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
@@ -45,7 +45,7 @@ module MiniLisp
45
45
  rule(:string) {
46
46
  str('"') >> (
47
47
  str('\\') >> any |
48
- str('"').absnt? >> any
48
+ str('"').absent? >> any
49
49
  ).repeat.as(:string) >> str('"') >> space?
50
50
  }
51
51
  end
@@ -0,0 +1,8 @@
1
+ [{:exp=>{:a=>"a"@3}},
2
+ {:line=>"// line comment"@7},
3
+ {:exp=>{:a=>"a"@25}},
4
+ {:exp=>{:a=>"a"@27}},
5
+ {:exp=>[{:a=>"a"@29}, {:line=>"// line comment"@31}]},
6
+ {:exp=>[{:a=>"a"@49}, {:multi=>"/* inline comment */"@51}]},
7
+ {:exp=>{:a=>"a"@72}},
8
+ {:multi=>"/* multiline\n comment */"@77}]
@@ -0,0 +1,4 @@
1
+ /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:326:in `parse_failed': Don't know what to do with bbbb at line 1 char 1. (Parslet::ParseFailed)
2
+ from /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:55:in `parse'
3
+ from example/documentation.rb:13:in `parse'
4
+ from example/documentation.rb:18:in `<main>'
@@ -0,0 +1 @@
1
+ "aaaa"@0
@@ -0,0 +1,2 @@
1
+ since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com
2
+ "a.b.c.d@gmail.com"
@@ -0,0 +1 @@
1
+ example/empty.rb:13:in `<main>': rule(:empty) { ... } returns nil. Still not implemented, but already used? (NotImplementedError)
@@ -0,0 +1,7 @@
1
+ {:text=>[{:text=>"The value of x is "@0}, {:expression=>{:ruby=>" x "@21}}, {:text=>"."@26}]}
2
+ {:text=>[{:code=>{:ruby=>" 1 + 2 "@2}}]}
3
+ {:text=>[{:comment=>{:ruby=>" commented "@3}}]}
4
+ The not printed result of "a = 2".
5
+ The not printed non-evaluated comment "a = 1", see the value of a below.
6
+ The nicely printed result.
7
+ The value of a is 2, and b is 3.
@@ -0,0 +1,9 @@
1
+ 0.0.0.0 -> {:ipv4=>"0.0.0.0"@0}
2
+ 255.255.255.255 -> {:ipv4=>"255.255.255.255"@0}
3
+ 255.255.255 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
4
+ 1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0}
5
+ 12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0}
6
+ 12AD::34FC -> {:ipv6=>"12AD::34FC"@0}
7
+ 12AD:: -> {:ipv6=>"12AD::"@0}
8
+ :: -> {:ipv6=>"::"@0}
9
+ 1:2 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
@@ -0,0 +1,3 @@
1
+ {:e=>"a"@0, :rec=>{:e=>"a"@1, :rec=>{:e=>"a"@2, :rec=>{:e=>"a"@3, :rec=>nil}}}}
2
+ e2:'aa' !. / e1:'a' rec:B
3
+ {:e1=>"a"@0, :rec=>{:e1=>"a"@1, :rec=>{:e2=>"aa"@2}}}
@@ -0,0 +1,5 @@
1
+ [:define,
2
+ :test,
3
+ [:lambda,
4
+ [],
5
+ [:begin, [:display, "something"@54], [:display, 1], [:display, 3.08]]]]
@@ -0,0 +1,8 @@
1
+ (): {:l=>"("@0, :m=>nil, :r=>")"@1} (1 parens)
2
+
3
+ (()): {:l=>"("@0, :m=>{:l=>"("@1, :m=>nil, :r=>")"@2}, :r=>")"@3} (2 parens)
4
+
5
+ ((((())))): {:l=>"("@0, :m=>{:l=>"("@1, :m=>{:l=>"("@2, :m=>{:l=>"("@3, :m=>{:l=>"("@4, :m=>nil, :r=>")"@5}, :r=>")"@6}, :r=>")"@7}, :r=>")"@8}, :r=>")"@9} (5 parens)
6
+
7
+ ((()): Failed to match sequence (l:'(' m:(BALANCED?)) at line 1 char 6.
8
+
@@ -0,0 +1 @@
1
+ String contents: This is a \"String\" in which you can escape stuff
@@ -0,0 +1,28 @@
1
+ "And when Spring comes"
2
+ {:bud=>{:stem=>[{:branch=>:leaf}]}}
3
+
4
+ "And when Summer comes"
5
+ {:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
6
+
7
+ "And when Fall comes"
8
+ Fruit!
9
+ Falling Leaves!
10
+ {:bud=>{:stem=>[{:branch=>[]}]}}
11
+
12
+ "And when Winter comes"
13
+ {:bud=>{:stem=>[]}}
14
+
15
+ "And when Spring comes"
16
+ {:bud=>{:stem=>[{:branch=>:leaf}]}}
17
+
18
+ "And when Summer comes"
19
+ {:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}}
20
+
21
+ "And when Fall comes"
22
+ Fruit!
23
+ Falling Leaves!
24
+ {:bud=>{:stem=>[{:branch=>[]}]}}
25
+
26
+ "And when Winter comes"
27
+ {:bud=>{:stem=>[]}}
28
+
@@ -0,0 +1,2 @@
1
+ "verified"
2
+ {:o=>{:name=>"b"@1}, :i=>"verified", :c=>{:name=>"a"@33}}
@@ -0,0 +1,3 @@
1
+ [#<struct IntLit text="123"@0>,
2
+ #<struct IntLit text="12345"@4>,
3
+ #<struct StringLit text=" Some String with \\\"escapes\\\""@11>]
data/example/parens.rb CHANGED
@@ -2,9 +2,7 @@
2
2
  # uses '.as(:name)' to construct a tree that can reliably be matched
3
3
  # afterwards.
4
4
 
5
- $:.unshift File.join(
6
- File.dirname(__FILE__),
7
- '/../lib')
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
8
6
 
9
7
  require 'pp'
10
8
  require 'parslet'
data/example/readme.rb CHANGED
@@ -1,12 +1,9 @@
1
1
  # The example from the readme. With this, I am making sure that the readme
2
2
  # 'works'. Is this too messy?
3
3
 
4
- $:.unshift '../lib'
5
-
6
- require 'pp'
7
- require 'parslet'
8
- include Parslet
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
9
5
 
6
+ # cut here -------------------------------------------------------------------
10
7
  require 'parslet'
11
8
  include Parslet
12
9
 
@@ -14,20 +11,17 @@ include Parslet
14
11
  parser = str('"') >>
15
12
  (
16
13
  str('\\') >> any |
17
- str('"').absnt? >> any
14
+ str('"').absent? >> any
18
15
  ).repeat.as(:string) >>
19
16
  str('"')
20
17
 
21
18
  # Parse the string and capture parts of the interpretation (:string above)
22
- tree = parser.parse(%Q{
23
- "This is a \\"String\\" in which you can escape stuff"
24
- }.strip)
19
+ tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
25
20
 
26
21
  tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
27
22
 
28
23
  # Here's how you can grab results from that tree:
29
24
 
30
- # 1)
31
25
  transform = Parslet::Transform.new do
32
26
  rule(:string => simple(:x)) {
33
27
  puts "String contents: #{x}" }