parslet 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,17 @@
1
+ = 1.1.0 / ???
2
+
3
+ + Uses throw/catch internally for an order of magnitude increase in execution
4
+ speed.
5
+
6
+ = 1.0.1 / 17Jan2011
7
+
8
+ A happy new year!
9
+
10
+ ! FIX: Parslet::Transform was wrongly fixed earlier - it now won't mangle
11
+ hashes anymore. (Blake Sweeney)
12
+
13
+ + parslet/rig/rspec.rb contains useful rspec matchers. (R. Konstantin Haase)
14
+
1
15
  = 1.0.0 / 29Dez2010
2
16
 
3
17
  - #each_match was removed. There was some duplication of code that even
data/README CHANGED
@@ -47,8 +47,6 @@ This library should work with both ruby 1.8 and ruby 1.9.
47
47
 
48
48
  STATUS
49
49
 
50
- 0.12.0
51
-
52
- On the road to 1.0; improving documentation, trying to ease access to the API.
50
+ one dot oh.
53
51
 
54
52
  (c) 2010 Kaspar Schiess
data/Rakefile CHANGED
@@ -18,7 +18,7 @@ spec = Gem::Specification.new do |s|
18
18
 
19
19
  # Change these as appropriate
20
20
  s.name = "parslet"
21
- s.version = "1.0.0"
21
+ s.version = "1.0.1"
22
22
  s.summary = "Parser construction library with great error reporting in Ruby."
23
23
  s.author = "Kaspar Schiess"
24
24
  s.email = "kaspar.schiess@absurd.li"
@@ -29,7 +29,7 @@ spec = Gem::Specification.new do |s|
29
29
  s.rdoc_options = %w(--main README)
30
30
 
31
31
  # Add any extra files to include in the gem
32
- s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{spec,lib/**/*}")
32
+ s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
33
33
  s.require_paths = ["lib"]
34
34
 
35
35
  # If you want to depend on other gems, add them here, along with any
@@ -0,0 +1,18 @@
1
+ # A small example that shows a really small parser and what happens on parser
2
+ # errors.
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ class MyParser < Parslet::Parser
10
+ rule(:a) { str('a').repeat }
11
+
12
+ def parse(str)
13
+ a.parse(str)
14
+ end
15
+ end
16
+
17
+ pp MyParser.new.parse('aaaa')
18
+ pp MyParser.new.parse('bbbb')
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Example contributed by Hal Brodigan (postmodern). Thanks!
4
+
5
+ require 'parslet'
6
+
7
+ class EmailParser < Parslet::Parser
8
+ rule(:space) { match('\s').repeat(1) }
9
+ rule(:space?) { space.maybe }
10
+ rule(:dash?) { match['_-'].maybe }
11
+
12
+ rule(:at) {
13
+ str('@') |
14
+ (dash? >> (str('at') | str('AT')) >> dash?)
15
+ }
16
+ rule(:dot) {
17
+ str('.') |
18
+ (dash? >> (str('dot') | str('DOT')) >> dash?)
19
+ }
20
+
21
+ rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
22
+ rule(:separator) { space? >> dot.as(:dot) >> space? | space }
23
+ rule(:words) { word >> (separator >> word).repeat }
24
+
25
+ rule(:email) {
26
+ (words >> space? >> at.as(:at) >> space? >> words).as(:email)
27
+ }
28
+
29
+ root(:email)
30
+ end
31
+
32
+ class EmailSanitizer < Parslet::Transform
33
+ rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
34
+ rule(:at => simple(:at)) { '@' }
35
+ rule(:word => simple(:word)) { word }
36
+ rule(:email => sequence(:email)) { email.join }
37
+ end
38
+
39
+ parser = EmailParser.new
40
+ sanitizer = EmailSanitizer.new
41
+
42
+ unless ARGV[0]
43
+ STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
44
+ exit -1
45
+ end
46
+
47
+ begin
48
+ puts sanitizer.apply(parser.parse(ARGV[0]))
49
+ rescue Parslet::ParseFailed => error
50
+ puts error
51
+ puts parser.root.error_tree
52
+ end
@@ -0,0 +1,13 @@
1
+ # Basically just demonstrates that you can leave rules empty and get a nice
2
+ # NotImplementedError. A way to quickly spec out your parser rules?
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'parslet'
7
+
8
+ class Parser < Parslet::Parser
9
+ rule(:empty) { }
10
+ end
11
+
12
+
13
+ Parser.new.empty.parslet
@@ -0,0 +1,125 @@
1
+ # This example is heavily inspired by citrus' ip.citrus. Have a look at both
2
+ # of these to get some choice!
3
+
4
+ # The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
5
+ # Uniform Resource Identifier (URI): Generic Syntax.
6
+ #
7
+ # See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
8
+
9
+ $:.unshift '../lib'
10
+
11
+ require 'pp'
12
+ require 'parslet'
13
+
14
+ module IPv4
15
+ include Parslet
16
+
17
+ # A host identified by an IPv4 literal address is represented in
18
+ # dotted-decimal notation (a sequence of four decimal numbers in the range 0
19
+ # to 255, separated by "."), as described in [RFC1123] by reference to
20
+ # [RFC0952]. Note that other forms of dotted notation may be interpreted on
21
+ # some platforms, as described in Section 7.4, but only the dotted-decimal
22
+ # form of four octets is allowed by this grammar.
23
+ rule(:ipv4) {
24
+ (dec_octet >> str('.') >> dec_octet >> str('.') >>
25
+ dec_octet >> str('.') >> dec_octet).as(:ipv4)
26
+ }
27
+
28
+ rule(:dec_octet) {
29
+ str('25') >> match("[0-5]") |
30
+ str('2') >> match("[0-4]") >> digit |
31
+ str('1') >> digit >> digit |
32
+ match('[1-9]') >> digit |
33
+ digit
34
+ }
35
+
36
+ rule(:digit) {
37
+ match('[0-9]')
38
+ }
39
+ end
40
+
41
+ # Must be used in concert with IPv4
42
+ module IPv6
43
+ include Parslet
44
+
45
+ rule(:colon) { str(':') }
46
+ rule(:dcolon) { colon >> colon }
47
+
48
+ # h16 :
49
+ def h16r(times)
50
+ (h16 >> colon).repeat(times, times)
51
+ end
52
+
53
+ # : h16
54
+ def h16l(times)
55
+ (colon >> h16).repeat(0,times)
56
+ end
57
+
58
+ # A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is
59
+ # represented numerically in case-insensitive hexadecimal, using one to four
60
+ # hexadecimal digits (leading zeroes are permitted). The eight encoded
61
+ # pieces are given most-significant first, separated by colon characters.
62
+ # Optionally, the least-significant two pieces may instead be represented in
63
+ # IPv4 address textual format. A sequence of one or more consecutive
64
+ # zero-valued 16-bit pieces within the address may be elided, omitting all
65
+ # their digits and leaving exactly two consecutive colons in their place to
66
+ # mark the elision.
67
+ rule(:ipv6) {
68
+ (
69
+ (
70
+ h16r(6) |
71
+ dcolon >> h16r(5) |
72
+ h16.maybe >> dcolon >> h16r(4) |
73
+ (h16 >> h16l(1)).maybe >> dcolon >> h16r(3) |
74
+ (h16 >> h16l(2)).maybe >> dcolon >> h16r(2) |
75
+ (h16 >> h16l(3)).maybe >> dcolon >> h16r(1) |
76
+ (h16 >> h16l(4)).maybe >> dcolon
77
+ ) >> ls32 |
78
+ (h16 >> h16l(5)).maybe >> dcolon >> h16 |
79
+ (h16 >> h16l(6)).maybe >> dcolon
80
+ ).as(:ipv6)
81
+ }
82
+
83
+ rule(:h16) {
84
+ hexdigit.repeat(1,4)
85
+ }
86
+
87
+ rule(:ls32) {
88
+ (h16 >> colon >> h16) |
89
+ ipv4
90
+ }
91
+
92
+ rule(:hexdigit) {
93
+ digit | match("[a-fA-F]")
94
+ }
95
+ end
96
+
97
+ class Parser
98
+ include IPv4
99
+ include IPv6
100
+
101
+ def parse(str)
102
+ (ipv4 | ipv6).parse(str)
103
+ end
104
+ end
105
+
106
+ %W(
107
+ 0.0.0.0
108
+ 255.255.255.255
109
+ 255.255.255
110
+ 1:2:3:4:5:6:7:8
111
+ 12AD:34FC:A453:1922::
112
+ 12AD::34FC
113
+ 12AD::
114
+ ::
115
+ 1:2
116
+ ).each do |address|
117
+ parser = Parser.new
118
+ printf "%30s -> ", address
119
+ begin
120
+ result = parser.parse(address)
121
+ puts result.inspect
122
+ rescue Parslet::ParseFailed => m
123
+ puts "Failed: #{m}"
124
+ end
125
+ end
@@ -0,0 +1,101 @@
1
+ # Reproduces [1] using parslet.
2
+ # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ module MiniLisp
10
+ class Parser < Parslet::Parser
11
+ root :expression
12
+ rule(:expression) {
13
+ space? >> str('(') >> space? >> body >> str(')')
14
+ }
15
+
16
+ rule(:body) {
17
+ (expression | identifier | float | integer | string).repeat.as(:exp)
18
+ }
19
+
20
+ rule(:space) {
21
+ match('\s').repeat(1)
22
+ }
23
+ rule(:space?) {
24
+ space.maybe
25
+ }
26
+
27
+ rule(:identifier) {
28
+ (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
29
+ }
30
+
31
+ rule(:float) {
32
+ (
33
+ integer >> (
34
+ str('.') >> match('[0-9]').repeat(1) |
35
+ str('e') >> match('[0-9]').repeat(1)
36
+ ).as(:e)
37
+ ).as(:float) >> space?
38
+ }
39
+
40
+ rule(:integer) {
41
+ ((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
42
+ }
43
+
44
+ rule(:string) {
45
+ str('"') >> (
46
+ str('\\') >> any |
47
+ str('"').absnt? >> any
48
+ ).repeat.as(:string) >> str('"') >> space?
49
+ }
50
+ end
51
+
52
+ class Transform
53
+ include Parslet
54
+
55
+ attr_reader :t
56
+ def initialize
57
+ @t = Parslet::Transform.new
58
+
59
+ # To understand these, take a look at what comes out of the parser.
60
+ t.rule(:identifier => simple(:ident)) { ident.to_sym }
61
+
62
+ t.rule(:string => simple(:str)) { str }
63
+
64
+ t.rule(:integer => simple(:int)) { Integer(int) }
65
+
66
+ t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
67
+
68
+ t.rule(:exp => subtree(:exp)) { exp }
69
+ end
70
+
71
+ def do(tree)
72
+ t.apply(tree)
73
+ end
74
+ end
75
+ end
76
+
77
+ parser = MiniLisp::Parser.new
78
+ transform = MiniLisp::Transform.new
79
+
80
+ # Parse stage
81
+ begin
82
+ result = parser.parse %Q{
83
+ (define test (lambda ()
84
+ (begin
85
+ (display "something")
86
+ (display 1)
87
+ (display 3.08))))
88
+ (test)
89
+ }
90
+ rescue Parslet::ParseFailed => failure
91
+ puts failure
92
+ puts parser.root.error_tree if parser.root.cause
93
+ exit
94
+ end
95
+
96
+ # Transform the result
97
+ pp transform.do(result)
98
+
99
+ # Thereby reducing it to the earlier problem:
100
+ # http://github.com/kschiess/toylisp
101
+
@@ -0,0 +1,42 @@
1
+ # A small example that demonstrates the power of tree pattern matching. Also
2
+ # uses '.as(:name)' to construct a tree that can reliably be matched
3
+ # afterwards.
4
+
5
+ $:.unshift '../lib'
6
+
7
+ require 'pp'
8
+ require 'parslet'
9
+
10
+ module LISP # as in 'lots of insipid and stupid parenthesis'
11
+ class Parser < Parslet::Parser
12
+ rule(:balanced) {
13
+ str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
14
+ }
15
+
16
+ root(:balanced)
17
+ end
18
+
19
+ class Transform < Parslet::Transform
20
+ rule(:l => '(', :m => simple(:x), :r => ')') {
21
+ # innermost :m will contain nil
22
+ x.nil? ? 1 : x+1
23
+ }
24
+ end
25
+ end
26
+
27
+ parser = LISP::Parser.new
28
+ transform = LISP::Transform.new
29
+ %w!
30
+ ()
31
+ (())
32
+ ((((()))))
33
+ ((())
34
+ !.each do |pexp|
35
+ begin
36
+ result = parser.parse(pexp)
37
+ puts "#{"%20s"%pexp}: #{result.inspect} (#{transform.apply(result)} parens)"
38
+ rescue Parslet::ParseFailed => m
39
+ puts "#{"%20s"%pexp}: #{m}"
40
+ end
41
+ puts
42
+ end
@@ -0,0 +1,36 @@
1
+ # The example from the readme. With this, I am making sure that the readme
2
+ # 'works'. Is this too messy?
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+ include Parslet
9
+
10
+ require 'parslet'
11
+ include Parslet
12
+
13
+ # Constructs a parser using a Parser Expression Grammar like DSL:
14
+ parser = str('"') >>
15
+ (
16
+ str('\\') >> any |
17
+ str('"').absnt? >> any
18
+ ).repeat.as(:string) >>
19
+ str('"')
20
+
21
+ # Parse the string and capture parts of the interpretation (:string above)
22
+ tree = parser.parse(%Q{
23
+ "This is a \\"String\\" in which you can escape stuff"
24
+ }.strip)
25
+
26
+ tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
27
+
28
+ # Here's how you can grab results from that tree:
29
+
30
+ # 1)
31
+ transform = Parslet::Transform.new do
32
+ rule(:string => simple(:x)) {
33
+ puts "String contents: #{x}" }
34
+ end
35
+ transform.apply(tree)
36
+
@@ -0,0 +1,45 @@
1
+ $:.unshift '../lib/'
2
+ require 'parslet'
3
+ require 'pp'
4
+
5
+ tree = {:bud => {:stem => []}}
6
+
7
+ class Spring < Parslet::Transform
8
+ rule(:stem => sequence(:branches)) {
9
+ {:stem => (branches + [{:branch => :leaf}])}
10
+ }
11
+ end
12
+ class Summer < Parslet::Transform
13
+ rule(:stem => subtree(:branches)) {
14
+ new_branches = branches.map { |b| {:branch => [:leaf, :flower]} }
15
+ {:stem => new_branches}
16
+ }
17
+ end
18
+ class Fall < Parslet::Transform
19
+ rule(:branch => sequence(:x)) {
20
+ x.each { |e| puts "Fruit!" if e==:flower }
21
+ x.each { |e| puts "Falling Leaves!" if e==:leaf }
22
+ {:branch => []}
23
+ }
24
+ end
25
+ class Winter < Parslet::Transform
26
+ rule(:stem => subtree(:x)) {
27
+ {:stem => []}
28
+ }
29
+ end
30
+
31
+ def do_seasons(tree)
32
+ [Spring, Summer, Fall, Winter].each do |season|
33
+ p "And when #{season} comes"
34
+ tree = season.new.apply(tree)
35
+ pp tree
36
+ puts
37
+ end
38
+ tree
39
+ end
40
+
41
+ # What marvel of life!
42
+ tree = do_seasons(tree)
43
+ tree = do_seasons(tree)
44
+
45
+
@@ -0,0 +1,3 @@
1
+ 123
2
+ 12345
3
+ " Some String with \"escapes\""
@@ -0,0 +1,57 @@
1
+ # A simple xml parser. It is simple in the respect that it doesn't address
2
+ # any of the complexities of XML. This is ruby 1.9.
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ module XML
10
+ include Parslet
11
+
12
+ root :document
13
+
14
+ rule(:document) {
15
+ tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c) |
16
+ text
17
+ }
18
+
19
+ # Perhaps we could have some syntax sugar to make this easier?
20
+ #
21
+ def tag(opts={})
22
+ close = opts[:close] || false
23
+
24
+ parslet = str('<')
25
+ parslet = parslet >> str('/') if close
26
+ parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
27
+ parslet = parslet >> str('>')
28
+
29
+ parslet
30
+ end
31
+
32
+ rule(:text) {
33
+ match('[^<>]').repeat(0)
34
+ }
35
+ end
36
+
37
+ def check(xml)
38
+ include XML
39
+ r=parse(xml)
40
+
41
+ # We'll validate the tree by reducing valid pairs of tags into simply the
42
+ # string "verified". If the transformation ends on a string, then the
43
+ # document was 'valid'.
44
+ #
45
+ t = Parslet::Transform.new do
46
+ rule(
47
+ o: {name: simple(:tag)},
48
+ c: {name: simple(:tag)},
49
+ i: simple(:t)
50
+ ) { 'verified' }
51
+ end
52
+
53
+ t.apply(r)
54
+ end
55
+
56
+ pp check("<a><b>some text in the tags</b></a>")
57
+ pp check("<b><b>some text in the tags</b></a>")
@@ -0,0 +1,75 @@
1
+ # A more complex parser that illustrates how a compiler might be constructed.
2
+ # The parser recognizes strings and integer literals and constructs almost a
3
+ # useful AST from the file contents.
4
+
5
+ require 'pp'
6
+
7
+ $:.unshift '../lib/'
8
+ require 'parslet'
9
+
10
+ include Parslet
11
+
12
+ class LiteralsParser < Parslet::Parser
13
+ rule :space do
14
+ (match '[ ]').repeat(1)
15
+ end
16
+
17
+ rule :literals do
18
+ (literal >> eol).repeat
19
+ end
20
+
21
+ rule :literal do
22
+ (integer | string).as(:literal) >> space.maybe
23
+ end
24
+
25
+ rule :string do
26
+ str('"') >>
27
+ (
28
+ (str('\\') >> any) |
29
+ (str('"').absnt? >> any)
30
+ ).repeat.as(:string) >>
31
+ str('"')
32
+ end
33
+
34
+ rule :integer do
35
+ match('[0-9]').repeat(1).as(:integer)
36
+ end
37
+
38
+ rule :eol do
39
+ line_end.repeat(1)
40
+ end
41
+
42
+ rule :line_end do
43
+ crlf >> space.maybe
44
+ end
45
+
46
+ rule :crlf do
47
+ match('[\r\n]').repeat(1)
48
+ end
49
+
50
+ root :literals
51
+ end
52
+
53
+ parsetree = LiteralsParser.new.parse(
54
+ File.read('simple.lit'))
55
+
56
+ class Lit < Struct.new(:text)
57
+ def to_s
58
+ text.inspect
59
+ end
60
+ end
61
+ class StringLit < Lit
62
+ end
63
+ class IntLit < Lit
64
+ def to_s
65
+ text
66
+ end
67
+ end
68
+
69
+ transform = Parslet::Transform.new do
70
+ rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
71
+ rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
72
+ end
73
+
74
+ ast = transform.apply(parsetree)
75
+ pp ast
@@ -0,0 +1,4 @@
1
+ "THis is a string"
2
+ "This is another string"
3
+ "This string is escaped \"embedded quoted stuff \" "
4
+ 12 // an integer literal and a comment
@@ -66,7 +66,7 @@ module Parslet
66
66
  # puts parslet.error_tree
67
67
  # end
68
68
  #
69
- class ParseFailed < Exception
69
+ class ParseFailed < StandardError
70
70
  end
71
71
 
72
72
  module ClassMethods
@@ -30,10 +30,9 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
30
30
 
31
31
  def try(io) # :nodoc:
32
32
  alternatives.each { |a|
33
- begin
33
+ catch(:error) {
34
34
  return a.apply(io)
35
- rescue Parslet::ParseFailed => ex
36
- end
35
+ }
37
36
  }
38
37
  # If we reach this point, all alternatives have failed.
39
38
  error(io, "Expected one of #{alternatives.inspect}.")
@@ -13,7 +13,18 @@ class Parslet::Atoms::Base
13
13
  io = StringIO.new(io)
14
14
  end
15
15
 
16
- result = apply(io)
16
+ result = nil
17
+ error_message_or_success = catch(:error) {
18
+ result = apply(io)
19
+ :success
20
+ }
21
+
22
+ # If we didn't succeed the parse, raise an exception for the user.
23
+ # Stack trace will be off, but the error tree should explain the reason
24
+ # it failed.
25
+ if error_message_or_success != :success
26
+ raise Parslet::ParseFailed, error_message_or_success
27
+ end
17
28
 
18
29
  # If we haven't consumed the input, then the pattern doesn't match. Try
19
30
  # to provide a good error message (even asking down below)
@@ -22,30 +33,49 @@ class Parslet::Atoms::Base
22
33
  # error to fail with. Otherwise just report that we cannot consume the
23
34
  # input.
24
35
  if cause
25
- raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
36
+ # Don't garnish the real cause; but the exception is different anyway.
37
+ raise Parslet::ParseFailed,
38
+ "Unconsumed input, maybe because of this: #{cause}"
26
39
  else
27
- error(io, "Don't know what to do with #{io.string[io.pos,100]}")
40
+ parse_failed(
41
+ format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
28
42
  end
29
43
  end
30
44
 
31
45
  return flatten(result)
32
46
  end
33
47
 
48
+ #---
49
+ # Calls the #try method of this parslet. In case of a parse error, apply
50
+ # leaves the io in the state it was before the attempt.
51
+ #+++
34
52
  def apply(io) # :nodoc:
35
53
  # p [:start, self, io.string[io.pos, 10]]
36
54
 
37
55
  old_pos = io.pos
38
56
 
39
57
  # p [:try, self, io.string[io.pos, 20]]
40
- begin
58
+ message = catch(:error) {
41
59
  r = try(io)
42
60
  # p [:return_from, self, r, flatten(r)]
61
+
62
+ # This has just succeeded, so last_cause must be empty
43
63
  @last_cause = nil
44
64
  return r
45
- rescue Parslet::ParseFailed => ex
46
- # p [:failing, self, io.string[io.pos, 20]]
47
- io.pos = old_pos; raise ex
48
- end
65
+ }
66
+
67
+ # We only reach this point if the parse has failed. message is not nil.
68
+ # p [:failing, self, io.string[io.pos, 20]]
69
+
70
+ io.pos = old_pos
71
+ throw :error, message
72
+ end
73
+
74
+ # Override this in your Atoms::Base subclasses to implement parsing
75
+ # behaviour.
76
+ #
77
+ def try(io)
78
+ raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
49
79
  end
50
80
 
51
81
  # Construct a new atom that repeats the current atom min times at least and
@@ -126,6 +156,9 @@ class Parslet::Atoms::Base
126
156
  Parslet::Atoms::Named.new(self, name)
127
157
  end
128
158
 
159
+ # Takes a mixed value coming out of a parslet and converts it to a return
160
+ # value for the user by dropping things and merging hashes.
161
+ #
129
162
  def flatten(value) # :nodoc:
130
163
  # Passes through everything that isn't an array of things
131
164
  return value unless value.instance_of? Array
@@ -225,31 +258,35 @@ class Parslet::Atoms::Base
225
258
  # with #cause.
226
259
  #
227
260
  def error_tree
228
- Parslet::ErrorTree.new(self) if cause?
261
+ Parslet::ErrorTree.new(self)
229
262
  end
230
263
  def cause? # :nodoc:
231
264
  not @last_cause.nil?
232
265
  end
233
266
  private
267
+ # TODO comments!!!
234
268
  # Report/raise a parse error with the given message, printing the current
235
269
  # position as well. Appends 'at line X char Y.' to the message you give.
236
270
  # If +pos+ is given, it is used as the real position the error happened,
237
271
  # correcting the io's current position.
238
272
  #
239
273
  def error(io, str, pos=nil)
274
+ @last_cause = format_cause(io, str, pos)
275
+ throw :error, @last_cause
276
+ end
277
+ def parse_failed(str)
278
+ @last_cause = str
279
+ raise Parslet::ParseFailed,
280
+ @last_cause
281
+ end
282
+ def format_cause(io, str, pos=nil)
240
283
  pre = io.string[0..(pos||io.pos)]
241
284
  lines = Array(pre.lines)
242
285
 
243
- if lines.empty?
244
- formatted_cause = str
245
- else
246
- pos = lines.last.length
247
- formatted_cause = "#{str} at line #{lines.count} char #{pos}."
248
- end
249
-
250
- @last_cause = formatted_cause
251
-
252
- raise Parslet::ParseFailed, formatted_cause, nil
286
+ return str if lines.empty?
287
+
288
+ pos = lines.last.length
289
+ return "#{str} at line #{lines.count} char #{pos}."
253
290
  end
254
291
  def warn_about_duplicate_keys(h1, h2)
255
292
  d = h1.keys & h2.keys
@@ -16,27 +16,30 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
16
16
 
17
17
  def try(io) # :nodoc:
18
18
  pos = io.pos
19
- begin
19
+
20
+ failed = true
21
+ catch(:error) {
20
22
  bound_parslet.apply(io)
21
- rescue Parslet::ParseFailed
22
- return fail(io)
23
- ensure
24
- io.pos = pos
25
- end
26
- return success(io)
23
+ failed = false
24
+ }
25
+ return failed ? fail(io) : success(io)
26
+
27
+ ensure
28
+ io.pos = pos
27
29
  end
28
30
 
31
+ # TODO Both of these will produce results that could be reduced easily.
32
+ # Maybe do some shortcut reducing here?
29
33
  def fail(io) # :nodoc:
30
34
  if positive
31
35
  error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
32
36
  else
33
- # TODO: Squash this down to nothing? Return value handling here...
34
37
  return nil
35
38
  end
36
39
  end
37
40
  def success(io) # :nodoc:
38
41
  if positive
39
- return nil # see above, TODO
42
+ return nil
40
43
  else
41
44
  error(
42
45
  io,
@@ -8,16 +8,16 @@
8
8
  # match('\s') # like regexps: matches space characters
9
9
  #
10
10
  class Parslet::Atoms::Re < Parslet::Atoms::Base
11
- attr_reader :match
11
+ attr_reader :match, :re
12
12
  def initialize(match) # :nodoc:
13
13
  @match = match
14
+ @re = Regexp.new(match, Regexp::MULTILINE)
14
15
  end
15
16
 
16
17
  def try(io) # :nodoc:
17
- r = Regexp.new(match, Regexp::MULTILINE)
18
18
  s = io.read(1)
19
19
  error(io, "Premature end of input") unless s
20
- error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
20
+ error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(re)
21
21
  return s
22
22
  end
23
23
 
@@ -17,22 +17,21 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
17
17
  def try(io) # :nodoc:
18
18
  occ = 0
19
19
  result = [@tag] # initialize the result array with the tag (for flattening)
20
- loop do
21
- begin
22
- result << parslet.apply(io)
23
- occ += 1
24
-
25
- # If we're not greedy (max is defined), check if that has been
26
- # reached.
27
- return result if max && occ>=max
28
- rescue Parslet::ParseFailed => ex
29
- # Greedy matcher has produced a failure. Check if occ (which will
30
- # contain the number of sucesses) is in {min, max}.
31
- # p [:repetition, occ, min, max]
32
- error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
33
- return result
34
- end
35
- end
20
+ catch(:error) {
21
+ result << parslet.apply(io)
22
+ occ += 1
23
+
24
+ # If we're not greedy (max is defined), check if that has been
25
+ # reached.
26
+ return result if max && occ>=max
27
+ redo
28
+ }
29
+
30
+ # Greedy matcher has produced a failure. Check if occ (which will
31
+ # contain the number of successes) is in {min, max}.
32
+ # p [:repetition, occ, min, max]
33
+ error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
34
+ return result
36
35
  end
37
36
 
38
37
  precedence REPETITION
@@ -16,12 +16,14 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
16
16
  end
17
17
 
18
18
  def try(io) # :nodoc:
19
- [:sequence]+parslets.map { |p|
20
- # Save each parslet as potentially offending (raising an error).
21
- @offending_parslet = p
22
- p.apply(io)
19
+ catch(:error) {
20
+ return [:sequence]+parslets.map { |p|
21
+ # Save each parslet as potentially offending (raising an error).
22
+ @offending_parslet = p
23
+ p.apply(io)
24
+ }
23
25
  }
24
- rescue Parslet::ParseFailed
26
+
25
27
  error(io, "Failed to match sequence (#{self.inspect})")
26
28
  end
27
29
 
@@ -99,7 +99,8 @@ class Parslet::Pattern
99
99
  end
100
100
 
101
101
  def element_match_hash(tree, exp, bindings)
102
- # p [:emh, tree, exp, bindings]
102
+ # Early failure when not all of the hash keys are matched.
103
+ return false unless exp.keys == tree.keys
103
104
 
104
105
  # We iterate over expected pattern, since we demand that the keys that
105
106
  # are there should be in tree as well.
@@ -0,0 +1,24 @@
1
+ RSpec::Matchers.define(:parse) do |input|
2
+ chain(:as) { |as| @as = as }
3
+
4
+ match do |parser|
5
+ begin
6
+ @result = parser.parse(input)
7
+ @as == @result or @as.nil?
8
+ rescue Parslet::ParseFailed
9
+ false
10
+ end
11
+ end
12
+
13
+ failure_message_for_should do |is|
14
+ "expected " << (@result ?
15
+ "output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
16
+ "expected #{is.inspect} to be able to parse #{input.inspect}")
17
+ end
18
+
19
+ failure_message_for_should_not do |is|
20
+ "expected " << (@as ?
21
+ "output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
22
+ "expected #{is.inspect} to be able to parse #{input.inspect}")
23
+ end
24
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 0
8
- - 0
9
- version: 1.0.0
8
+ - 1
9
+ version: 1.0.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Kaspar Schiess
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-29 00:00:00 +01:00
17
+ date: 2011-01-17 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -90,8 +90,21 @@ files:
90
90
  - lib/parslet/pattern/binding.rb
91
91
  - lib/parslet/pattern/context.rb
92
92
  - lib/parslet/pattern.rb
93
+ - lib/parslet/rig/rspec.rb
93
94
  - lib/parslet/transform.rb
94
95
  - lib/parslet.rb
96
+ - example/documentation.rb
97
+ - example/email_parser.rb
98
+ - example/empty.rb
99
+ - example/ip_address.rb
100
+ - example/minilisp.rb
101
+ - example/parens.rb
102
+ - example/readme.rb
103
+ - example/seasons.rb
104
+ - example/simple.lit
105
+ - example/simple_xml.rb
106
+ - example/string_parser.rb
107
+ - example/test.lit
95
108
  has_rdoc: true
96
109
  homepage: http://kschiess.github.com/parslet
97
110
  licenses: []