parslet 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,17 @@
1
+ = 1.1.0 / ???
2
+
3
+ + Uses throw/catch internally for an order of magnitude increase in execution
4
+ speed.
5
+
6
+ = 1.0.1 / 17Jan2011
7
+
8
+ A happy new year!
9
+
10
+ ! FIX: Parslet::Transform was wrongly fixed earlier - it now won't mangle
11
+ hashes anymore. (Blake Sweeney)
12
+
13
+ + parslet/rig/rspec.rb contains useful rspec matchers. (R. Konstantin Haase)
14
+
1
15
  = 1.0.0 / 29Dez2010
2
16
 
3
17
  - #each_match was removed. There was some duplication of code that even
data/README CHANGED
@@ -47,8 +47,6 @@ This library should work with both ruby 1.8 and ruby 1.9.
47
47
 
48
48
  STATUS
49
49
 
50
- 0.12.0
51
-
52
- On the road to 1.0; improving documentation, trying to ease access to the API.
50
+ one dot oh.
53
51
 
54
52
  (c) 2010 Kaspar Schiess
data/Rakefile CHANGED
@@ -18,7 +18,7 @@ spec = Gem::Specification.new do |s|
18
18
 
19
19
  # Change these as appropriate
20
20
  s.name = "parslet"
21
- s.version = "1.0.0"
21
+ s.version = "1.0.1"
22
22
  s.summary = "Parser construction library with great error reporting in Ruby."
23
23
  s.author = "Kaspar Schiess"
24
24
  s.email = "kaspar.schiess@absurd.li"
@@ -29,7 +29,7 @@ spec = Gem::Specification.new do |s|
29
29
  s.rdoc_options = %w(--main README)
30
30
 
31
31
  # Add any extra files to include in the gem
32
- s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{spec,lib/**/*}")
32
+ s.files = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
33
33
  s.require_paths = ["lib"]
34
34
 
35
35
  # If you want to depend on other gems, add them here, along with any
@@ -0,0 +1,18 @@
1
+ # A small example that shows a really small parser and what happens on parser
2
+ # errors.
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ class MyParser < Parslet::Parser
10
+ rule(:a) { str('a').repeat }
11
+
12
+ def parse(str)
13
+ a.parse(str)
14
+ end
15
+ end
16
+
17
+ pp MyParser.new.parse('aaaa')
18
+ pp MyParser.new.parse('bbbb')
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Example contributed by Hal Brodigan (postmodern). Thanks!
4
+
5
+ require 'parslet'
6
+
7
+ class EmailParser < Parslet::Parser
8
+ rule(:space) { match('\s').repeat(1) }
9
+ rule(:space?) { space.maybe }
10
+ rule(:dash?) { match['_-'].maybe }
11
+
12
+ rule(:at) {
13
+ str('@') |
14
+ (dash? >> (str('at') | str('AT')) >> dash?)
15
+ }
16
+ rule(:dot) {
17
+ str('.') |
18
+ (dash? >> (str('dot') | str('DOT')) >> dash?)
19
+ }
20
+
21
+ rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
22
+ rule(:separator) { space? >> dot.as(:dot) >> space? | space }
23
+ rule(:words) { word >> (separator >> word).repeat }
24
+
25
+ rule(:email) {
26
+ (words >> space? >> at.as(:at) >> space? >> words).as(:email)
27
+ }
28
+
29
+ root(:email)
30
+ end
31
+
32
+ class EmailSanitizer < Parslet::Transform
33
+ rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
34
+ rule(:at => simple(:at)) { '@' }
35
+ rule(:word => simple(:word)) { word }
36
+ rule(:email => sequence(:email)) { email.join }
37
+ end
38
+
39
+ parser = EmailParser.new
40
+ sanitizer = EmailSanitizer.new
41
+
42
+ unless ARGV[0]
43
+ STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
44
+ exit -1
45
+ end
46
+
47
+ begin
48
+ puts sanitizer.apply(parser.parse(ARGV[0]))
49
+ rescue Parslet::ParseFailed => error
50
+ puts error
51
+ puts parser.root.error_tree
52
+ end
@@ -0,0 +1,13 @@
1
+ # Basically just demonstrates that you can leave rules empty and get a nice
2
+ # NotImplementedError. A way to quickly spec out your parser rules?
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'parslet'
7
+
8
+ class Parser < Parslet::Parser
9
+ rule(:empty) { }
10
+ end
11
+
12
+
13
+ Parser.new.empty.parslet
@@ -0,0 +1,125 @@
1
+ # This example is heavily inspired by citrus' ip.citrus. Have a look at both
2
+ # of these to get some choice!
3
+
4
+ # The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
5
+ # Uniform Resource Identifier (URI): Generic Syntax.
6
+ #
7
+ # See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
8
+
9
+ $:.unshift '../lib'
10
+
11
+ require 'pp'
12
+ require 'parslet'
13
+
14
+ module IPv4
15
+ include Parslet
16
+
17
+ # A host identified by an IPv4 literal address is represented in
18
+ # dotted-decimal notation (a sequence of four decimal numbers in the range 0
19
+ # to 255, separated by "."), as described in [RFC1123] by reference to
20
+ # [RFC0952]. Note that other forms of dotted notation may be interpreted on
21
+ # some platforms, as described in Section 7.4, but only the dotted-decimal
22
+ # form of four octets is allowed by this grammar.
23
+ rule(:ipv4) {
24
+ (dec_octet >> str('.') >> dec_octet >> str('.') >>
25
+ dec_octet >> str('.') >> dec_octet).as(:ipv4)
26
+ }
27
+
28
+ rule(:dec_octet) {
29
+ str('25') >> match("[0-5]") |
30
+ str('2') >> match("[0-4]") >> digit |
31
+ str('1') >> digit >> digit |
32
+ match('[1-9]') >> digit |
33
+ digit
34
+ }
35
+
36
+ rule(:digit) {
37
+ match('[0-9]')
38
+ }
39
+ end
40
+
41
+ # Must be used in concert with IPv4
42
+ module IPv6
43
+ include Parslet
44
+
45
+ rule(:colon) { str(':') }
46
+ rule(:dcolon) { colon >> colon }
47
+
48
+ # h16 :
49
+ def h16r(times)
50
+ (h16 >> colon).repeat(times, times)
51
+ end
52
+
53
+ # : h16
54
+ def h16l(times)
55
+ (colon >> h16).repeat(0,times)
56
+ end
57
+
58
+ # A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is
59
+ # represented numerically in case-insensitive hexadecimal, using one to four
60
+ # hexadecimal digits (leading zeroes are permitted). The eight encoded
61
+ # pieces are given most-significant first, separated by colon characters.
62
+ # Optionally, the least-significant two pieces may instead be represented in
63
+ # IPv4 address textual format. A sequence of one or more consecutive
64
+ # zero-valued 16-bit pieces within the address may be elided, omitting all
65
+ # their digits and leaving exactly two consecutive colons in their place to
66
+ # mark the elision.
67
+ rule(:ipv6) {
68
+ (
69
+ (
70
+ h16r(6) |
71
+ dcolon >> h16r(5) |
72
+ h16.maybe >> dcolon >> h16r(4) |
73
+ (h16 >> h16l(1)).maybe >> dcolon >> h16r(3) |
74
+ (h16 >> h16l(2)).maybe >> dcolon >> h16r(2) |
75
+ (h16 >> h16l(3)).maybe >> dcolon >> h16r(1) |
76
+ (h16 >> h16l(4)).maybe >> dcolon
77
+ ) >> ls32 |
78
+ (h16 >> h16l(5)).maybe >> dcolon >> h16 |
79
+ (h16 >> h16l(6)).maybe >> dcolon
80
+ ).as(:ipv6)
81
+ }
82
+
83
+ rule(:h16) {
84
+ hexdigit.repeat(1,4)
85
+ }
86
+
87
+ rule(:ls32) {
88
+ (h16 >> colon >> h16) |
89
+ ipv4
90
+ }
91
+
92
+ rule(:hexdigit) {
93
+ digit | match("[a-fA-F]")
94
+ }
95
+ end
96
+
97
+ class Parser
98
+ include IPv4
99
+ include IPv6
100
+
101
+ def parse(str)
102
+ (ipv4 | ipv6).parse(str)
103
+ end
104
+ end
105
+
106
+ %W(
107
+ 0.0.0.0
108
+ 255.255.255.255
109
+ 255.255.255
110
+ 1:2:3:4:5:6:7:8
111
+ 12AD:34FC:A453:1922::
112
+ 12AD::34FC
113
+ 12AD::
114
+ ::
115
+ 1:2
116
+ ).each do |address|
117
+ parser = Parser.new
118
+ printf "%30s -> ", address
119
+ begin
120
+ result = parser.parse(address)
121
+ puts result.inspect
122
+ rescue Parslet::ParseFailed => m
123
+ puts "Failed: #{m}"
124
+ end
125
+ end
@@ -0,0 +1,101 @@
1
+ # Reproduces [1] using parslet.
2
+ # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ module MiniLisp
10
+ class Parser < Parslet::Parser
11
+ root :expression
12
+ rule(:expression) {
13
+ space? >> str('(') >> space? >> body >> str(')')
14
+ }
15
+
16
+ rule(:body) {
17
+ (expression | identifier | float | integer | string).repeat.as(:exp)
18
+ }
19
+
20
+ rule(:space) {
21
+ match('\s').repeat(1)
22
+ }
23
+ rule(:space?) {
24
+ space.maybe
25
+ }
26
+
27
+ rule(:identifier) {
28
+ (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
29
+ }
30
+
31
+ rule(:float) {
32
+ (
33
+ integer >> (
34
+ str('.') >> match('[0-9]').repeat(1) |
35
+ str('e') >> match('[0-9]').repeat(1)
36
+ ).as(:e)
37
+ ).as(:float) >> space?
38
+ }
39
+
40
+ rule(:integer) {
41
+ ((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
42
+ }
43
+
44
+ rule(:string) {
45
+ str('"') >> (
46
+ str('\\') >> any |
47
+ str('"').absnt? >> any
48
+ ).repeat.as(:string) >> str('"') >> space?
49
+ }
50
+ end
51
+
52
+ class Transform
53
+ include Parslet
54
+
55
+ attr_reader :t
56
+ def initialize
57
+ @t = Parslet::Transform.new
58
+
59
+ # To understand these, take a look at what comes out of the parser.
60
+ t.rule(:identifier => simple(:ident)) { ident.to_sym }
61
+
62
+ t.rule(:string => simple(:str)) { str }
63
+
64
+ t.rule(:integer => simple(:int)) { Integer(int) }
65
+
66
+ t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
67
+
68
+ t.rule(:exp => subtree(:exp)) { exp }
69
+ end
70
+
71
+ def do(tree)
72
+ t.apply(tree)
73
+ end
74
+ end
75
+ end
76
+
77
+ parser = MiniLisp::Parser.new
78
+ transform = MiniLisp::Transform.new
79
+
80
+ # Parse stage
81
+ begin
82
+ result = parser.parse %Q{
83
+ (define test (lambda ()
84
+ (begin
85
+ (display "something")
86
+ (display 1)
87
+ (display 3.08))))
88
+ (test)
89
+ }
90
+ rescue Parslet::ParseFailed => failure
91
+ puts failure
92
+ puts parser.root.error_tree if parser.root.cause
93
+ exit
94
+ end
95
+
96
+ # Transform the result
97
+ pp transform.do(result)
98
+
99
+ # Thereby reducing it to the earlier problem:
100
+ # http://github.com/kschiess/toylisp
101
+
@@ -0,0 +1,42 @@
1
+ # A small example that demonstrates the power of tree pattern matching. Also
2
+ # uses '.as(:name)' to construct a tree that can reliably be matched
3
+ # afterwards.
4
+
5
+ $:.unshift '../lib'
6
+
7
+ require 'pp'
8
+ require 'parslet'
9
+
10
+ module LISP # as in 'lots of insipid and stupid parenthesis'
11
+ class Parser < Parslet::Parser
12
+ rule(:balanced) {
13
+ str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
14
+ }
15
+
16
+ root(:balanced)
17
+ end
18
+
19
+ class Transform < Parslet::Transform
20
+ rule(:l => '(', :m => simple(:x), :r => ')') {
21
+ # innermost :m will contain nil
22
+ x.nil? ? 1 : x+1
23
+ }
24
+ end
25
+ end
26
+
27
+ parser = LISP::Parser.new
28
+ transform = LISP::Transform.new
29
+ %w!
30
+ ()
31
+ (())
32
+ ((((()))))
33
+ ((())
34
+ !.each do |pexp|
35
+ begin
36
+ result = parser.parse(pexp)
37
+ puts "#{"%20s"%pexp}: #{result.inspect} (#{transform.apply(result)} parens)"
38
+ rescue Parslet::ParseFailed => m
39
+ puts "#{"%20s"%pexp}: #{m}"
40
+ end
41
+ puts
42
+ end
@@ -0,0 +1,36 @@
1
+ # The example from the readme. With this, I am making sure that the readme
2
+ # 'works'. Is this too messy?
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+ include Parslet
9
+
10
+ require 'parslet'
11
+ include Parslet
12
+
13
+ # Constructs a parser using a Parser Expression Grammar like DSL:
14
+ parser = str('"') >>
15
+ (
16
+ str('\\') >> any |
17
+ str('"').absnt? >> any
18
+ ).repeat.as(:string) >>
19
+ str('"')
20
+
21
+ # Parse the string and capture parts of the interpretation (:string above)
22
+ tree = parser.parse(%Q{
23
+ "This is a \\"String\\" in which you can escape stuff"
24
+ }.strip)
25
+
26
+ tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
27
+
28
+ # Here's how you can grab results from that tree:
29
+
30
+ # 1)
31
+ transform = Parslet::Transform.new do
32
+ rule(:string => simple(:x)) {
33
+ puts "String contents: #{x}" }
34
+ end
35
+ transform.apply(tree)
36
+
@@ -0,0 +1,45 @@
1
+ $:.unshift '../lib/'
2
+ require 'parslet'
3
+ require 'pp'
4
+
5
+ tree = {:bud => {:stem => []}}
6
+
7
+ class Spring < Parslet::Transform
8
+ rule(:stem => sequence(:branches)) {
9
+ {:stem => (branches + [{:branch => :leaf}])}
10
+ }
11
+ end
12
+ class Summer < Parslet::Transform
13
+ rule(:stem => subtree(:branches)) {
14
+ new_branches = branches.map { |b| {:branch => [:leaf, :flower]} }
15
+ {:stem => new_branches}
16
+ }
17
+ end
18
+ class Fall < Parslet::Transform
19
+ rule(:branch => sequence(:x)) {
20
+ x.each { |e| puts "Fruit!" if e==:flower }
21
+ x.each { |e| puts "Falling Leaves!" if e==:leaf }
22
+ {:branch => []}
23
+ }
24
+ end
25
+ class Winter < Parslet::Transform
26
+ rule(:stem => subtree(:x)) {
27
+ {:stem => []}
28
+ }
29
+ end
30
+
31
+ def do_seasons(tree)
32
+ [Spring, Summer, Fall, Winter].each do |season|
33
+ p "And when #{season} comes"
34
+ tree = season.new.apply(tree)
35
+ pp tree
36
+ puts
37
+ end
38
+ tree
39
+ end
40
+
41
+ # What marvel of life!
42
+ tree = do_seasons(tree)
43
+ tree = do_seasons(tree)
44
+
45
+
@@ -0,0 +1,3 @@
1
+ 123
2
+ 12345
3
+ " Some String with \"escapes\""
@@ -0,0 +1,57 @@
1
+ # A simple xml parser. It is simple in the respect that it doesn't address
2
+ # any of the complexities of XML. This is ruby 1.9.
3
+
4
+ $:.unshift '../lib'
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ module XML
10
+ include Parslet
11
+
12
+ root :document
13
+
14
+ rule(:document) {
15
+ tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c) |
16
+ text
17
+ }
18
+
19
+ # Perhaps we could have some syntax sugar to make this more easy?
20
+ #
21
+ def tag(opts={})
22
+ close = opts[:close] || false
23
+
24
+ parslet = str('<')
25
+ parslet = parslet >> str('/') if close
26
+ parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
27
+ parslet = parslet >> str('>')
28
+
29
+ parslet
30
+ end
31
+
32
+ rule(:text) {
33
+ match('[^<>]').repeat(0)
34
+ }
35
+ end
36
+
37
+ def check(xml)
38
+ include XML
39
+ r=parse(xml)
40
+
41
+ # We'll validate the tree by reducing valid pairs of tags into simply the
42
+ # string "verified". If the transformation ends on a string, then the
43
+ # document was 'valid'.
44
+ #
45
+ t = Parslet::Transform.new do
46
+ rule(
47
+ o: {name: simple(:tag)},
48
+ c: {name: simple(:tag)},
49
+ i: simple(:t)
50
+ ) { 'verified' }
51
+ end
52
+
53
+ t.apply(r)
54
+ end
55
+
56
+ pp check("<a><b>some text in the tags</b></a>")
57
+ pp check("<b><b>some text in the tags</b></a>")
@@ -0,0 +1,75 @@
1
+ # A more complex parser that illustrates how a compiler might be constructed.
2
+ # The parser recognizes strings and integer literals and constructs almost a
3
+ # useful AST from the file contents.
4
+
5
+ require 'pp'
6
+
7
+ $:.unshift '../lib/'
8
+ require 'parslet'
9
+
10
+ include Parslet
11
+
12
+ class LiteralsParser < Parslet::Parser
13
+ rule :space do
14
+ (match '[ ]').repeat(1)
15
+ end
16
+
17
+ rule :literals do
18
+ (literal >> eol).repeat
19
+ end
20
+
21
+ rule :literal do
22
+ (integer | string).as(:literal) >> space.maybe
23
+ end
24
+
25
+ rule :string do
26
+ str('"') >>
27
+ (
28
+ (str('\\') >> any) |
29
+ (str('"').absnt? >> any)
30
+ ).repeat.as(:string) >>
31
+ str('"')
32
+ end
33
+
34
+ rule :integer do
35
+ match('[0-9]').repeat(1).as(:integer)
36
+ end
37
+
38
+ rule :eol do
39
+ line_end.repeat(1)
40
+ end
41
+
42
+ rule :line_end do
43
+ crlf >> space.maybe
44
+ end
45
+
46
+ rule :crlf do
47
+ match('[\r\n]').repeat(1)
48
+ end
49
+
50
+ root :literals
51
+ end
52
+
53
+ parsetree = LiteralsParser.new.parse(
54
+ File.read('simple.lit'))
55
+
56
+ class Lit < Struct.new(:text)
57
+ def to_s
58
+ text.inspect
59
+ end
60
+ end
61
+ class StringLit < Lit
62
+ end
63
+ class IntLit < Lit
64
+ def to_s
65
+ text
66
+ end
67
+ end
68
+
69
+ transform = Parslet::Transform.new do
70
+ rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) }
71
+ rule(:literal => {:string => simple(:s)}) { StringLit.new(s) }
72
+ end
73
+
74
+ ast = transform.apply(parsetree)
75
+ pp ast
@@ -0,0 +1,4 @@
1
+ "THis is a string"
2
+ "This is another string"
3
+ "This string is escaped \"embedded quoted stuff \" "
4
+ 12 // an integer literal and a comment
@@ -66,7 +66,7 @@ module Parslet
66
66
  # puts parslet.error_tree
67
67
  # end
68
68
  #
69
- class ParseFailed < Exception
69
+ class ParseFailed < StandardError
70
70
  end
71
71
 
72
72
  module ClassMethods
@@ -30,10 +30,9 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
30
30
 
31
31
  def try(io) # :nodoc:
32
32
  alternatives.each { |a|
33
- begin
33
+ catch(:error) {
34
34
  return a.apply(io)
35
- rescue Parslet::ParseFailed => ex
36
- end
35
+ }
37
36
  }
38
37
  # If we reach this point, all alternatives have failed.
39
38
  error(io, "Expected one of #{alternatives.inspect}.")
@@ -13,7 +13,18 @@ class Parslet::Atoms::Base
13
13
  io = StringIO.new(io)
14
14
  end
15
15
 
16
- result = apply(io)
16
+ result = nil
17
+ error_message_or_success = catch(:error) {
18
+ result = apply(io)
19
+ :success
20
+ }
21
+
22
+ # If we didn't succeed the parse, raise an exception for the user.
23
+ # Stack trace will be off, but the error tree should explain the reason
24
+ # it failed.
25
+ if error_message_or_success != :success
26
+ raise Parslet::ParseFailed, error_message_or_success
27
+ end
17
28
 
18
29
  # If we haven't consumed the input, then the pattern doesn't match. Try
19
30
  # to provide a good error message (even asking down below)
@@ -22,30 +33,49 @@ class Parslet::Atoms::Base
22
33
  # error to fail with. Otherwise just report that we cannot consume the
23
34
  # input.
24
35
  if cause
25
- raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
36
+ # Don't garnish the real cause; but the exception is different anyway.
37
+ raise Parslet::ParseFailed,
38
+ "Unconsumed input, maybe because of this: #{cause}"
26
39
  else
27
- error(io, "Don't know what to do with #{io.string[io.pos,100]}")
40
+ parse_failed(
41
+ format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
28
42
  end
29
43
  end
30
44
 
31
45
  return flatten(result)
32
46
  end
33
47
 
48
+ #---
49
+ # Calls the #try method of this parslet. In case of a parse error, apply
50
+ # leaves the io in the state it was before the attempt.
51
+ #+++
34
52
  def apply(io) # :nodoc:
35
53
  # p [:start, self, io.string[io.pos, 10]]
36
54
 
37
55
  old_pos = io.pos
38
56
 
39
57
  # p [:try, self, io.string[io.pos, 20]]
40
- begin
58
+ message = catch(:error) {
41
59
  r = try(io)
42
60
  # p [:return_from, self, r, flatten(r)]
61
+
62
+ # This has just succeeded, so last_cause must be empty
43
63
  @last_cause = nil
44
64
  return r
45
- rescue Parslet::ParseFailed => ex
46
- # p [:failing, self, io.string[io.pos, 20]]
47
- io.pos = old_pos; raise ex
48
- end
65
+ }
66
+
67
+ # We only reach this point if the parse has failed. message is not nil.
68
+ # p [:failing, self, io.string[io.pos, 20]]
69
+
70
+ io.pos = old_pos
71
+ throw :error, message
72
+ end
73
+
74
+ # Override this in your Atoms::Base subclasses to implement parsing
75
+ # behaviour.
76
+ #
77
+ def try(io)
78
+ raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
49
79
  end
50
80
 
51
81
  # Construct a new atom that repeats the current atom min times at least and
@@ -126,6 +156,9 @@ class Parslet::Atoms::Base
126
156
  Parslet::Atoms::Named.new(self, name)
127
157
  end
128
158
 
159
+ # Takes a mixed value coming out of a parslet and converts it to a return
160
+ # value for the user by dropping things and merging hashes.
161
+ #
129
162
  def flatten(value) # :nodoc:
130
163
  # Passes through everything that isn't an array of things
131
164
  return value unless value.instance_of? Array
@@ -225,31 +258,35 @@ class Parslet::Atoms::Base
225
258
  # with #cause.
226
259
  #
227
260
  def error_tree
228
- Parslet::ErrorTree.new(self) if cause?
261
+ Parslet::ErrorTree.new(self)
229
262
  end
230
263
  def cause? # :nodoc:
231
264
  not @last_cause.nil?
232
265
  end
233
266
  private
267
+ # TODO comments!!!
234
268
  # Report/raise a parse error with the given message, printing the current
235
269
  # position as well. Appends 'at line X char Y.' to the message you give.
236
270
  # If +pos+ is given, it is used as the real position the error happened,
237
271
  # correcting the io's current position.
238
272
  #
239
273
  def error(io, str, pos=nil)
274
+ @last_cause = format_cause(io, str, pos)
275
+ throw :error, @last_cause
276
+ end
277
+ def parse_failed(str)
278
+ @last_cause = str
279
+ raise Parslet::ParseFailed,
280
+ @last_cause
281
+ end
282
+ def format_cause(io, str, pos=nil)
240
283
  pre = io.string[0..(pos||io.pos)]
241
284
  lines = Array(pre.lines)
242
285
 
243
- if lines.empty?
244
- formatted_cause = str
245
- else
246
- pos = lines.last.length
247
- formatted_cause = "#{str} at line #{lines.count} char #{pos}."
248
- end
249
-
250
- @last_cause = formatted_cause
251
-
252
- raise Parslet::ParseFailed, formatted_cause, nil
286
+ return str if lines.empty?
287
+
288
+ pos = lines.last.length
289
+ return "#{str} at line #{lines.count} char #{pos}."
253
290
  end
254
291
  def warn_about_duplicate_keys(h1, h2)
255
292
  d = h1.keys & h2.keys
@@ -16,27 +16,30 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
16
16
 
17
17
  def try(io) # :nodoc:
18
18
  pos = io.pos
19
- begin
19
+
20
+ failed = true
21
+ catch(:error) {
20
22
  bound_parslet.apply(io)
21
- rescue Parslet::ParseFailed
22
- return fail(io)
23
- ensure
24
- io.pos = pos
25
- end
26
- return success(io)
23
+ failed = false
24
+ }
25
+ return failed ? fail(io) : success(io)
26
+
27
+ ensure
28
+ io.pos = pos
27
29
  end
28
30
 
31
+ # TODO Both of these will produce results that could be reduced easily.
32
+ # Maybe do some shortcut reducing here?
29
33
  def fail(io) # :nodoc:
30
34
  if positive
31
35
  error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
32
36
  else
33
- # TODO: Squash this down to nothing? Return value handling here...
34
37
  return nil
35
38
  end
36
39
  end
37
40
  def success(io) # :nodoc:
38
41
  if positive
39
- return nil # see above, TODO
42
+ return nil
40
43
  else
41
44
  error(
42
45
  io,
@@ -8,16 +8,16 @@
8
8
  # match('\s') # like regexps: matches space characters
9
9
  #
10
10
  class Parslet::Atoms::Re < Parslet::Atoms::Base
11
- attr_reader :match
11
+ attr_reader :match, :re
12
12
  def initialize(match) # :nodoc:
13
13
  @match = match
14
+ @re = Regexp.new(match, Regexp::MULTILINE)
14
15
  end
15
16
 
16
17
  def try(io) # :nodoc:
17
- r = Regexp.new(match, Regexp::MULTILINE)
18
18
  s = io.read(1)
19
19
  error(io, "Premature end of input") unless s
20
- error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
20
+ error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(re)
21
21
  return s
22
22
  end
23
23
 
@@ -17,22 +17,21 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
17
17
  def try(io) # :nodoc:
18
18
  occ = 0
19
19
  result = [@tag] # initialize the result array with the tag (for flattening)
20
- loop do
21
- begin
22
- result << parslet.apply(io)
23
- occ += 1
24
-
25
- # If we're not greedy (max is defined), check if that has been
26
- # reached.
27
- return result if max && occ>=max
28
- rescue Parslet::ParseFailed => ex
29
- # Greedy matcher has produced a failure. Check if occ (which will
30
- # contain the number of sucesses) is in {min, max}.
31
- # p [:repetition, occ, min, max]
32
- error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
33
- return result
34
- end
35
- end
20
+ catch(:error) {
21
+ result << parslet.apply(io)
22
+ occ += 1
23
+
24
+ # If we're not greedy (max is defined), check if that has been
25
+ # reached.
26
+ return result if max && occ>=max
27
+ redo
28
+ }
29
+
30
+ # Greedy matcher has produced a failure. Check if occ (which will
31
+ # contain the number of successes) is in {min, max}.
32
+ # p [:repetition, occ, min, max]
33
+ error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
34
+ return result
36
35
  end
37
36
 
38
37
  precedence REPETITION
@@ -16,12 +16,14 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
16
16
  end
17
17
 
18
18
  def try(io) # :nodoc:
19
- [:sequence]+parslets.map { |p|
20
- # Save each parslet as potentially offending (raising an error).
21
- @offending_parslet = p
22
- p.apply(io)
19
+ catch(:error) {
20
+ return [:sequence]+parslets.map { |p|
21
+ # Save each parslet as potentially offending (raising an error).
22
+ @offending_parslet = p
23
+ p.apply(io)
24
+ }
23
25
  }
24
- rescue Parslet::ParseFailed
26
+
25
27
  error(io, "Failed to match sequence (#{self.inspect})")
26
28
  end
27
29
 
@@ -99,7 +99,8 @@ class Parslet::Pattern
99
99
  end
100
100
 
101
101
  def element_match_hash(tree, exp, bindings)
102
- # p [:emh, tree, exp, bindings]
102
+ # Early failure when not all of the hash keys are matched.
103
+ return false unless exp.keys == tree.keys
103
104
 
104
105
  # We iterate over expected pattern, since we demand that the keys that
105
106
  # are there should be in tree as well.
@@ -0,0 +1,24 @@
1
+ RSpec::Matchers.define(:parse) do |input|
2
+ chain(:as) { |as| @as = as }
3
+
4
+ match do |parser|
5
+ begin
6
+ @result = parser.parse(input)
7
+ @as == @result or @as.nil?
8
+ rescue Parslet::ParseFailed
9
+ false
10
+ end
11
+ end
12
+
13
+ failure_message_for_should do |is|
14
+ "expected " << (@result ?
15
+ "output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
16
+ "expected #{is.inspect} to be able to parse #{input.inspect}")
17
+ end
18
+
19
+ failure_message_for_should_not do |is|
20
+ "expected " << (@as ?
21
+ "output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
22
+ "expected #{is.inspect} to be able to parse #{input.inspect}")
23
+ end
24
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 0
8
- - 0
9
- version: 1.0.0
8
+ - 1
9
+ version: 1.0.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - Kaspar Schiess
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-29 00:00:00 +01:00
17
+ date: 2011-01-17 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -90,8 +90,21 @@ files:
90
90
  - lib/parslet/pattern/binding.rb
91
91
  - lib/parslet/pattern/context.rb
92
92
  - lib/parslet/pattern.rb
93
+ - lib/parslet/rig/rspec.rb
93
94
  - lib/parslet/transform.rb
94
95
  - lib/parslet.rb
96
+ - example/documentation.rb
97
+ - example/email_parser.rb
98
+ - example/empty.rb
99
+ - example/ip_address.rb
100
+ - example/minilisp.rb
101
+ - example/parens.rb
102
+ - example/readme.rb
103
+ - example/seasons.rb
104
+ - example/simple.lit
105
+ - example/simple_xml.rb
106
+ - example/string_parser.rb
107
+ - example/test.lit
95
108
  has_rdoc: true
96
109
  homepage: http://kschiess.github.com/parslet
97
110
  licenses: []