parslet 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
data/example/seasons.rb CHANGED
@@ -1,4 +1,5 @@
1
- $:.unshift '../lib/'
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
2
3
  require 'parslet'
3
4
  require 'pp'
4
5
 
@@ -1,14 +1,12 @@
1
1
  # A simple xml parser. It is simple in the respect that it doesn't address
2
2
  # any of the complexities of XML. This is ruby 1.9.
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
8
8
 
9
- module XML
10
- include Parslet
11
-
9
+ class XML < Parslet::Parser
12
10
  root :document
13
11
 
14
12
  rule(:document) {
@@ -23,7 +21,7 @@ module XML
23
21
 
24
22
  parslet = str('<')
25
23
  parslet = parslet >> str('/') if close
26
- parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
24
+ parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
27
25
  parslet = parslet >> str('>')
28
26
 
29
27
  parslet
@@ -35,8 +33,7 @@ module XML
35
33
  end
36
34
 
37
35
  def check(xml)
38
- include XML
39
- r=parse(xml)
36
+ r = XML.new.parse(xml)
40
37
 
41
38
  # We'll validate the tree by reducing valid pairs of tags into simply the
42
39
  # string "verified". If the transformation ends on a string, then the
@@ -54,4 +51,4 @@ def check(xml)
54
51
  end
55
52
 
56
53
  pp check("<a><b>some text in the tags</b></a>")
57
- pp check("<b><b>some text in the tags</b></a>")
54
+ pp check("<b><b>some text in the tags</b></a>")
@@ -4,7 +4,7 @@
4
4
 
5
5
  require 'pp'
6
6
 
7
- $:.unshift '../lib/'
7
+ $:.unshift File.dirname(__FILE__) + "/../lib"
8
8
  require 'parslet'
9
9
 
10
10
  include Parslet
@@ -26,7 +26,7 @@ class LiteralsParser < Parslet::Parser
26
26
  str('"') >>
27
27
  (
28
28
  (str('\\') >> any) |
29
- (str('"').absnt? >> any)
29
+ (str('"').absent? >> any)
30
30
  ).repeat.as(:string) >>
31
31
  str('"')
32
32
  end
@@ -50,8 +50,10 @@ class LiteralsParser < Parslet::Parser
50
50
  root :literals
51
51
  end
52
52
 
53
- parsetree = LiteralsParser.new.parse(
54
- File.read('simple.lit'))
53
+ input_name = File.join(File.dirname(__FILE__), 'simple.lit')
54
+ file = File.read(input_name)
55
+
56
+ parsetree = LiteralsParser.new.parse(file)
55
57
 
56
58
  class Lit < Struct.new(:text)
57
59
  def to_s
@@ -72,4 +74,4 @@ transform = Parslet::Transform.new do
72
74
  end
73
75
 
74
76
  ast = transform.apply(parsetree)
75
- pp ast
77
+ pp ast
data/lib/parslet.rb CHANGED
@@ -7,7 +7,7 @@
7
7
  # root(:a)
8
8
  # end
9
9
  #
10
- # pp MyParser.new.parse('aaaa') # => 'aaaa'
10
+ # pp MyParser.new.parse('aaaa') # => 'aaaa'@0
11
11
  # pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
12
12
  # # Don't know what to do with bbbb at line 1 char 1.
13
13
  #
@@ -64,38 +64,18 @@ module Parslet
64
64
  # puts parslet.error_tree
65
65
  # end
66
66
  #
67
+ # Alternatively, you can just require 'parslet/convenience' and call
68
+ # the method #parse_with_debug instead of #parse. This method will never
69
+ # raise; it prints error trees to stdout instead.
70
+ #
71
+ # Example:
72
+ # require 'parslet/convenience'
73
+ # parslet.parse_with_debug(str)
74
+ #
67
75
  class ParseFailed < StandardError
68
76
  end
69
77
 
70
78
  module ClassMethods
71
- # Define the parsers #root function. This is the place where you start
72
- # parsing; if you have a rule for 'file' that describes what should be
73
- # in a file, this would be your root declaration:
74
- #
75
- # class Parser
76
- # root :file
77
- # rule(:file) { ... }
78
- # end
79
- #
80
- # #root declares a 'parse' function that works just like the parse
81
- # function that you can call on a simple parslet, taking a string as input
82
- # and producing parse output.
83
- #
84
- # In a way, #root is a shorthand for:
85
- #
86
- # def parse(str)
87
- # your_parser_root.parse(str)
88
- # end
89
- #
90
- def root(name)
91
- define_method(:root) do
92
- self.send(name)
93
- end
94
- define_method(:parse) do |str|
95
- root.parse(str)
96
- end
97
- end
98
-
99
79
  # Define an entity for the parser. This generates a method of the same
100
80
  # name that can be used as part of other patterns. Those methods can be
101
81
  # freely mixed in your parser class with real ruby methods.
@@ -116,8 +96,14 @@ module Parslet
116
96
  def rule(name, &definition)
117
97
  define_method(name) do
118
98
  @rules ||= {} # <name, rule> memoization
119
- @rules[name] or
120
- (@rules[name] = Atoms::Entity.new(name, self, definition))
99
+ return @rules[name] if @rules.has_key?(name)
100
+
101
+ # Capture the self of the parser class along with the definition.
102
+ definition_closure = proc {
103
+ self.instance_eval(&definition)
104
+ }
105
+
106
+ @rules[name] = Atoms::Entity.new(name, &definition_closure)
121
107
  end
122
108
  end
123
109
  end
@@ -164,6 +150,8 @@ module Parslet
164
150
  # Returns an atom matching any character. It acts like the '.' (dot)
165
151
  # character in regular expressions.
166
152
  #
153
+ # Example:
154
+ #
167
155
  # any.parse('a') # => 'a'
168
156
  #
169
157
  def any
@@ -227,6 +215,7 @@ module Parslet
227
215
  autoload :Expression, 'parslet/expression'
228
216
  end
229
217
 
218
+ require 'parslet/slice'
230
219
  require 'parslet/source'
231
220
  require 'parslet/error_tree'
232
221
  require 'parslet/atoms'
data/lib/parslet/atoms.rb CHANGED
@@ -16,6 +16,7 @@ module Parslet::Atoms
16
16
  end
17
17
 
18
18
  require 'parslet/atoms/context'
19
+ require 'parslet/atoms/dsl'
19
20
  require 'parslet/atoms/base'
20
21
  require 'parslet/atoms/named'
21
22
  require 'parslet/atoms/lookahead'
@@ -1,8 +1,11 @@
1
1
  # Base class for all parslets, handles orchestration of calls and implements
2
2
  # a lot of the operator and chaining methods.
3
3
  #
4
+ # Also see Parslet::Atoms::DSL for chaining parslet atoms together.
5
+ #
4
6
  class Parslet::Atoms::Base
5
7
  include Parslet::Atoms::Precedence
8
+ include Parslet::Atoms::DSL
6
9
 
7
10
  # Internally, all parsing functions return either an instance of Fail
8
11
  # or an instance of Success.
@@ -89,84 +92,6 @@ class Parslet::Atoms::Base
89
92
  "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
90
93
  end
91
94
 
92
- # Construct a new atom that repeats the current atom min times at least and
93
- # at most max times. max can be nil to indicate that no maximum is present.
94
- #
95
- # Example:
96
- # # match any number of 'a's
97
- # str('a').repeat
98
- #
99
- # # match between 1 and 3 'a's
100
- # str('a').repeat(1,3)
101
- #
102
- def repeat(min=0, max=nil)
103
- Parslet::Atoms::Repetition.new(self, min, max)
104
- end
105
-
106
- # Returns a new parslet atom that is only maybe present in the input. This
107
- # is synonymous to calling #repeat(0,1). Generated tree value will be
108
- # either nil (if atom is not present in the input) or the matched subtree.
109
- #
110
- # Example:
111
- # str('foo').maybe
112
- #
113
- def maybe
114
- Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
115
- end
116
-
117
- # Chains two parslet atoms together as a sequence.
118
- #
119
- # Example:
120
- # str('a') >> str('b')
121
- #
122
- def >>(parslet)
123
- Parslet::Atoms::Sequence.new(self, parslet)
124
- end
125
-
126
- # Chains two parslet atoms together to express alternation. A match will
127
- # always be attempted with the parslet on the left side first. If it doesn't
128
- # match, the right side will be tried.
129
- #
130
- # Example:
131
- # # matches either 'a' OR 'b'
132
- # str('a') | str('b')
133
- #
134
- def |(parslet)
135
- Parslet::Atoms::Alternative.new(self, parslet)
136
- end
137
-
138
- # Tests for absence of a parslet atom in the input stream without consuming
139
- # it.
140
- #
141
- # Example:
142
- # # Only proceed the parse if 'a' is absent.
143
- # str('a').absnt?
144
- #
145
- def absnt?
146
- Parslet::Atoms::Lookahead.new(self, false)
147
- end
148
-
149
- # Tests for presence of a parslet atom in the input stream without consuming
150
- # it.
151
- #
152
- # Example:
153
- # # Only proceed the parse if 'a' is present.
154
- # str('a').prsnt?
155
- #
156
- def prsnt?
157
- Parslet::Atoms::Lookahead.new(self, true)
158
- end
159
-
160
- # Marks a parslet atom as important for the tree output. This must be used
161
- # to achieve meaningful output from the #parse method.
162
- #
163
- # Example:
164
- # str('a').as(:b) # will produce {:b => 'a'}
165
- #
166
- def as(name)
167
- Parslet::Atoms::Named.new(self, name)
168
- end
169
-
170
95
  # Takes a mixed value coming out of a parslet and converts it to a return
171
96
  # value for the user by dropping things and merging hashes.
172
97
  #
@@ -192,14 +117,24 @@ class Parslet::Atoms::Base
192
117
 
193
118
  fail "BUG: Unknown tag #{tag.inspect}."
194
119
  end
120
+
121
+ # Lisp style fold left where the first element builds the basis for
122
+ # an inject.
123
+ #
124
+ def foldl(list, &block)
125
+ return '' if list.empty?
126
+ list[1..-1].inject(list.first, &block)
127
+ end
195
128
 
129
+ # Flatten results from a sequence of parslets.
130
+ #
196
131
  def flatten_sequence(list) # :nodoc:
197
- list.compact.inject('') { |r, e| # and then merge flat elements
132
+ foldl(list.compact) { |r, e| # and then merge flat elements
198
133
  merge_fold(r, e)
199
134
  }
200
135
  end
201
136
  def merge_fold(l, r) # :nodoc:
202
- # equal pairs: merge.
137
+ # equal pairs: merge. ----------------------------------------------------
203
138
  if l.class == r.class
204
139
  if l.is_a?(Hash)
205
140
  warn_about_duplicate_keys(l, r)
@@ -209,11 +144,20 @@ class Parslet::Atoms::Base
209
144
  end
210
145
  end
211
146
 
212
- # unequal pairs: hoist to same level.
147
+ # unequal pairs: hoist to same level. ------------------------------------
213
148
 
214
- # special case: If one of them is a string, the other is more important
215
- return l if r.class == String
216
- return r if l.class == String
149
+ # Maybe classes are not equal, but both are stringlike?
150
+ if l.respond_to?(:to_str) && r.respond_to?(:to_str)
151
+ # if we're merging a String with a Slice, the slice wins.
152
+ return r if r.respond_to? :to_slice
153
+ return l if l.respond_to? :to_slice
154
+
155
+ fail "NOTREACHED: What other stringlike classes are there?"
156
+ end
157
+
158
+ # special case: If one of them is a string/slice, the other is more important
159
+ return l if r.respond_to? :to_str
160
+ return r if l.respond_to? :to_str
217
161
 
218
162
  # otherwise just create an array for one of them to live in
219
163
  return l + [r] if r.class == Hash
@@ -222,6 +166,11 @@ class Parslet::Atoms::Base
222
166
  fail "Unhandled case when foldr'ing sequence."
223
167
  end
224
168
 
169
+ # Flatten results from a repetition of a single parslet. named indicates
170
+ # whether the user has named the result or not. If the user has named
171
+ # the results, we want to leave an empty list alone - otherwise it is
172
+ # turned into an empty string.
173
+ #
225
174
  def flatten_repetition(list, named) # :nodoc:
226
175
  if list.any? { |e| e.instance_of?(Hash) }
227
176
  # If keyed subtrees are in the array, we'll want to discard all
@@ -241,9 +190,11 @@ class Parslet::Atoms::Base
241
190
  return [] if named && list.empty?
242
191
 
243
192
  # If there are only strings, concatenate them and return that.
244
- list.inject('') { |s,e| s<<e }
193
+ foldl(list) { |s,e| s+e }
245
194
  end
246
195
 
196
+ # Debug printing - in Treetop syntax.
197
+ #
247
198
  def self.precedence(prec) # :nodoc:
248
199
  define_method(:precedence) { prec }
249
200
  end
@@ -272,7 +223,7 @@ class Parslet::Atoms::Base
272
223
 
273
224
  # Error tree returns what went wrong here plus what went wrong inside
274
225
  # subexpressions as a tree. The error stored for this node will be equal
275
- # with #cause.
226
+ # to #cause.
276
227
  #
277
228
  def error_tree
278
229
  Parslet::ErrorTree.new(self)
@@ -301,10 +252,18 @@ private
301
252
  @last_cause.to_s
302
253
  end
303
254
 
255
+ # An internal class that allows delaying the construction of error messages
256
+ # (as strings) until we really need to print them.
257
+ #
304
258
  class Cause < Struct.new(:message, :source, :pos)
305
259
  def to_s
306
260
  line, column = source.line_and_column(pos)
307
- message + " at line #{line} char #{column}."
261
+ # Allow message to be a list of objects. Join them here, since we now
262
+ # really need it.
263
+ Array(message).map { |o|
264
+ o.respond_to?(:to_slice) ?
265
+ o.str.inspect :
266
+ o.to_s }.join + " at line #{line} char #{column}."
308
267
  end
309
268
  end
310
269
 
@@ -0,0 +1,98 @@
1
+
2
+ # A mixin module that defines operations that can be called on any subclass
3
+ # of Parslet::Atoms::Base. These operations make parslet atoms chainable and
4
+ # allow combination of parslet atoms to form bigger parsers.
5
+ #
6
+ # Example:
7
+ #
8
+ # str('foo') >> str('bar')
9
+ # str('f').repeat
10
+ # any.absent? # also called The Epsilon
11
+ #
12
+ module Parslet::Atoms::DSL
13
+ # Construct a new atom that repeats the current atom min times at least and
14
+ # at most max times. max can be nil to indicate that no maximum is present.
15
+ #
16
+ # Example:
17
+ # # match any number of 'a's
18
+ # str('a').repeat
19
+ #
20
+ # # match between 1 and 3 'a's
21
+ # str('a').repeat(1,3)
22
+ #
23
+ def repeat(min=0, max=nil)
24
+ Parslet::Atoms::Repetition.new(self, min, max)
25
+ end
26
+
27
+ # Returns a new parslet atom that is only maybe present in the input. This
28
+ # is synonymous to calling #repeat(0,1). Generated tree value will be
29
+ # either nil (if atom is not present in the input) or the matched subtree.
30
+ #
31
+ # Example:
32
+ # str('foo').maybe
33
+ #
34
+ def maybe
35
+ Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
36
+ end
37
+
38
+ # Chains two parslet atoms together as a sequence.
39
+ #
40
+ # Example:
41
+ # str('a') >> str('b')
42
+ #
43
+ def >>(parslet)
44
+ Parslet::Atoms::Sequence.new(self, parslet)
45
+ end
46
+
47
+ # Chains two parslet atoms together to express alternation. A match will
48
+ # always be attempted with the parslet on the left side first. If it doesn't
49
+ # match, the right side will be tried.
50
+ #
51
+ # Example:
52
+ # # matches either 'a' OR 'b'
53
+ # str('a') | str('b')
54
+ #
55
+ def |(parslet)
56
+ Parslet::Atoms::Alternative.new(self, parslet)
57
+ end
58
+
59
+ # Tests for absence of a parslet atom in the input stream without consuming
60
+ # it.
61
+ #
62
+ # Example:
63
+ # # Only proceed the parse if 'a' is absent.
64
+ # str('a').absent?
65
+ #
66
+ def absent?
67
+ Parslet::Atoms::Lookahead.new(self, false)
68
+ end
69
+
70
+ # Tests for presence of a parslet atom in the input stream without consuming
71
+ # it.
72
+ #
73
+ # Example:
74
+ # # Only proceed the parse if 'a' is present.
75
+ # str('a').present?
76
+ #
77
+ def present?
78
+ Parslet::Atoms::Lookahead.new(self, true)
79
+ end
80
+
81
+ # Alias for present? that will disappear in 2.0 (deprecated)
82
+ #
83
+ alias prsnt? present?
84
+
85
+ # Alias for absent? that will disappear in 2.0 (deprecated)
86
+ #
87
+ alias absnt? absent?
88
+
89
+ # Marks a parslet atom as important for the tree output. This must be used
90
+ # to achieve meaningful output from the #parse method.
91
+ #
92
+ # Example:
93
+ # str('a').as(:b) # will produce {:b => 'a'}
94
+ #
95
+ def as(name)
96
+ Parslet::Atoms::Named.new(self, name)
97
+ end
98
+ end