parslet 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
data/example/seasons.rb CHANGED
@@ -1,4 +1,5 @@
1
- $:.unshift '../lib/'
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
2
3
  require 'parslet'
3
4
  require 'pp'
4
5
 
@@ -1,14 +1,12 @@
1
1
  # A simple xml parser. It is simple in the respect that it doesn't address
2
2
  # any of the complexities of XML. This is ruby 1.9.
3
3
 
4
- $:.unshift '../lib'
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
5
 
6
6
  require 'pp'
7
7
  require 'parslet'
8
8
 
9
- module XML
10
- include Parslet
11
-
9
+ class XML < Parslet::Parser
12
10
  root :document
13
11
 
14
12
  rule(:document) {
@@ -23,7 +21,7 @@ module XML
23
21
 
24
22
  parslet = str('<')
25
23
  parslet = parslet >> str('/') if close
26
- parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
24
+ parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
27
25
  parslet = parslet >> str('>')
28
26
 
29
27
  parslet
@@ -35,8 +33,7 @@ module XML
35
33
  end
36
34
 
37
35
  def check(xml)
38
- include XML
39
- r=parse(xml)
36
+ r = XML.new.parse(xml)
40
37
 
41
38
  # We'll validate the tree by reducing valid pairs of tags into simply the
42
39
  # string "verified". If the transformation ends on a string, then the
@@ -54,4 +51,4 @@ def check(xml)
54
51
  end
55
52
 
56
53
  pp check("<a><b>some text in the tags</b></a>")
57
- pp check("<b><b>some text in the tags</b></a>")
54
+ pp check("<b><b>some text in the tags</b></a>")
@@ -4,7 +4,7 @@
4
4
 
5
5
  require 'pp'
6
6
 
7
- $:.unshift '../lib/'
7
+ $:.unshift File.dirname(__FILE__) + "/../lib"
8
8
  require 'parslet'
9
9
 
10
10
  include Parslet
@@ -26,7 +26,7 @@ class LiteralsParser < Parslet::Parser
26
26
  str('"') >>
27
27
  (
28
28
  (str('\\') >> any) |
29
- (str('"').absnt? >> any)
29
+ (str('"').absent? >> any)
30
30
  ).repeat.as(:string) >>
31
31
  str('"')
32
32
  end
@@ -50,8 +50,10 @@ class LiteralsParser < Parslet::Parser
50
50
  root :literals
51
51
  end
52
52
 
53
- parsetree = LiteralsParser.new.parse(
54
- File.read('simple.lit'))
53
+ input_name = File.join(File.dirname(__FILE__), 'simple.lit')
54
+ file = File.read(input_name)
55
+
56
+ parsetree = LiteralsParser.new.parse(file)
55
57
 
56
58
  class Lit < Struct.new(:text)
57
59
  def to_s
@@ -72,4 +74,4 @@ transform = Parslet::Transform.new do
72
74
  end
73
75
 
74
76
  ast = transform.apply(parsetree)
75
- pp ast
77
+ pp ast
data/lib/parslet.rb CHANGED
@@ -7,7 +7,7 @@
7
7
  # root(:a)
8
8
  # end
9
9
  #
10
- # pp MyParser.new.parse('aaaa') # => 'aaaa'
10
+ # pp MyParser.new.parse('aaaa') # => 'aaaa'@0
11
11
  # pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
12
12
  # # Don't know what to do with bbbb at line 1 char 1.
13
13
  #
@@ -64,38 +64,18 @@ module Parslet
64
64
  # puts parslet.error_tree
65
65
  # end
66
66
  #
67
+ # Alternatively, you can just require 'parslet/convenience' and call
68
+ # the method #parse_with_debug instead of #parse. This method never
69
+ # raises; instead it prints error trees to stdout.
70
+ #
71
+ # Example:
72
+ # require 'parslet/convenience'
73
+ # parslet.parse_with_debug(str)
74
+ #
67
75
  class ParseFailed < StandardError
68
76
  end
69
77
 
70
78
  module ClassMethods
71
- # Define the parsers #root function. This is the place where you start
72
- # parsing; if you have a rule for 'file' that describes what should be
73
- # in a file, this would be your root declaration:
74
- #
75
- # class Parser
76
- # root :file
77
- # rule(:file) { ... }
78
- # end
79
- #
80
- # #root declares a 'parse' function that works just like the parse
81
- # function that you can call on a simple parslet, taking a string as input
82
- # and producing parse output.
83
- #
84
- # In a way, #root is a shorthand for:
85
- #
86
- # def parse(str)
87
- # your_parser_root.parse(str)
88
- # end
89
- #
90
- def root(name)
91
- define_method(:root) do
92
- self.send(name)
93
- end
94
- define_method(:parse) do |str|
95
- root.parse(str)
96
- end
97
- end
98
-
99
79
  # Define an entity for the parser. This generates a method of the same
100
80
  # name that can be used as part of other patterns. Those methods can be
101
81
  # freely mixed in your parser class with real ruby methods.
@@ -116,8 +96,14 @@ module Parslet
116
96
  def rule(name, &definition)
117
97
  define_method(name) do
118
98
  @rules ||= {} # <name, rule> memoization
119
- @rules[name] or
120
- (@rules[name] = Atoms::Entity.new(name, self, definition))
99
+ return @rules[name] if @rules.has_key?(name)
100
+
101
+ # Capture the self of the parser class along with the definition.
102
+ definition_closure = proc {
103
+ self.instance_eval(&definition)
104
+ }
105
+
106
+ @rules[name] = Atoms::Entity.new(name, &definition_closure)
121
107
  end
122
108
  end
123
109
  end
@@ -164,6 +150,8 @@ module Parslet
164
150
  # Returns an atom matching any character. It acts like the '.' (dot)
165
151
  # character in regular expressions.
166
152
  #
153
+ # Example:
154
+ #
167
155
  # any.parse('a') # => 'a'
168
156
  #
169
157
  def any
@@ -227,6 +215,7 @@ module Parslet
227
215
  autoload :Expression, 'parslet/expression'
228
216
  end
229
217
 
218
+ require 'parslet/slice'
230
219
  require 'parslet/source'
231
220
  require 'parslet/error_tree'
232
221
  require 'parslet/atoms'
data/lib/parslet/atoms.rb CHANGED
@@ -16,6 +16,7 @@ module Parslet::Atoms
16
16
  end
17
17
 
18
18
  require 'parslet/atoms/context'
19
+ require 'parslet/atoms/dsl'
19
20
  require 'parslet/atoms/base'
20
21
  require 'parslet/atoms/named'
21
22
  require 'parslet/atoms/lookahead'
@@ -1,8 +1,11 @@
1
1
  # Base class for all parslets, handles orchestration of calls and implements
2
2
  # a lot of the operator and chaining methods.
3
3
  #
4
+ # Also see Parslet::Atoms::DSL for chaining parslet atoms together.
5
+ #
4
6
  class Parslet::Atoms::Base
5
7
  include Parslet::Atoms::Precedence
8
+ include Parslet::Atoms::DSL
6
9
 
7
10
  # Internally, all parsing functions return either an instance of Fail
8
11
  # or an instance of Success.
@@ -89,84 +92,6 @@ class Parslet::Atoms::Base
89
92
  "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
90
93
  end
91
94
 
92
- # Construct a new atom that repeats the current atom min times at least and
93
- # at most max times. max can be nil to indicate that no maximum is present.
94
- #
95
- # Example:
96
- # # match any number of 'a's
97
- # str('a').repeat
98
- #
99
- # # match between 1 and 3 'a's
100
- # str('a').repeat(1,3)
101
- #
102
- def repeat(min=0, max=nil)
103
- Parslet::Atoms::Repetition.new(self, min, max)
104
- end
105
-
106
- # Returns a new parslet atom that is only maybe present in the input. This
107
- # is synonymous to calling #repeat(0,1). Generated tree value will be
108
- # either nil (if atom is not present in the input) or the matched subtree.
109
- #
110
- # Example:
111
- # str('foo').maybe
112
- #
113
- def maybe
114
- Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
115
- end
116
-
117
- # Chains two parslet atoms together as a sequence.
118
- #
119
- # Example:
120
- # str('a') >> str('b')
121
- #
122
- def >>(parslet)
123
- Parslet::Atoms::Sequence.new(self, parslet)
124
- end
125
-
126
- # Chains two parslet atoms together to express alternation. A match will
127
- # always be attempted with the parslet on the left side first. If it doesn't
128
- # match, the right side will be tried.
129
- #
130
- # Example:
131
- # # matches either 'a' OR 'b'
132
- # str('a') | str('b')
133
- #
134
- def |(parslet)
135
- Parslet::Atoms::Alternative.new(self, parslet)
136
- end
137
-
138
- # Tests for absence of a parslet atom in the input stream without consuming
139
- # it.
140
- #
141
- # Example:
142
- # # Only proceed the parse if 'a' is absent.
143
- # str('a').absnt?
144
- #
145
- def absnt?
146
- Parslet::Atoms::Lookahead.new(self, false)
147
- end
148
-
149
- # Tests for presence of a parslet atom in the input stream without consuming
150
- # it.
151
- #
152
- # Example:
153
- # # Only proceed the parse if 'a' is present.
154
- # str('a').prsnt?
155
- #
156
- def prsnt?
157
- Parslet::Atoms::Lookahead.new(self, true)
158
- end
159
-
160
- # Marks a parslet atom as important for the tree output. This must be used
161
- # to achieve meaningful output from the #parse method.
162
- #
163
- # Example:
164
- # str('a').as(:b) # will produce {:b => 'a'}
165
- #
166
- def as(name)
167
- Parslet::Atoms::Named.new(self, name)
168
- end
169
-
170
95
  # Takes a mixed value coming out of a parslet and converts it to a return
171
96
  # value for the user by dropping things and merging hashes.
172
97
  #
@@ -192,14 +117,24 @@ class Parslet::Atoms::Base
192
117
 
193
118
  fail "BUG: Unknown tag #{tag.inspect}."
194
119
  end
120
+
121
+ # Lisp style fold left where the first element builds the basis for
122
+ # an inject.
123
+ #
124
+ def foldl(list, &block)
125
+ return '' if list.empty?
126
+ list[1..-1].inject(list.first, &block)
127
+ end
195
128
 
129
+ # Flatten results from a sequence of parslets.
130
+ #
196
131
  def flatten_sequence(list) # :nodoc:
197
- list.compact.inject('') { |r, e| # and then merge flat elements
132
+ foldl(list.compact) { |r, e| # and then merge flat elements
198
133
  merge_fold(r, e)
199
134
  }
200
135
  end
201
136
  def merge_fold(l, r) # :nodoc:
202
- # equal pairs: merge.
137
+ # equal pairs: merge. ----------------------------------------------------
203
138
  if l.class == r.class
204
139
  if l.is_a?(Hash)
205
140
  warn_about_duplicate_keys(l, r)
@@ -209,11 +144,20 @@ class Parslet::Atoms::Base
209
144
  end
210
145
  end
211
146
 
212
- # unequal pairs: hoist to same level.
147
+ # unequal pairs: hoist to same level. ------------------------------------
213
148
 
214
- # special case: If one of them is a string, the other is more important
215
- return l if r.class == String
216
- return r if l.class == String
149
+ # Maybe classes are not equal, but both are stringlike?
150
+ if l.respond_to?(:to_str) && r.respond_to?(:to_str)
151
+ # if we're merging a String with a Slice, the slice wins.
152
+ return r if r.respond_to? :to_slice
153
+ return l if l.respond_to? :to_slice
154
+
155
+ fail "NOTREACHED: What other stringlike classes are there?"
156
+ end
157
+
158
+ # special case: If one of them is a string/slice, the other is more important
159
+ return l if r.respond_to? :to_str
160
+ return r if l.respond_to? :to_str
217
161
 
218
162
  # otherwise just create an array for one of them to live in
219
163
  return l + [r] if r.class == Hash
@@ -222,6 +166,11 @@ class Parslet::Atoms::Base
222
166
  fail "Unhandled case when foldr'ing sequence."
223
167
  end
224
168
 
169
+ # Flatten results from a repetition of a single parslet. named indicates
170
+ # whether the user has named the result or not. If the user has named
171
+ # the results, we want to leave an empty list alone - otherwise it is
172
+ # turned into an empty string.
173
+ #
225
174
  def flatten_repetition(list, named) # :nodoc:
226
175
  if list.any? { |e| e.instance_of?(Hash) }
227
176
  # If keyed subtrees are in the array, we'll want to discard all
@@ -241,9 +190,11 @@ class Parslet::Atoms::Base
241
190
  return [] if named && list.empty?
242
191
 
243
192
  # If there are only strings, concatenate them and return that.
244
- list.inject('') { |s,e| s<<e }
193
+ foldl(list) { |s,e| s+e }
245
194
  end
246
195
 
196
+ # Debug printing - in Treetop syntax.
197
+ #
247
198
  def self.precedence(prec) # :nodoc:
248
199
  define_method(:precedence) { prec }
249
200
  end
@@ -272,7 +223,7 @@ class Parslet::Atoms::Base
272
223
 
273
224
  # Error tree returns what went wrong here plus what went wrong inside
274
225
  # subexpressions as a tree. The error stored for this node will be equal
275
- # with #cause.
226
+ # to #cause.
276
227
  #
277
228
  def error_tree
278
229
  Parslet::ErrorTree.new(self)
@@ -301,10 +252,18 @@ private
301
252
  @last_cause.to_s
302
253
  end
303
254
 
255
+ # An internal class that allows delaying the construction of error messages
256
+ # (as strings) until we really need to print them.
257
+ #
304
258
  class Cause < Struct.new(:message, :source, :pos)
305
259
  def to_s
306
260
  line, column = source.line_and_column(pos)
307
- message + " at line #{line} char #{column}."
261
+ # Allow message to be a list of objects. Join them here, since we now
262
+ # really need it.
263
+ Array(message).map { |o|
264
+ o.respond_to?(:to_slice) ?
265
+ o.str.inspect :
266
+ o.to_s }.join + " at line #{line} char #{column}."
308
267
  end
309
268
  end
310
269
 
@@ -0,0 +1,98 @@
1
+
2
+ # A mixin module that defines operations that can be called on any subclass
3
+ # of Parslet::Atoms::Base. These operations make parslet atoms chainable and
4
+ # allow combination of parslet atoms to form bigger parsers.
5
+ #
6
+ # Example:
7
+ #
8
+ # str('foo') >> str('bar')
9
+ # str('f').repeat
10
+ # any.absent? # also called The Epsilon
11
+ #
12
+ module Parslet::Atoms::DSL
13
+ # Construct a new atom that repeats the current atom min times at least and
14
+ # at most max times. max can be nil to indicate that no maximum is present.
15
+ #
16
+ # Example:
17
+ # # match any number of 'a's
18
+ # str('a').repeat
19
+ #
20
+ # # match between 1 and 3 'a's
21
+ # str('a').repeat(1,3)
22
+ #
23
+ def repeat(min=0, max=nil)
24
+ Parslet::Atoms::Repetition.new(self, min, max)
25
+ end
26
+
27
+ # Returns a new parslet atom that is only maybe present in the input. This
28
+ # is synonymous to calling #repeat(0,1). Generated tree value will be
29
+ # either nil (if atom is not present in the input) or the matched subtree.
30
+ #
31
+ # Example:
32
+ # str('foo').maybe
33
+ #
34
+ def maybe
35
+ Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
36
+ end
37
+
38
+ # Chains two parslet atoms together as a sequence.
39
+ #
40
+ # Example:
41
+ # str('a') >> str('b')
42
+ #
43
+ def >>(parslet)
44
+ Parslet::Atoms::Sequence.new(self, parslet)
45
+ end
46
+
47
+ # Chains two parslet atoms together to express alternation. A match will
48
+ # always be attempted with the parslet on the left side first. If it doesn't
49
+ # match, the right side will be tried.
50
+ #
51
+ # Example:
52
+ # # matches either 'a' OR 'b'
53
+ # str('a') | str('b')
54
+ #
55
+ def |(parslet)
56
+ Parslet::Atoms::Alternative.new(self, parslet)
57
+ end
58
+
59
+ # Tests for absence of a parslet atom in the input stream without consuming
60
+ # it.
61
+ #
62
+ # Example:
63
+ # # Only proceed the parse if 'a' is absent.
64
+ # str('a').absent?
65
+ #
66
+ def absent?
67
+ Parslet::Atoms::Lookahead.new(self, false)
68
+ end
69
+
70
+ # Tests for presence of a parslet atom in the input stream without consuming
71
+ # it.
72
+ #
73
+ # Example:
74
+ # # Only proceed the parse if 'a' is present.
75
+ # str('a').present?
76
+ #
77
+ def present?
78
+ Parslet::Atoms::Lookahead.new(self, true)
79
+ end
80
+
81
+ # Alias for present? that will disappear in 2.0 (deprecated)
82
+ #
83
+ alias prsnt? present?
84
+
85
+ # Alias for absent? that will disappear in 2.0 (deprecated)
86
+ #
87
+ alias absnt? absent?
88
+
89
+ # Marks a parslet atom as important for the tree output. This must be used
90
+ # to achieve meaningful output from the #parse method.
91
+ #
92
+ # Example:
93
+ # str('a').as(:b) # will produce {:b => 'a'}
94
+ #
95
+ def as(name)
96
+ Parslet::Atoms::Named.new(self, name)
97
+ end
98
+ end