parslet 1.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,6 +3,27 @@
3
3
  - prsnt? and absnt? are now finally banned into oblivion. Wasting vocals for
4
4
  the win.
5
5
 
6
+ = 1.3.1 / ???
7
+
8
+ = 1.3.0 / 5Mar2012
9
+
10
+ ! Parslet::Transform::Context is now much more well-behaved. It has
11
+ #respond_to? and #method_missing; it now looks like a plain old Ruby
12
+ object with instance variables and attribute readers.
13
+
14
+ - Grammar transforms turned out to be a dead end and have been removed.
15
+
16
+ ! A few problems in error message generation have been fixed. This will
17
+ improve diagnostics further.
18
+
19
+ + A VM driven parser engine: Removes the limitation that parsing needs a
20
+ lot of stack space, something dearly missing from Ruby 1.9.3 fibers.
21
+ This engine is experimental and might be removed in the future.
22
+
23
+ ! Interaction with mathn fixed - Line number generation will terminate.
24
+
25
+ . Internal reorganisation, removing cruft and bit rot.
26
+
6
27
  = 1.2.3 / 22Sep2011
7
28
 
8
29
  + Transform#apply can now be called with a hash as second argument. This
data/README CHANGED
@@ -53,6 +53,6 @@ ruby-1.8.7-p334 for better results.
53
53
 
54
54
  STATUS
55
55
 
56
- At version 1.2.3 - See HISTORY.txt for changes.
56
+ At version 1.3.0 - See HISTORY.txt for changes.
57
57
 
58
58
  (c) 2010 Kaspar Schiess
@@ -0,0 +1,66 @@
1
+ # An example on how to ignore whitespace. Use the composition, luke.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../lib"
4
+
5
+ require 'pp'
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ class AParser < Parslet::Parser
10
+ root :as
11
+
12
+ rule(:as) { a.repeat }
13
+ rule(:a) { str('a').as(:a) }
14
+ end
15
+
16
+ class WsIgnoreSource
17
+ def initialize(string)
18
+ @io = StringIO.new(string)
19
+ @early_eof = nil
20
+ end
21
+
22
+ def pos
23
+ @io.pos
24
+ end
25
+
26
+ def pos=(n)
27
+ @io.pos = n
28
+ end
29
+
30
+ def gets(buf, n)
31
+ return nil if eof?
32
+
33
+ return read(n).tap {
34
+ @early_eof = pos unless can_read?
35
+ }
36
+ end
37
+
38
+ def eof?
39
+ @io.eof? || # the underlying source is EOF
40
+ @early_eof && pos >= @early_eof # we have no non-ws chars left
41
+ end
42
+
43
+ private
44
+
45
+ # Reads up to n chars from @io, skipping over spaces.
46
+ def read(n)
47
+ b = ''
48
+ while b.size < n && !@io.eof?
49
+ c = @io.gets(nil, 1)
50
+ b << c unless c == ' '
51
+ end
52
+ b
53
+ end
54
+
55
+ # True if there are any non-space chars left in @io.
56
+ def can_read?
57
+ old_pos = @io.pos
58
+ read(1).size == 1
59
+ rescue => ex
60
+ return false
61
+ ensure
62
+ @io.pos = old_pos
63
+ end
64
+ end
65
+
66
+ pp AParser.new.parse_with_debug(WsIgnoreSource.new('a a a a '))
@@ -0,0 +1,44 @@
1
+ # Demonstrates that we have a compatibility fix to mathn's weird idea of
2
+ # integer mathematics.
3
+ # This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks!
4
+
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
+
7
+ require 'parslet'
8
+ require 'parslet/convenience'
9
+ include Parslet
10
+
11
+ def attempt_parse
12
+ possible_whitespace = match['\s'].repeat
13
+
14
+ cephalopod =
15
+ str('octopus') |
16
+ str('squid')
17
+
18
+ parenthesized_cephalopod =
19
+ str('(') >>
20
+ possible_whitespace >>
21
+ cephalopod >>
22
+ possible_whitespace >>
23
+ str(')')
24
+
25
+ parser =
26
+ possible_whitespace >>
27
+ parenthesized_cephalopod >>
28
+ possible_whitespace
29
+
30
+ # This parse fails, but that is not the point. When mathn is in the current
31
+ # ruby environment, it modifies integer division in a way that makes
32
+ # parslet loop indefinitely.
33
+ parser.parse %{(\nsqeed)\n}
34
+ rescue Parslet::ParseFailed
35
+ end
36
+
37
+ attempt_parse
38
+ puts 'it terminates before we require mathn'
39
+
40
+ puts "requiring mathn now"
41
+ require 'mathn'
42
+ puts "and trying again (will hang without the fix)"
43
+ attempt_parse # without the fix, this would not terminate after requiring mathn
44
+ puts "okay!"
@@ -0,0 +1 @@
1
+ [{:a=>"a"@0}, {:a=>"a"@1}, {:a=>"a"@5}, {:a=>"a"@7}]
@@ -1,9 +1,9 @@
1
1
  0.0.0.0 -> {:ipv4=>"0.0.0.0"@0}
2
2
  255.255.255.255 -> {:ipv4=>"255.255.255.255"@0}
3
- 255.255.255 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
3
+ 255.255.255 -> Failed: Expected one of [IPV4, IPV6] at line 1 char 1.
4
4
  1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0}
5
5
  12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0}
6
6
  12AD::34FC -> {:ipv6=>"12AD::34FC"@0}
7
7
  12AD:: -> {:ipv6=>"12AD::"@0}
8
8
  :: -> {:ipv6=>"::"@0}
9
- 1:2 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
9
+ 1:2 -> Failed: Expected one of [IPV4, IPV6] at line 1 char 1.
@@ -0,0 +1,4 @@
1
+ it terminates before we require mathn
2
+ requiring mathn now
3
+ and trying again (will hang without the fix)
4
+ okay!
@@ -73,6 +73,11 @@ module Parslet
73
73
  # parslet.parse_with_debug(str)
74
74
  #
75
75
  class ParseFailed < StandardError
76
+ def initialize(message, cause=nil)
77
+ super(message)
78
+ @cause = cause
79
+ end
80
+ attr_reader :cause
76
81
  end
77
82
 
78
83
  # Raised when the parse operation didn't consume all of its input. In this
@@ -224,10 +229,12 @@ module Parslet
224
229
  end
225
230
 
226
231
  require 'parslet/slice'
232
+ require 'parslet/cause'
227
233
  require 'parslet/source'
228
234
  require 'parslet/error_tree'
229
235
  require 'parslet/atoms'
230
236
  require 'parslet/pattern'
231
237
  require 'parslet/pattern/binding'
232
238
  require 'parslet/transform'
233
- require 'parslet/parser'
239
+ require 'parslet/parser'
240
+ require 'parslet/bytecode'
@@ -15,6 +15,7 @@ module Parslet::Atoms
15
15
  OUTER = (prec+=1) # printing is done here.
16
16
  end
17
17
 
18
+ require 'parslet/atoms/can_flatten'
18
19
  require 'parslet/atoms/context'
19
20
  require 'parslet/atoms/dsl'
20
21
  require 'parslet/atoms/base'
@@ -19,7 +19,7 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
19
19
  super()
20
20
 
21
21
  @alternatives = alternatives
22
- @error_msg = "Expected one of #{alternatives.inspect}."
22
+ @error_msg = "Expected one of #{alternatives.inspect}"
23
23
  end
24
24
 
25
25
  #---
@@ -6,6 +6,7 @@
6
6
  class Parslet::Atoms::Base
7
7
  include Parslet::Atoms::Precedence
8
8
  include Parslet::Atoms::DSL
9
+ include Parslet::Atoms::CanFlatten
9
10
 
10
11
  # Internally, all parsing functions return either an instance of Fail
11
12
  # or an instance of Success.
@@ -25,7 +26,23 @@ class Parslet::Atoms::Base
25
26
  # and return a result. If the parse fails, a Parslet::ParseFailed exception
26
27
  # will be thrown.
27
28
  #
28
- def parse(io)
29
+ def parse(io, traditional=true)
30
+ if traditional
31
+ parse_traditional(io)
32
+ else
33
+ parse_vm(io)
34
+ end
35
+ end
36
+
37
+ def parse_vm(io)
38
+ compiler = Parslet::Bytecode::Compiler.new
39
+ program = compiler.compile(self)
40
+
41
+ vm = Parslet::Bytecode::VM.new
42
+ vm.run(program, io)
43
+ end
44
+
45
+ def parse_traditional(io)
29
46
  source = Parslet::Source.new(io)
30
47
  context = Parslet::Atoms::Context.new
31
48
 
@@ -36,7 +53,8 @@ class Parslet::Atoms::Base
36
53
  # Stack trace will be off, but the error tree should explain the reason
37
54
  # it failed.
38
55
  if value.error?
39
- parse_failed(value.message)
56
+ @last_cause = value.message
57
+ @last_cause.raise
40
58
  end
41
59
 
42
60
  # assert: value is a success answer
@@ -48,16 +66,15 @@ class Parslet::Atoms::Base
48
66
  # error to fail with. Otherwise just report that we cannot consume the
49
67
  # input.
50
68
  if cause
51
- # We're not using #parse_failed here, since it assigns to @last_cause.
52
- # Still: We'll raise this differently, since the real cause is different.
69
+ # NOTE We don't overwrite last_cause here.
53
70
  raise Parslet::UnconsumedInput,
54
71
  "Unconsumed input, maybe because of this: #{cause}"
55
72
  else
56
73
  old_pos = source.pos
57
- parse_failed(
58
- format_cause(source,
59
- "Don't know what to do with #{source.read(100)}", old_pos),
60
- Parslet::UnconsumedInput)
74
+ @last_cause = source.error(
75
+ "Don't know what to do with #{source.read(100)}", old_pos)
76
+
77
+ @last_cause.raise(Parslet::UnconsumedInput)
61
78
  end
62
79
  end
63
80
 
@@ -94,110 +111,6 @@ class Parslet::Atoms::Base
94
111
  "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
95
112
  end
96
113
 
97
- # Takes a mixed value coming out of a parslet and converts it to a return
98
- # value for the user by dropping things and merging hashes.
99
- #
100
- # Named is set to true if this result will be embedded in a Hash result from
101
- # naming something using <code>.as(...)</code>. It changes the folding
102
- # semantics of repetition.
103
- #
104
- def flatten(value, named=false) # :nodoc:
105
- # Passes through everything that isn't an array of things
106
- return value unless value.instance_of? Array
107
-
108
- # Extracts the s-expression tag
109
- tag, *tail = value
110
-
111
- # Merges arrays:
112
- result = tail.
113
- map { |e| flatten(e) } # first flatten each element
114
-
115
- case tag
116
- when :sequence
117
- return flatten_sequence(result)
118
- when :maybe
119
- return named ? result.first : result.first || ''
120
- when :repetition
121
- return flatten_repetition(result, named)
122
- end
123
-
124
- fail "BUG: Unknown tag #{tag.inspect}."
125
- end
126
-
127
- # Lisp style fold left where the first element builds the basis for
128
- # an inject.
129
- #
130
- def foldl(list, &block)
131
- return '' if list.empty?
132
- list[1..-1].inject(list.first, &block)
133
- end
134
-
135
- # Flatten results from a sequence of parslets.
136
- #
137
- def flatten_sequence(list) # :nodoc:
138
- foldl(list.compact) { |r, e| # and then merge flat elements
139
- merge_fold(r, e)
140
- }
141
- end
142
- def merge_fold(l, r) # :nodoc:
143
- # equal pairs: merge. ----------------------------------------------------
144
- if l.class == r.class
145
- if l.is_a?(Hash)
146
- warn_about_duplicate_keys(l, r)
147
- return l.merge(r)
148
- else
149
- return l + r
150
- end
151
- end
152
-
153
- # unequal pairs: hoist to same level. ------------------------------------
154
-
155
- # Maybe classes are not equal, but both are stringlike?
156
- if l.respond_to?(:to_str) && r.respond_to?(:to_str)
157
- # if we're merging a String with a Slice, the slice wins.
158
- return r if r.respond_to? :to_slice
159
- return l if l.respond_to? :to_slice
160
-
161
- fail "NOTREACHED: What other stringlike classes are there?"
162
- end
163
-
164
- # special case: If one of them is a string/slice, the other is more important
165
- return l if r.respond_to? :to_str
166
- return r if l.respond_to? :to_str
167
-
168
- # otherwise just create an array for one of them to live in
169
- return l + [r] if r.class == Hash
170
- return [l] + r if l.class == Hash
171
-
172
- fail "Unhandled case when foldr'ing sequence."
173
- end
174
-
175
- # Flatten results from a repetition of a single parslet. named indicates
176
- # whether the user has named the result or not. If the user has named
177
- # the results, we want to leave an empty list alone - otherwise it is
178
- # turned into an empty string.
179
- #
180
- def flatten_repetition(list, named) # :nodoc:
181
- if list.any? { |e| e.instance_of?(Hash) }
182
- # If keyed subtrees are in the array, we'll want to discard all
183
- # strings inbetween. To keep them, name them.
184
- return list.select { |e| e.instance_of?(Hash) }
185
- end
186
-
187
- if list.any? { |e| e.instance_of?(Array) }
188
- # If any arrays are nested in this array, flatten all arrays to this
189
- # level.
190
- return list.
191
- select { |e| e.instance_of?(Array) }.
192
- flatten(1)
193
- end
194
-
195
- # Consistent handling of empty lists, when we act on a named result
196
- return [] if named && list.empty?
197
-
198
- # If there are only strings, concatenate them and return that.
199
- foldl(list) { |s,e| s+e }
200
- end
201
114
 
202
115
  # Debug printing - in Treetop syntax.
203
116
  #
@@ -245,51 +158,7 @@ private
245
158
  # Produces an instance of Fail and returns it.
246
159
  #
247
160
  def error(source, str, pos=nil)
248
- @last_cause = format_cause(source, str, pos)
161
+ @last_cause = source.error(str, pos)
249
162
  Fail.new(@last_cause)
250
163
  end
251
-
252
- # Signals to the outside that the parse has failed. Use this in conjunction
253
- # with #format_cause for nice error messages.
254
- #
255
- def parse_failed(cause, exception_klass=Parslet::ParseFailed)
256
- @last_cause = cause
257
- raise exception_klass,
258
- @last_cause.to_s
259
- end
260
-
261
- # An internal class that allows delaying the construction of error messages
262
- # (as strings) until we really need to print them.
263
- #
264
- class Cause < Struct.new(:message, :source, :pos)
265
- def to_s
266
- line, column = source.line_and_column(pos)
267
- # Allow message to be a list of objects. Join them here, since we now
268
- # really need it.
269
- Array(message).map { |o|
270
- o.respond_to?(:to_slice) ?
271
- o.str.inspect :
272
- o.to_s }.join + " at line #{line} char #{column}."
273
- end
274
- end
275
-
276
- # Appends 'at line ... char ...' to the string given. Use +pos+ to override
277
- # the position of the +source+. This method returns an object that can
278
- # be turned into a string using #to_s.
279
- #
280
- def format_cause(source, str, pos=nil)
281
- real_pos = (pos||source.pos)
282
- Cause.new(str, source, real_pos)
283
- end
284
-
285
- # That annoying warning 'Duplicate subtrees while merging result' comes
286
- # from here. You should add more '.as(...)' names to your intermediary tree.
287
- #
288
- def warn_about_duplicate_keys(h1, h2)
289
- d = h1.keys & h2.keys
290
- unless d.empty?
291
- warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
292
- " of the latter will be kept. (keys: #{d.inspect})"
293
- end
294
- end
295
164
  end
@@ -0,0 +1,132 @@
1
+
2
+ module Parslet::Atoms
3
+ # A series of helper functions that have the common topic of flattening
4
+ # result values into the intermediary tree that consists of Ruby Hashes and
5
+ # Arrays.
6
+ #
7
+ # This module has one main function, #flatten, that takes an annotated
8
+ # structure as input and returns the reduced form that users expect from
9
+ # Atom#parse.
10
+ #
11
+ # NOTE: Since all of these functions are just that, functions without
12
+ # side effects, they are in a module and not in a class. It's hard to draw
13
+ # the line sometimes, but this is beyond.
14
+ #
15
+ module CanFlatten
16
+ # Takes a mixed value coming out of a parslet and converts it to a return
17
+ # value for the user by dropping things and merging hashes.
18
+ #
19
+ # Named is set to true if this result will be embedded in a Hash result from
20
+ # naming something using <code>.as(...)</code>. It changes the folding
21
+ # semantics of repetition.
22
+ #
23
+ def flatten(value, named=false) # :nodoc:
24
+ # Passes through everything that isn't an array of things
25
+ return value unless value.instance_of? Array
26
+
27
+ # Extracts the s-expression tag
28
+ tag, *tail = value
29
+
30
+ # Merges arrays:
31
+ result = tail.
32
+ map { |e| flatten(e) } # first flatten each element
33
+
34
+ case tag
35
+ when :sequence
36
+ return flatten_sequence(result)
37
+ when :maybe
38
+ return named ? result.first : result.first || ''
39
+ when :repetition
40
+ return flatten_repetition(result, named)
41
+ end
42
+
43
+ fail "BUG: Unknown tag #{tag.inspect}."
44
+ end
45
+
46
+ # Lisp style fold left where the first element builds the basis for
47
+ # an inject.
48
+ #
49
+ def foldl(list, &block)
50
+ return '' if list.empty?
51
+ list[1..-1].inject(list.first, &block)
52
+ end
53
+
54
+ # Flatten results from a sequence of parslets.
55
+ #
56
+ def flatten_sequence(list) # :nodoc:
57
+ foldl(list.compact) { |r, e| # and then merge flat elements
58
+ merge_fold(r, e)
59
+ }
60
+ end
61
+ def merge_fold(l, r) # :nodoc:
62
+ # equal pairs: merge. ----------------------------------------------------
63
+ if l.class == r.class
64
+ if l.is_a?(Hash)
65
+ warn_about_duplicate_keys(l, r)
66
+ return l.merge(r)
67
+ else
68
+ return l + r
69
+ end
70
+ end
71
+
72
+ # unequal pairs: hoist to same level. ------------------------------------
73
+
74
+ # Maybe classes are not equal, but both are stringlike?
75
+ if l.respond_to?(:to_str) && r.respond_to?(:to_str)
76
+ # if we're merging a String with a Slice, the slice wins.
77
+ return r if r.respond_to? :to_slice
78
+ return l if l.respond_to? :to_slice
79
+
80
+ fail "NOTREACHED: What other stringlike classes are there?"
81
+ end
82
+
83
+ # special case: If one of them is a string/slice, the other is more important
84
+ return l if r.respond_to? :to_str
85
+ return r if l.respond_to? :to_str
86
+
87
+ # otherwise just create an array for one of them to live in
88
+ return l + [r] if r.class == Hash
89
+ return [l] + r if l.class == Hash
90
+
91
+ fail "Unhandled case when foldr'ing sequence."
92
+ end
93
+
94
+ # Flatten results from a repetition of a single parslet. named indicates
95
+ # whether the user has named the result or not. If the user has named
96
+ # the results, we want to leave an empty list alone - otherwise it is
97
+ # turned into an empty string.
98
+ #
99
+ def flatten_repetition(list, named) # :nodoc:
100
+ if list.any? { |e| e.instance_of?(Hash) }
101
+ # If keyed subtrees are in the array, we'll want to discard all
102
+ # strings in between. To keep them, name them.
103
+ return list.select { |e| e.instance_of?(Hash) }
104
+ end
105
+
106
+ if list.any? { |e| e.instance_of?(Array) }
107
+ # If any arrays are nested in this array, flatten all arrays to this
108
+ # level.
109
+ return list.
110
+ select { |e| e.instance_of?(Array) }.
111
+ flatten(1)
112
+ end
113
+
114
+ # Consistent handling of empty lists, when we act on a named result
115
+ return [] if named && list.empty?
116
+
117
+ # If there are only strings, concatenate them and return that.
118
+ foldl(list) { |s,e| s+e }
119
+ end
120
+
121
+ # That annoying warning 'Duplicate subtrees while merging result' comes
122
+ # from here. You should add more '.as(...)' names to your intermediary tree.
123
+ #
124
+ def warn_about_duplicate_keys(h1, h2)
125
+ d = h1.keys & h2.keys
126
+ unless d.empty?
127
+ warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
128
+ " of the latter will be kept. (keys: #{d.inspect})"
129
+ end
130
+ end
131
+ end
132
+ end