parslet 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,27 @@
3
3
  - prsnt? and absnt? are now finally banned into oblivion. Wasting vowels for
4
4
  the win.
5
5
 
6
+ = 1.3.1 / ???
7
+
8
+ = 1.3.0 / 5Mar2012
9
+
10
+ ! Parslet::Transform::Context is now much more well-behaved. It has
11
+ #respond_to? and #method_missing; it now looks like a plain old Ruby
12
+ object with instance variables and attribute readers.
13
+
14
+ - Grammar transforms turned out to be a dead end and have been removed.
15
+
16
+ ! A few problems in error message generation have been fixed. This will
17
+ improve diagnostics further.
18
+
19
+ + A VM driven parser engine: Removes the limitation that parsing needs a
20
+ lot of stack space, something dearly missing from Ruby 1.9.3 fibers.
21
+ This engine is experimental and might be removed in the future.
22
+
23
+ ! Interaction with mathn fixed - Line number generation will terminate.
24
+
25
+ . Internal reorganisation, removing cruft and bit rot.
26
+
6
27
  = 1.2.3 / 22Sep2011
7
28
 
8
29
  + Transform#apply can now be called with a hash as second argument. This
data/README CHANGED
@@ -53,6 +53,6 @@ ruby-1.8.7-p334 for better results.
53
53
 
54
54
  STATUS
55
55
 
56
- At version 1.2.3 - See HISTORY.txt for changes.
56
+ At version 1.3.0 - See HISTORY.txt for changes.
57
57
 
58
58
  (c) 2010 Kaspar Schiess
@@ -0,0 +1,66 @@
1
+ # An example of how to ignore whitespace. Use the composition, Luke.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../lib"
4
+
5
+ require 'pp'
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ class AParser < Parslet::Parser
10
+ root :as
11
+
12
+ rule(:as) { a.repeat }
13
+ rule(:a) { str('a').as(:a) }
14
+ end
15
+
16
+ class WsIgnoreSource
17
+ def initialize(string)
18
+ @io = StringIO.new(string)
19
+ @early_eof = nil
20
+ end
21
+
22
+ def pos
23
+ @io.pos
24
+ end
25
+
26
+ def pos=(n)
27
+ @io.pos = n
28
+ end
29
+
30
+ def gets(buf, n)
31
+ return nil if eof?
32
+
33
+ return read(n).tap {
34
+ @early_eof = pos unless can_read?
35
+ }
36
+ end
37
+
38
+ def eof?
39
+ @io.eof? || # the underlying source is EOF
40
+ @early_eof && pos >= @early_eof # we have no non-ws chars left
41
+ end
42
+
43
+ private
44
+
45
+ # Reads up to n non-space chars from @io, skipping any spaces encountered.
46
+ def read(n)
47
+ b = ''
48
+ while b.size < n && !@io.eof?
49
+ c = @io.gets(nil, 1)
50
+ b << c unless c == ' '
51
+ end
52
+ b
53
+ end
54
+
55
+ # True if there are any non-space chars left in @io. Restores @io.pos afterwards.
56
+ def can_read?
57
+ old_pos = @io.pos
58
+ read(1).size == 1
59
+ rescue => ex
60
+ return false
61
+ ensure
62
+ @io.pos = old_pos
63
+ end
64
+ end
65
+
66
+ pp AParser.new.parse_with_debug(WsIgnoreSource.new('a a a a '))
@@ -0,0 +1,44 @@
1
+ # Demonstrates that we have a compatibility fix to mathn's weird idea of
2
+ # integer mathematics.
3
+ # This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks!
4
+
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
+
7
+ require 'parslet'
8
+ require 'parslet/convenience'
9
+ include Parslet
10
+
11
+ def attempt_parse
12
+ possible_whitespace = match['\s'].repeat
13
+
14
+ cephalopod =
15
+ str('octopus') |
16
+ str('squid')
17
+
18
+ parenthesized_cephalopod =
19
+ str('(') >>
20
+ possible_whitespace >>
21
+ cephalopod >>
22
+ possible_whitespace >>
23
+ str(')')
24
+
25
+ parser =
26
+ possible_whitespace >>
27
+ parenthesized_cephalopod >>
28
+ possible_whitespace
29
+
30
+ # This parse fails, but that is not the point. When mathn is in the current
31
+ # ruby environment, it modifies integer division in a way that makes
32
+ # parslet loop indefinitely.
33
+ parser.parse %{(\nsqeed)\n}
34
+ rescue Parslet::ParseFailed
35
+ end
36
+
37
+ attempt_parse
38
+ puts 'it terminates before we require mathn'
39
+
40
+ puts "requiring mathn now"
41
+ require 'mathn'
42
+ puts "and trying again (will hang without the fix)"
43
+ attempt_parse # but it doesn't terminate after requiring mathn
44
+ puts "okay!"
@@ -0,0 +1 @@
1
+ [{:a=>"a"@0}, {:a=>"a"@1}, {:a=>"a"@5}, {:a=>"a"@7}]
@@ -1,9 +1,9 @@
1
1
  0.0.0.0 -> {:ipv4=>"0.0.0.0"@0}
2
2
  255.255.255.255 -> {:ipv4=>"255.255.255.255"@0}
3
- 255.255.255 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
3
+ 255.255.255 -> Failed: Expected one of [IPV4, IPV6] at line 1 char 1.
4
4
  1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0}
5
5
  12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0}
6
6
  12AD::34FC -> {:ipv6=>"12AD::34FC"@0}
7
7
  12AD:: -> {:ipv6=>"12AD::"@0}
8
8
  :: -> {:ipv6=>"::"@0}
9
- 1:2 -> Failed: Expected one of [IPV4, IPV6]. at line 1 char 1.
9
+ 1:2 -> Failed: Expected one of [IPV4, IPV6] at line 1 char 1.
@@ -0,0 +1,4 @@
1
+ it terminates before we require mathn
2
+ requiring mathn now
3
+ and trying again (will hang without the fix)
4
+ okay!
@@ -73,6 +73,11 @@ module Parslet
73
73
  # parslet.parse_with_debug(str)
74
74
  #
75
75
  class ParseFailed < StandardError
76
+ def initialize(message, cause=nil)
77
+ super(message)
78
+ @cause = cause
79
+ end
80
+ attr_reader :cause
76
81
  end
77
82
 
78
83
  # Raised when the parse operation didn't consume all of its input. In this
@@ -224,10 +229,12 @@ module Parslet
224
229
  end
225
230
 
226
231
  require 'parslet/slice'
232
+ require 'parslet/cause'
227
233
  require 'parslet/source'
228
234
  require 'parslet/error_tree'
229
235
  require 'parslet/atoms'
230
236
  require 'parslet/pattern'
231
237
  require 'parslet/pattern/binding'
232
238
  require 'parslet/transform'
233
- require 'parslet/parser'
239
+ require 'parslet/parser'
240
+ require 'parslet/bytecode'
@@ -15,6 +15,7 @@ module Parslet::Atoms
15
15
  OUTER = (prec+=1) # printing is done here.
16
16
  end
17
17
 
18
+ require 'parslet/atoms/can_flatten'
18
19
  require 'parslet/atoms/context'
19
20
  require 'parslet/atoms/dsl'
20
21
  require 'parslet/atoms/base'
@@ -19,7 +19,7 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
19
19
  super()
20
20
 
21
21
  @alternatives = alternatives
22
- @error_msg = "Expected one of #{alternatives.inspect}."
22
+ @error_msg = "Expected one of #{alternatives.inspect}"
23
23
  end
24
24
 
25
25
  #---
@@ -6,6 +6,7 @@
6
6
  class Parslet::Atoms::Base
7
7
  include Parslet::Atoms::Precedence
8
8
  include Parslet::Atoms::DSL
9
+ include Parslet::Atoms::CanFlatten
9
10
 
10
11
  # Internally, all parsing functions return either an instance of Fail
11
12
  # or an instance of Success.
@@ -25,7 +26,23 @@ class Parslet::Atoms::Base
25
26
  # and return a result. If the parse fails, a Parslet::ParseFailed exception
26
27
  # will be thrown.
27
28
  #
28
- def parse(io)
29
+ def parse(io, traditional=true)
30
+ if traditional
31
+ parse_traditional(io)
32
+ else
33
+ parse_vm(io)
34
+ end
35
+ end
36
+
37
+ def parse_vm(io)
38
+ compiler = Parslet::Bytecode::Compiler.new
39
+ program = compiler.compile(self)
40
+
41
+ vm = Parslet::Bytecode::VM.new
42
+ vm.run(program, io)
43
+ end
44
+
45
+ def parse_traditional(io)
29
46
  source = Parslet::Source.new(io)
30
47
  context = Parslet::Atoms::Context.new
31
48
 
@@ -36,7 +53,8 @@ class Parslet::Atoms::Base
36
53
  # Stack trace will be off, but the error tree should explain the reason
37
54
  # it failed.
38
55
  if value.error?
39
- parse_failed(value.message)
56
+ @last_cause = value.message
57
+ @last_cause.raise
40
58
  end
41
59
 
42
60
  # assert: value is a success answer
@@ -48,16 +66,15 @@ class Parslet::Atoms::Base
48
66
  # error to fail with. Otherwise just report that we cannot consume the
49
67
  # input.
50
68
  if cause
51
- # We're not using #parse_failed here, since it assigns to @last_cause.
52
- # Still: We'll raise this differently, since the real cause is different.
69
+ # NOTE We don't overwrite last_cause here.
53
70
  raise Parslet::UnconsumedInput,
54
71
  "Unconsumed input, maybe because of this: #{cause}"
55
72
  else
56
73
  old_pos = source.pos
57
- parse_failed(
58
- format_cause(source,
59
- "Don't know what to do with #{source.read(100)}", old_pos),
60
- Parslet::UnconsumedInput)
74
+ @last_cause = source.error(
75
+ "Don't know what to do with #{source.read(100)}", old_pos)
76
+
77
+ @last_cause.raise(Parslet::UnconsumedInput)
61
78
  end
62
79
  end
63
80
 
@@ -94,110 +111,6 @@ class Parslet::Atoms::Base
94
111
  "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
95
112
  end
96
113
 
97
- # Takes a mixed value coming out of a parslet and converts it to a return
98
- # value for the user by dropping things and merging hashes.
99
- #
100
- # Named is set to true if this result will be embedded in a Hash result from
101
- # naming something using <code>.as(...)</code>. It changes the folding
102
- # semantics of repetition.
103
- #
104
- def flatten(value, named=false) # :nodoc:
105
- # Passes through everything that isn't an array of things
106
- return value unless value.instance_of? Array
107
-
108
- # Extracts the s-expression tag
109
- tag, *tail = value
110
-
111
- # Merges arrays:
112
- result = tail.
113
- map { |e| flatten(e) } # first flatten each element
114
-
115
- case tag
116
- when :sequence
117
- return flatten_sequence(result)
118
- when :maybe
119
- return named ? result.first : result.first || ''
120
- when :repetition
121
- return flatten_repetition(result, named)
122
- end
123
-
124
- fail "BUG: Unknown tag #{tag.inspect}."
125
- end
126
-
127
- # Lisp style fold left where the first element builds the basis for
128
- # an inject.
129
- #
130
- def foldl(list, &block)
131
- return '' if list.empty?
132
- list[1..-1].inject(list.first, &block)
133
- end
134
-
135
- # Flatten results from a sequence of parslets.
136
- #
137
- def flatten_sequence(list) # :nodoc:
138
- foldl(list.compact) { |r, e| # and then merge flat elements
139
- merge_fold(r, e)
140
- }
141
- end
142
- def merge_fold(l, r) # :nodoc:
143
- # equal pairs: merge. ----------------------------------------------------
144
- if l.class == r.class
145
- if l.is_a?(Hash)
146
- warn_about_duplicate_keys(l, r)
147
- return l.merge(r)
148
- else
149
- return l + r
150
- end
151
- end
152
-
153
- # unequal pairs: hoist to same level. ------------------------------------
154
-
155
- # Maybe classes are not equal, but both are stringlike?
156
- if l.respond_to?(:to_str) && r.respond_to?(:to_str)
157
- # if we're merging a String with a Slice, the slice wins.
158
- return r if r.respond_to? :to_slice
159
- return l if l.respond_to? :to_slice
160
-
161
- fail "NOTREACHED: What other stringlike classes are there?"
162
- end
163
-
164
- # special case: If one of them is a string/slice, the other is more important
165
- return l if r.respond_to? :to_str
166
- return r if l.respond_to? :to_str
167
-
168
- # otherwise just create an array for one of them to live in
169
- return l + [r] if r.class == Hash
170
- return [l] + r if l.class == Hash
171
-
172
- fail "Unhandled case when foldr'ing sequence."
173
- end
174
-
175
- # Flatten results from a repetition of a single parslet. named indicates
176
- # whether the user has named the result or not. If the user has named
177
- # the results, we want to leave an empty list alone - otherwise it is
178
- # turned into an empty string.
179
- #
180
- def flatten_repetition(list, named) # :nodoc:
181
- if list.any? { |e| e.instance_of?(Hash) }
182
- # If keyed subtrees are in the array, we'll want to discard all
183
- # strings inbetween. To keep them, name them.
184
- return list.select { |e| e.instance_of?(Hash) }
185
- end
186
-
187
- if list.any? { |e| e.instance_of?(Array) }
188
- # If any arrays are nested in this array, flatten all arrays to this
189
- # level.
190
- return list.
191
- select { |e| e.instance_of?(Array) }.
192
- flatten(1)
193
- end
194
-
195
- # Consistent handling of empty lists, when we act on a named result
196
- return [] if named && list.empty?
197
-
198
- # If there are only strings, concatenate them and return that.
199
- foldl(list) { |s,e| s+e }
200
- end
201
114
 
202
115
  # Debug printing - in Treetop syntax.
203
116
  #
@@ -245,51 +158,7 @@ private
245
158
  # Produces an instance of Fail and returns it.
246
159
  #
247
160
  def error(source, str, pos=nil)
248
- @last_cause = format_cause(source, str, pos)
161
+ @last_cause = source.error(str, pos)
249
162
  Fail.new(@last_cause)
250
163
  end
251
-
252
- # Signals to the outside that the parse has failed. Use this in conjunction
253
- # with #format_cause for nice error messages.
254
- #
255
- def parse_failed(cause, exception_klass=Parslet::ParseFailed)
256
- @last_cause = cause
257
- raise exception_klass,
258
- @last_cause.to_s
259
- end
260
-
261
- # An internal class that allows delaying the construction of error messages
262
- # (as strings) until we really need to print them.
263
- #
264
- class Cause < Struct.new(:message, :source, :pos)
265
- def to_s
266
- line, column = source.line_and_column(pos)
267
- # Allow message to be a list of objects. Join them here, since we now
268
- # really need it.
269
- Array(message).map { |o|
270
- o.respond_to?(:to_slice) ?
271
- o.str.inspect :
272
- o.to_s }.join + " at line #{line} char #{column}."
273
- end
274
- end
275
-
276
- # Appends 'at line ... char ...' to the string given. Use +pos+ to override
277
- # the position of the +source+. This method returns an object that can
278
- # be turned into a string using #to_s.
279
- #
280
- def format_cause(source, str, pos=nil)
281
- real_pos = (pos||source.pos)
282
- Cause.new(str, source, real_pos)
283
- end
284
-
285
- # That annoying warning 'Duplicate subtrees while merging result' comes
286
- # from here. You should add more '.as(...)' names to your intermediary tree.
287
- #
288
- def warn_about_duplicate_keys(h1, h2)
289
- d = h1.keys & h2.keys
290
- unless d.empty?
291
- warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
292
- " of the latter will be kept. (keys: #{d.inspect})"
293
- end
294
- end
295
164
  end
@@ -0,0 +1,132 @@
1
+
2
+ module Parslet::Atoms
3
+ # A series of helper functions that have the common topic of flattening
4
+ # result values into the intermediary tree that consists of Ruby Hashes and
5
+ # Arrays.
6
+ #
7
+ # This module has one main function, #flatten, that takes an annotated
8
+ # structure as input and returns the reduced form that users expect from
9
+ # Atom#parse.
10
+ #
11
+ # NOTE: Since all of these functions are just that, functions without
12
+ # side effects, they are in a module and not in a class. It's hard to draw
13
+ # the line sometimes, but this is beyond.
14
+ #
15
+ module CanFlatten
16
+ # Takes a mixed value coming out of a parslet and converts it to a return
17
+ # value for the user by dropping things and merging hashes.
18
+ #
19
+ # Named is set to true if this result will be embedded in a Hash result from
20
+ # naming something using <code>.as(...)</code>. It changes the folding
21
+ # semantics of repetition.
22
+ #
23
+ def flatten(value, named=false) # :nodoc:
24
+ # Passes through everything that isn't an array of things
25
+ return value unless value.instance_of? Array
26
+
27
+ # Extracts the s-expression tag
28
+ tag, *tail = value
29
+
30
+ # Merges arrays:
31
+ result = tail.
32
+ map { |e| flatten(e) } # first flatten each element
33
+
34
+ case tag
35
+ when :sequence
36
+ return flatten_sequence(result)
37
+ when :maybe
38
+ return named ? result.first : result.first || ''
39
+ when :repetition
40
+ return flatten_repetition(result, named)
41
+ end
42
+
43
+ fail "BUG: Unknown tag #{tag.inspect}."
44
+ end
45
+
46
+ # Lisp style fold left where the first element builds the basis for
47
+ # an inject.
48
+ #
49
+ def foldl(list, &block)
50
+ return '' if list.empty?
51
+ list[1..-1].inject(list.first, &block)
52
+ end
53
+
54
+ # Flatten results from a sequence of parslets.
55
+ #
56
+ def flatten_sequence(list) # :nodoc:
57
+ foldl(list.compact) { |r, e| # and then merge flat elements
58
+ merge_fold(r, e)
59
+ }
60
+ end
61
+ def merge_fold(l, r) # :nodoc:
62
+ # equal pairs: merge. ----------------------------------------------------
63
+ if l.class == r.class
64
+ if l.is_a?(Hash)
65
+ warn_about_duplicate_keys(l, r)
66
+ return l.merge(r)
67
+ else
68
+ return l + r
69
+ end
70
+ end
71
+
72
+ # unequal pairs: hoist to same level. ------------------------------------
73
+
74
+ # Maybe classes are not equal, but both are stringlike?
75
+ if l.respond_to?(:to_str) && r.respond_to?(:to_str)
76
+ # if we're merging a String with a Slice, the slice wins.
77
+ return r if r.respond_to? :to_slice
78
+ return l if l.respond_to? :to_slice
79
+
80
+ fail "NOTREACHED: What other stringlike classes are there?"
81
+ end
82
+
83
+ # special case: If one of them is a string/slice, the other is more important
84
+ return l if r.respond_to? :to_str
85
+ return r if l.respond_to? :to_str
86
+
87
+ # otherwise just create an array for one of them to live in
88
+ return l + [r] if r.class == Hash
89
+ return [l] + r if l.class == Hash
90
+
91
+ fail "Unhandled case when foldr'ing sequence."
92
+ end
93
+
94
+ # Flatten results from a repetition of a single parslet. named indicates
95
+ # whether the user has named the result or not. If the user has named
96
+ # the results, we want to leave an empty list alone - otherwise it is
97
+ # turned into an empty string.
98
+ #
99
+ def flatten_repetition(list, named) # :nodoc:
100
+ if list.any? { |e| e.instance_of?(Hash) }
101
+ # If keyed subtrees are in the array, we'll want to discard all
102
+ # strings in between. To keep them, name them.
103
+ return list.select { |e| e.instance_of?(Hash) }
104
+ end
105
+
106
+ if list.any? { |e| e.instance_of?(Array) }
107
+ # If any arrays are nested in this array, flatten all arrays to this
108
+ # level.
109
+ return list.
110
+ select { |e| e.instance_of?(Array) }.
111
+ flatten(1)
112
+ end
113
+
114
+ # Consistent handling of empty lists, when we act on a named result
115
+ return [] if named && list.empty?
116
+
117
+ # If there are only strings, concatenate them and return that.
118
+ foldl(list) { |s,e| s+e }
119
+ end
120
+
121
+ # That annoying warning 'Duplicate subtrees while merging result' comes
122
+ # from here. You should add more '.as(...)' names to your intermediary tree.
123
+ #
124
+ def warn_about_duplicate_keys(h1, h2)
125
+ d = h1.keys & h2.keys
126
+ unless d.empty?
127
+ warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
128
+ " of the latter will be kept. (keys: #{d.inspect})"
129
+ end
130
+ end
131
+ end
132
+ end