parslet 1.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,9 +14,10 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
14
14
  # Model positive and negative lookahead by testing this flag.
15
15
  @positive = positive
16
16
  @bound_parslet = bound_parslet
17
+
17
18
  @error_msgs = {
18
- :positive => "lookahead: #{bound_parslet.inspect} didn't match, but should have",
19
- :negative => "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have"
19
+ :positive => ["Input should start with ", bound_parslet],
20
+ :negative => ["Input should not start with ", bound_parslet]
20
21
  }
21
22
  end
22
23
 
@@ -26,8 +27,8 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
26
27
  value = bound_parslet.apply(source, context)
27
28
  return success(nil) if positive ^ value.error?
28
29
 
29
- return error(source, @error_msgs[:positive]) if positive
30
- return error(source, @error_msgs[:negative])
30
+ return error(source, @error_msgs[:positive], pos) if positive
31
+ return error(source, @error_msgs[:negative], pos)
31
32
 
32
33
  # This is probably the only parslet that rewinds its input in #try.
33
34
  # Lookaheads NEVER consume their input, even on success, that's why.
@@ -41,8 +42,4 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
41
42
 
42
43
  "#{char}#{bound_parslet.to_s(prec)}"
43
44
  end
44
-
45
- def error_tree # :nodoc:
46
- bound_parslet.error_tree
47
- end
48
45
  end
@@ -21,7 +21,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
21
21
  # contents of parslets inner loop. Changes here affect parslets speed
22
22
  # enormously.
23
23
  error_pos = source.pos
24
- s = source.read(str.size)
24
+ s = source.read(str.bytesize)
25
25
 
26
26
  return success(s) if s == str
27
27
 
@@ -10,7 +10,7 @@ module Parslet::Atoms
10
10
  end
11
11
 
12
12
  class Str
13
- # Call back visitors #str method. See parslet/export for an example.
13
+ # Call back visitors #visit_str method. See parslet/export for an example.
14
14
  #
15
15
  def accept(visitor)
16
16
  visitor.visit_str(str)
@@ -18,7 +18,8 @@ module Parslet::Atoms
18
18
  end
19
19
 
20
20
  class Entity
21
- # Call back visitors #entity method. See parslet/export for an example.
21
+ # Call back visitors #visit_entity method. See parslet/export for an
22
+ # example.
22
23
  #
23
24
  def accept(visitor)
24
25
  visitor.visit_entity(name, block)
@@ -26,7 +27,8 @@ module Parslet::Atoms
26
27
  end
27
28
 
28
29
  class Named
29
- # Call back visitors #named method. See parslet/export for an example.
30
+ # Call back visitors #visit_named method. See parslet/export for an
31
+ # example.
30
32
  #
31
33
  def accept(visitor)
32
34
  visitor.visit_named(name, parslet)
@@ -34,7 +36,8 @@ module Parslet::Atoms
34
36
  end
35
37
 
36
38
  class Sequence
37
- # Call back visitors #sequence method. See parslet/export for an example.
39
+ # Call back visitors #visit_sequence method. See parslet/export for an
40
+ # example.
38
41
  #
39
42
  def accept(visitor)
40
43
  visitor.visit_sequence(parslets)
@@ -42,15 +45,17 @@ module Parslet::Atoms
42
45
  end
43
46
 
44
47
  class Repetition
45
- # Call back visitors #repetition method. See parslet/export for an example.
48
+ # Call back visitors #visit_repetition method. See parslet/export for an
49
+ # example.
46
50
  #
47
51
  def accept(visitor)
48
- visitor.visit_repetition(min, max, parslet)
52
+ visitor.visit_repetition(@tag, min, max, parslet)
49
53
  end
50
54
  end
51
55
 
52
56
  class Alternative
53
- # Call back visitors #alternative method. See parslet/export for an example.
57
+ # Call back visitors #visit_alternative method. See parslet/export for an
58
+ # example.
54
59
  #
55
60
  def accept(visitor)
56
61
  visitor.visit_alternative(alternatives)
@@ -58,7 +63,8 @@ module Parslet::Atoms
58
63
  end
59
64
 
60
65
  class Lookahead
61
- # Call back visitors #lookahead method. See parslet/export for an example.
66
+ # Call back visitors #visit_lookahead method. See parslet/export for an
67
+ # example.
62
68
  #
63
69
  def accept(visitor)
64
70
  visitor.visit_lookahead(positive, bound_parslet)
@@ -66,10 +72,18 @@ module Parslet::Atoms
66
72
  end
67
73
 
68
74
  class Re
69
- # Call back visitors #re method. See parslet/export for an example.
75
+ # Call back visitors #visit_re method. See parslet/export for an example.
70
76
  #
71
77
  def accept(visitor)
72
78
  visitor.visit_re(match)
73
79
  end
74
80
  end
75
81
  end
82
+
83
class Parslet::Parser
  # Visitor entry point for a whole parser: passes this parser's +root+ to
  # the visitor's #visit_parser method and returns whatever that returns.
  #
  def accept(visitor)
    visitor.visit_parser(root)
  end
end
@@ -0,0 +1,6 @@
1
+ module Parslet::Bytecode
2
+ end
3
+
4
+ require 'parslet/bytecode/instructions'
5
+ require 'parslet/bytecode/compiler'
6
+ require 'parslet/bytecode/vm'
@@ -0,0 +1,138 @@
1
+ require 'parslet/atoms/visitor'
2
+
3
+ module Parslet::Bytecode
4
# Turns a tree of atoms into a flat array of bytecode instructions. The
# compiler acts as a visitor: an atom's #accept calls back into one of the
# visit_* methods below, each of which appends instructions to the buffer.
#
class Compiler
  def initialize
    @buffer = []  # instructions emitted so far, in emission order
    @blocks = {}  # entity name => Block, so every entity gets one Block
  end

  # A position inside the instruction buffer. Forward references start out
  # unresolved (nil) and are filled in later through #resolve.
  #
  class Address
    attr_reader :address

    def initialize(address=nil)
      @address = address
    end

    # Points this address at the current end of the buffer of +vm+. Only
    # #buffer_pointer is called on +vm+; inside this class the compiler
    # itself is what gets passed in.
    def resolve(vm)
      @address = vm.buffer_pointer
    end

    def inspect
      "@#{@address}"
    end

    def to_s
      "@#{address}"
    end
  end

  # A subroutine that is compiled on first use. +block+ is called (once, as
  # long as it returns a truthy value) to obtain the atom, which is then
  # compiled into +compiler+ followed by a Return instruction.
  #
  class Block
    def initialize(name, block, compiler)
      @name = name
      @block = block
      @compiler = compiler
    end

    # Returns the start address of the compiled subroutine, compiling it
    # first if that has not happened yet.
    #
    # Raises NotImplementedError if the block returned nil. The check runs
    # before the address is recorded, so a failed attempt neither caches a
    # bogus address nor emits anything into the buffer.
    def address
      return @address if @address

      body = atom
      if body.nil?
        raise NotImplementedError,
          "rule(#{@name.inspect}) { ... } returns nil. " \
          "Still not implemented, but already used?"
      end

      # Record the address before compiling the body: a recursive reference
      # to this block then finds @address set and does not compile again.
      @address = @compiler.current_address
      body.accept(@compiler)
      @compiler.add Return.new

      @address
    end

    # The atom produced by the block, memoized via ||=.
    def atom
      @atom ||= @block.call
    end
  end

  # Compiles +atom+, terminates the program with Stop and returns the
  # instruction buffer.
  def compile(atom)
    atom.accept(self)
    add Stop.new
    @buffer
  end

  # Appends one instruction to the buffer.
  def add(instruction)
    @buffer << instruction
  end

  # A not yet resolved address, to be resolved once its target is known.
  def fwd_address
    Address.new
  end

  # An address pointing at the slot the next instruction will occupy.
  def current_address
    Address.new(buffer_pointer)
  end

  # Number of instructions emitted so far.
  def buffer_pointer
    @buffer.size
  end

  def visit_str(str)
    add Match.new(str)
  end

  def visit_re(match)
    add Re.new(match, 1)
  end

  # Emits every element followed by a CheckSequence that can bail out to
  # the end of the sequence, then a PackSequence collecting the results.
  def visit_sequence(parslets)
    emit_block do
      # The sequence atom is rebuilt only to render it in the message.
      sequence = Parslet::Atoms::Sequence.new(*parslets)
      error_msg = "Failed to match sequence (#{sequence.inspect})"

      end_adr = fwd_address
      parslets.each_with_index do |atom, idx|
        atom.accept(self)
        add CheckSequence.new(idx, end_adr, error_msg)
      end

      add PackSequence.new(parslets.size)

      end_adr.resolve(self)
    end
  end

  # Emits all alternatives in order, each followed by a BranchOnSuccess to
  # the end; if control falls through all of them, Fail is reached.
  def visit_alternative(alternatives)
    emit_block do
      adr_end = fwd_address

      add EnterFrame.new
      add PushPos.new
      alternatives.each_with_index do |alternative, idx|
        alternative.accept(self)
        add BranchOnSuccess.new(adr_end, idx)
      end
      add Fail.new(["Expected one of ", alternatives.inspect], alternatives.size)

      adr_end.resolve(self)
    end
  end

  # SetupRepeat, then the repeated body, then a Repeat instruction that
  # holds the address of the body's first instruction.
  def visit_repetition(tag, min, max, parslet)
    add SetupRepeat.new(tag)
    start = current_address
    parslet.accept(self)
    add Repeat.new(min, max, start, parslet)
  end

  def visit_named(name, parslet)
    parslet.accept(self)
    add Box.new(name)
  end

  def visit_lookahead(positive, parslet)
    add PushPos.new
    parslet.accept(self)
    add CheckAndReset.new(positive, parslet)
  end

  # Entities are not compiled inline: a CallBlock referring to the (shared)
  # Block for +name+ is emitted instead.
  def visit_entity(name, block)
    @blocks[name] ||= Block.new(name, block, self)
    add CallBlock.new(@blocks[name])
  end

  def visit_parser(root)
    root.accept(self)
  end

  # Wraps the instructions emitted by the given block of code between a
  # CheckCache (carrying the address just past the wrapped code) and a
  # StoreResult (carrying the address of the CheckCache instruction).
  def emit_block
    end_adr = fwd_address
    cache_adr = current_address
    add CheckCache.new(end_adr)

    yield

    add StoreResult.new(cache_adr)
    end_adr.resolve(self)
  end
end
138
+ end
@@ -0,0 +1,358 @@
1
+ module Parslet::Bytecode
2
# Instruction that tries to read +str+ from the VM's source. On a match,
# whatever source.read returned is pushed on the stack (it compares equal to
# the string; upstream describes it as the matched slice). On failure the
# source position is rewound and an error is set on the VM.
#
Match = Struct.new(:str) do
  def initialize(str)
    super
    # Precomputed; #run only appends the offending input to it.
    @mismatch_error_prefix = "Expected #{str.inspect}, but got "
  end

  def to_s
    "MATCH #{str.inspect}"
  end

  # Asks the source for str.bytesize and compares the result with +str+.
  def run(vm)
    source = vm.source
    start = source.pos
    chunk = source.read(str.bytesize)

    complete = chunk.size == str.size
    return vm.push(chunk) if complete && chunk == str

    # Failure: rewind first, so the error is created at the start position.
    source.pos = start
    cause = complete ? [@mismatch_error_prefix, chunk] : "Premature end of input"
    vm.set_error source.error(cause)
  end
end
33
+
34
# Instruction that reads +size+ from the VM's source and pushes what was
# read if it matches +re+. Otherwise the source position is rewound and an
# error is set on the VM.
#
Re = Struct.new(:re, :size) do
  def initialize(re, size)
    super
    # [1..-2] drops the first and last character of re.inspect.
    @failure = "Failed to match #{re.inspect[1..-2]}"
  end

  def to_s
    "RE #{re.inspect}, #{size}"
  end

  # Returns nil on both failure paths.
  def run(vm)
    source = vm.source
    start = source.pos
    chunk = source.read(size)

    problem =
      if chunk.size != size
        "Premature end of input"
      elsif !chunk.match(re)
        @failure
      end
    return vm.push(chunk) unless problem

    source.pos = start
    vm.set_error source.error(problem)
    nil
  end
end
65
+
66
# Prepares the stack for a repetition by pushing three values, in this
# order: the current source position, an occurrence counter starting at 0,
# and an accumulator array that starts out as [tag].
#
SetupRepeat = Struct.new(:tag) do
  def run(vm)
    start_pos = vm.source.pos
    vm.push(start_pos)
    vm.push(0)      # occurrences so far
    vm.push([tag])  # results are appended behind the tag
  end

  def to_s
    "STPRE #{tag.inspect}"
  end
end
77
+
78
# Repeat matching with a minimum of min and a maximum of max times.
#
# Runs after each attempt at the repeated body. Below the body's result (if
# any) the stack is expected to hold three values in the order pushed by
# SetupRepeat: source position, occurrence count, accumulator.
#
# +adr+ is the address jumped to for another round; +parslet+ is only used
# for messages.
#
Repeat = Struct.new(:min, :max, :adr, :parslet) do
  def initialize(*args)
    super

    @minrep_error = ["Expected at least #{min} of ", parslet]
  end

  def to_s
    "RPEAT #{min || 'n/a'}, #{max || 'n/a'}, #{adr}, #{parslet}"
  end

  # Returns nil whenever the repetition ends (successfully or not), and the
  # result of vm.jump when another round is started.
  def run(vm)
    source = vm.source

    unless vm.success?
      # The last attempt failed: go back to where that attempt started.
      pos, occurrences, accumulator = vm.pop(3)
      source.pos = pos

      if occurrences < min
        # Too few matches: fail, keeping the inner error as a child.
        error = source.error(@minrep_error, pos)
        error.children << vm.error
        vm.set_error error
      else
        # Enough matches were collected, so the repetition succeeds.
        vm.clear_error
        vm.push accumulator
      end
      return
    end

    # The last attempt succeeded: collect its result.
    result = vm.pop
    _pos, occurrences, accumulator = vm.pop(3)

    accumulator << result
    occurrences += 1

    # Maximum reached? Then we are done and push the result.
    if max && occurrences >= max
      vm.push accumulator
      return
    end

    # No maximum, or not reached yet: save the state and go around again.
    vm.push source.pos
    vm.push occurrences
    vm.push accumulator
    vm.jump adr
  end
end
136
+
137
# Emitted after every element of a sequence. Does nothing while the VM is
# in a success state. After a failure it pops +cleanup_items+ values off the
# stack, wraps the current VM error as the child of a new error carrying
# the +error+ message, and jumps to +adr+.
#
CheckSequence = Struct.new(:cleanup_items, :adr, :error) do
  def run(vm)
    return if vm.success?

    vm.pop(cleanup_items)

    wrapped = vm.source.error(error)
    wrapped.children << vm.error
    vm.set_error(wrapped)
    vm.jump(adr)
  end

  def to_s
    excerpt = error[0,50] + "..."
    "CHKSQ #{cleanup_items}, #{adr}, #{excerpt}"
  end
end
155
+
156
# Packs size stack elements into an array that is prefixed with the
# :sequence tag. This will later be converted by #flatten
#
# Raises a RuntimeError if the VM is not in a success state when this
# instruction is reached.
#
PackSequence = Struct.new(:size) do
  def run(vm)
    raise "Sequence runs into PackSequence with error flag set!" \
      unless vm.success?

    elts = vm.pop(size)
    vm.push [:sequence, *elts]
  end

  def to_s
    "PACK #{size}"
  end
end
173
+
174
# Opens a new stack frame by calling vm.enter_frame. The frame can later be
# thrown away with vm.discard_frame, which is useful when the number of
# values that will need popping is not known in advance.
#
EnterFrame = Class.new do
  def run(vm)
    vm.enter_frame
  end

  def to_s
    'ENTER'
  end
end
186
+
187
# Unconditionally fails with +message+. The topmost +size+ stack values are
# popped and become the children of the new error (the compiler passes the
# number of alternatives here, each of which has left its error on the
# stack). Afterwards the current stack frame is discarded and the error is
# set on the VM.
#
Fail = Struct.new(:message, :size) do
  def run(vm)
    causes = vm.pop(size)
    failure = vm.source.error(message)
    failure.children.replace(causes)

    # Drop whatever else is left in the frame.
    vm.discard_frame

    vm.set_error(failure)
  end

  def to_s
    "FAIL #{message}, #{size}"
  end
end
206
+
207
# Emitted after each alternative. When the VM is in a success state, the
# result is kept, the frame is discarded and execution continues at +adr+.
# Otherwise the failure is remembered on the stack and execution falls
# through to the next alternative.
#
BranchOnSuccess = Struct.new(:adr, :pos_ptr) do
  def run(vm)
    source = vm.source

    unless vm.success?
      # Rewind to the position saved before the alternatives were tried;
      # it is looked up through vm.value_at(pos_ptr).
      source.pos = vm.value_at(pos_ptr)

      # Keep the error as if it were a value. If every branch fails these
      # are available for building an error trace; if one succeeds,
      # discarding the frame gets rid of them.
      vm.push(vm.error)

      return vm.clear_error
    end

    # The stack holds the errors of earlier alternatives with the
    # successful result on top. Take the result off, discard the frame and
    # push the result again.
    winner = vm.pop
    vm.discard_frame
    vm.push(winner)

    vm.jump(adr)
  end

  def to_s
    "BRSUC #{adr}, #{pos_ptr}"
  end
end
243
+
244
# Wraps the topmost stack value in a single-entry hash keyed by +name+.
# Does nothing when the VM is in an error state.
#
# Consumes: parslet result
# Pushes:   { name => result }
#
Box = Struct.new(:name) do
  def run(vm)
    return unless vm.success?

    value = vm.pop
    vm.push(name => value)
  end

  def to_s
    "BOX #{name.inspect}"
  end
end
260
+
261
# Saves the current source position on the stack.
#
# Consumes: nothing
# Pushes:   vm.source.pos
#
PushPos = Class.new do
  def run(vm)
    vm.push(vm.source.pos)
  end

  def to_s
    'PSHPS'
  end
end
275
+
276
# Finishes a lookahead. Expects the stack to hold the source position saved
# by PushPos and, if the inner parslet succeeded, that parslet's result on
# top of it. Both are removed and the source is rewound to the saved
# position, so a lookahead never consumes input.
#
# If the outcome is the wanted one (success for a positive lookahead,
# failure for a negative one) any error is cleared and nil is pushed as the
# result. Otherwise an error is set on the VM.
#
# Consumes: parslet result (on success only), saved source.pos
# Pushes:   nil when the lookahead holds
# Effects:  resets source.pos
#
CheckAndReset = Struct.new(:positive, :parslet) do
  def run(vm)
    source = vm.source

    # A result is only on the stack if the inner parslet matched.
    vm.pop if vm.success?

    # Position from before the inner parslet was attempted.
    start_pos = vm.pop
    source.pos = start_pos

    satisfied = positive ? vm.success? : !vm.success?
    unless satisfied
      prefix = positive ? "Input should start with " : "Input should not start with "
      return vm.set_error(source.error([prefix, parslet], start_pos))
    end

    vm.clear_error
    vm.push nil
  end

  def to_s
    marker = positive ? ':&' : ':!'
    "CHKRS #{marker}, #{parslet.inspect}"
  end
end
311
+
312
# Calls the subroutine held by +block+ through vm.call. The target is
# obtained from block.address at run time; a block that compiles lazily in
# its #address method is thereby compiled on the first call.
#
CallBlock = Struct.new(:block) do
  def run(vm)
    target = block.address
    vm.call(target)
  end

  # Note: this asks the block for its address and atom as well.
  def to_s
    "LCALL #{block.address} (was atom<#{block.atom}>)"
  end
end
322
# Ends a subroutine by delegating to vm.call_ret. The compiler emits this
# instruction at the end of every compiled block.
#
Return = Class.new do
  def run(vm)
    vm.call_ret
  end

  def to_s
    'RETRN'
  end
end
330
# Halts execution by delegating to vm.stop. The compiler emits this as the
# last instruction of the main program.
#
Stop = Class.new do
  def run(vm)
    vm.stop
  end

  def to_s
    'STPVM'
  end
end
338
+
339
# Caching: asks the VM's cache about the code that follows, passing along
# +skip_adr+ (the address just past that code). If vm.access_cache returns
# a truthy value nothing else happens here; otherwise the current source
# position is pushed before the code runs.
#
CheckCache = Struct.new(:skip_adr) do
  def run(vm)
    vm.push(vm.source.pos) unless vm.access_cache(skip_adr)
  end

  def to_s
    "RETCA #{skip_adr}"
  end
end
350
# Caching: hands +adr+ to vm.store_cache. The compiler passes the address
# of the matching CheckCache instruction as +adr+.
#
StoreResult = Struct.new(:adr) do
  def run(vm)
    vm.store_cache(adr)
  end

  def to_s
    "STOCA #{adr}"
  end
end
358
+ end