parslet 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,9 +14,10 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
14
14
  # Model positive and negative lookahead by testing this flag.
15
15
  @positive = positive
16
16
  @bound_parslet = bound_parslet
17
+
17
18
  @error_msgs = {
18
- :positive => "lookahead: #{bound_parslet.inspect} didn't match, but should have",
19
- :negative => "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have"
19
+ :positive => ["Input should start with ", bound_parslet],
20
+ :negative => ["Input should not start with ", bound_parslet]
20
21
  }
21
22
  end
22
23
 
@@ -26,8 +27,8 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
26
27
  value = bound_parslet.apply(source, context)
27
28
  return success(nil) if positive ^ value.error?
28
29
 
29
- return error(source, @error_msgs[:positive]) if positive
30
- return error(source, @error_msgs[:negative])
30
+ return error(source, @error_msgs[:positive], pos) if positive
31
+ return error(source, @error_msgs[:negative], pos)
31
32
 
32
33
  # This is probably the only parslet that rewinds its input in #try.
33
34
  # Lookaheads NEVER consume their input, even on success, that's why.
@@ -41,8 +42,4 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
41
42
 
42
43
  "#{char}#{bound_parslet.to_s(prec)}"
43
44
  end
44
-
45
- def error_tree # :nodoc:
46
- bound_parslet.error_tree
47
- end
48
45
  end
@@ -21,7 +21,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
21
21
  # contents of parslets inner loop. Changes here affect parslets speed
22
22
  # enormously.
23
23
  error_pos = source.pos
24
- s = source.read(str.size)
24
+ s = source.read(str.bytesize)
25
25
 
26
26
  return success(s) if s == str
27
27
 
@@ -10,7 +10,7 @@ module Parslet::Atoms
10
10
  end
11
11
 
12
12
  class Str
13
- # Call back visitors #str method. See parslet/export for an example.
13
+ # Call back the visitor's #visit_str method. See parslet/export for an example.
14
14
  #
15
15
  def accept(visitor)
16
16
  visitor.visit_str(str)
@@ -18,7 +18,8 @@ module Parslet::Atoms
18
18
  end
19
19
 
20
20
  class Entity
21
- # Call back visitors #entity method. See parslet/export for an example.
21
+ # Call back the visitor's #visit_entity method. See parslet/export for an
22
+ # example.
22
23
  #
23
24
  def accept(visitor)
24
25
  visitor.visit_entity(name, block)
@@ -26,7 +27,8 @@ module Parslet::Atoms
26
27
  end
27
28
 
28
29
  class Named
29
- # Call back visitors #named method. See parslet/export for an example.
30
+ # Call back the visitor's #visit_named method. See parslet/export for an
31
+ # example.
30
32
  #
31
33
  def accept(visitor)
32
34
  visitor.visit_named(name, parslet)
@@ -34,7 +36,8 @@ module Parslet::Atoms
34
36
  end
35
37
 
36
38
  class Sequence
37
- # Call back visitors #sequence method. See parslet/export for an example.
39
+ # Call back the visitor's #visit_sequence method. See parslet/export for an
40
+ # example.
38
41
  #
39
42
  def accept(visitor)
40
43
  visitor.visit_sequence(parslets)
@@ -42,15 +45,17 @@ module Parslet::Atoms
42
45
  end
43
46
 
44
47
  class Repetition
45
- # Call back visitors #repetition method. See parslet/export for an example.
48
+ # Call back the visitor's #visit_repetition method. See parslet/export for an
49
+ # example.
46
50
  #
47
51
  def accept(visitor)
48
- visitor.visit_repetition(min, max, parslet)
52
+ visitor.visit_repetition(@tag, min, max, parslet)
49
53
  end
50
54
  end
51
55
 
52
56
  class Alternative
53
- # Call back visitors #alternative method. See parslet/export for an example.
57
+ # Call back the visitor's #visit_alternative method. See parslet/export for an
58
+ # example.
54
59
  #
55
60
  def accept(visitor)
56
61
  visitor.visit_alternative(alternatives)
@@ -58,7 +63,8 @@ module Parslet::Atoms
58
63
  end
59
64
 
60
65
  class Lookahead
61
- # Call back visitors #lookahead method. See parslet/export for an example.
66
+ # Call back the visitor's #visit_lookahead method. See parslet/export for an
67
+ # example.
62
68
  #
63
69
  def accept(visitor)
64
70
  visitor.visit_lookahead(positive, bound_parslet)
@@ -66,10 +72,18 @@ module Parslet::Atoms
66
72
  end
67
73
 
68
74
  class Re
69
- # Call back visitors #re method. See parslet/export for an example.
75
+ # Call back the visitor's #visit_re method. See parslet/export for an example.
70
76
  #
71
77
  def accept(visitor)
72
78
  visitor.visit_re(match)
73
79
  end
74
80
  end
75
81
  end
82
+
83
+ class Parslet::Parser
84
+ # Call back the visitor's #visit_parser method.
85
+ #
86
+ def accept(visitor)
87
+ visitor.visit_parser(root)
88
+ end
89
+ end
@@ -0,0 +1,6 @@
1
+ module Parslet::Bytecode
2
+ end
3
+
4
+ require 'parslet/bytecode/instructions'
5
+ require 'parslet/bytecode/compiler'
6
+ require 'parslet/bytecode/vm'
@@ -0,0 +1,138 @@
1
+ require 'parslet/atoms/visitor'
2
+
3
+ module Parslet::Bytecode
4
+ class Compiler
5
+ def initialize
6
+ @buffer = []
7
+ @blocks = Hash.new
8
+ end
9
+
10
+ class Address
11
+ attr_reader :address
12
+ def initialize(address=nil)
13
+ @address = address
14
+ end
15
+ def resolve(vm)
16
+ @address = vm.buffer_pointer
17
+ end
18
+ def inspect
19
+ "@#{@address}"
20
+ end
21
+ def to_s
22
+ "@#{address}"
23
+ end
24
+ end
25
+ class Block
26
+ def initialize(name, block, compiler)
27
+ @name = name
28
+ @block = block
29
+ @compiler = compiler
30
+ end
31
+ def address
32
+ return @address if @address
33
+
34
+ # Actual compilation:
35
+
36
+ # TODO: raise a "not implemented" error if the block returns nil (see Entity)
37
+ @address = @compiler.current_address
38
+ atom.accept(@compiler)
39
+ @compiler.add Return.new
40
+
41
+ return @address
42
+ end
43
+ def atom
44
+ @atom ||= @block.call
45
+ end
46
+ end
47
+
48
+ def compile(atom)
49
+ atom.accept(self)
50
+ add Stop.new
51
+ @buffer
52
+ end
53
+ def add(instruction)
54
+ @buffer << instruction
55
+ end
56
+
57
+ def fwd_address
58
+ Address.new
59
+ end
60
+ def current_address
61
+ Address.new(buffer_pointer)
62
+ end
63
+ def buffer_pointer
64
+ @buffer.size
65
+ end
66
+
67
+ def visit_str(str)
68
+ add Match.new(str)
69
+ end
70
+ def visit_re(match)
71
+ add Re.new(match, 1)
72
+ end
73
+ def visit_sequence(parslets)
74
+ emit_block do
75
+ sequence = Parslet::Atoms::Sequence.new(*parslets)
76
+ error_msg = "Failed to match sequence (#{sequence.inspect})"
77
+
78
+ end_adr = fwd_address
79
+ parslets.each_with_index do |atom, idx|
80
+ atom.accept(self)
81
+ add CheckSequence.new(idx, end_adr, error_msg)
82
+ end
83
+
84
+ add PackSequence.new(parslets.size)
85
+
86
+ end_adr.resolve(self)
87
+ end
88
+ end
89
+ def visit_alternative(alternatives)
90
+ emit_block do
91
+ adr_end = fwd_address
92
+
93
+ add EnterFrame.new
94
+ add PushPos.new
95
+ alternatives.each_with_index do |alternative, idx|
96
+ alternative.accept(self)
97
+ add BranchOnSuccess.new(adr_end, idx)
98
+ end
99
+ add Fail.new(["Expected one of ", alternatives.inspect], alternatives.size)
100
+
101
+ adr_end.resolve(self)
102
+ end
103
+ end
104
+ def visit_repetition(tag, min, max, parslet)
105
+ add SetupRepeat.new(tag)
106
+ start = current_address
107
+ parslet.accept(self)
108
+ add Repeat.new(min, max, start, parslet)
109
+ end
110
+ def visit_named(name, parslet)
111
+ parslet.accept(self)
112
+ add Box.new(name)
113
+ end
114
+ def visit_lookahead(positive, parslet)
115
+ add PushPos.new
116
+ parslet.accept(self)
117
+ add CheckAndReset.new(positive, parslet)
118
+ end
119
+ def visit_entity(name, block)
120
+ @blocks[name] ||= Block.new(name, block, self)
121
+ add CallBlock.new(@blocks[name])
122
+ end
123
+ def visit_parser(root)
124
+ root.accept(self)
125
+ end
126
+
127
+ def emit_block
128
+ end_adr = fwd_address
129
+ cache_adr = current_address
130
+ add CheckCache.new(end_adr)
131
+
132
+ yield
133
+
134
+ add StoreResult.new(cache_adr)
135
+ end_adr.resolve(self)
136
+ end
137
+ end
138
+ end
@@ -0,0 +1,358 @@
1
+ module Parslet::Bytecode
2
+ # Matches the string and pushes the result on the stack (looks like the
3
+ # string, but is really the slice that was matched).
4
+ #
5
+ Match = Struct.new(:str) do
6
+ def initialize(str)
7
+ super
8
+ @mismatch_error_prefix = "Expected #{str.inspect}, but got "
9
+ end
10
+
11
+ def to_s
12
+ "MATCH #{str.inspect}"
13
+ end
14
+
15
+ def run(vm)
16
+ source = vm.source
17
+ error_pos = source.pos
18
+ s = source.read(str.bytesize)
19
+
20
+ if s.size != str.size
21
+ source.pos = error_pos
22
+ vm.set_error source.error("Premature end of input")
23
+ else
24
+ if s == str
25
+ vm.push(s)
26
+ else
27
+ source.pos = error_pos
28
+ vm.set_error source.error([@mismatch_error_prefix, s])
29
+ end
30
+ end
31
+ end
32
+ end
33
+
34
+ Re = Struct.new(:re, :size) do
35
+ def initialize(re, size)
36
+ super
37
+ @failure = "Failed to match #{re.inspect[1..-2]}"
38
+ end
39
+
40
+ def to_s
41
+ "RE #{re.inspect}, #{size}"
42
+ end
43
+
44
+ def run(vm)
45
+ source = vm.source
46
+
47
+ error_pos = source.pos
48
+ s = source.read(size)
49
+
50
+ if s.size != size
51
+ source.pos = error_pos
52
+ vm.set_error source.error("Premature end of input")
53
+ return
54
+ end
55
+
56
+ if !s.match(re)
57
+ source.pos = error_pos
58
+ vm.set_error source.error(@failure)
59
+ return
60
+ end
61
+
62
+ vm.push s
63
+ end
64
+ end
65
+
66
+ SetupRepeat = Struct.new(:tag) do
67
+ def run(vm)
68
+ vm.push vm.source.pos
69
+ vm.push 0 # occurrences
70
+ vm.push [tag] # will collect results
71
+ end
72
+
73
+ def to_s
74
+ "STPRE #{tag.inspect}"
75
+ end
76
+ end
77
+
78
+ # Repeat matching with a minimum of min and a maximum of max times.
79
+ #
80
+ Repeat = Struct.new(:min, :max, :adr, :parslet) do
81
+ def initialize(*args)
82
+ super
83
+
84
+ @minrep_error = ["Expected at least #{min} of ", parslet]
85
+ end
86
+ def to_s
87
+ "RPEAT #{min || 'n/a'}, #{max || 'n/a'}, #{adr}, #{parslet}"
88
+ end
89
+ def run(vm)
90
+ source = vm.source
91
+ start_position = source.pos
92
+
93
+ unless vm.success?
94
+ pos, occurrences, accumulator = vm.pop(3)
95
+ source.pos = pos
96
+
97
+ # We've encountered an error. Are we still below the minimum number of
98
+ # matches?
99
+ if occurrences < min
100
+ error = source.error(@minrep_error, pos)
101
+ error.children << vm.error
102
+ vm.set_error error
103
+ return
104
+ end
105
+
106
+ # assert: occurrences >= min
107
+
108
+ # We've matched the minimum number required, so this is a success:
109
+ vm.clear_error
110
+ vm.push accumulator
111
+ return
112
+ end
113
+
114
+ # assert: vm.success?
115
+
116
+ result = vm.pop
117
+ pos, occurrences, accumulator = vm.pop(3)
118
+
119
+ accumulator << result
120
+ occurrences += 1
121
+
122
+ # All went well but we have reached our maximum?
123
+ if max && occurrences >= max
124
+ # We're done! Push the result.
125
+ vm.push accumulator
126
+ return
127
+ end
128
+
129
+ # No maximum was set or it was not reached. Continue matching.
130
+ vm.push vm.source.pos
131
+ vm.push occurrences
132
+ vm.push accumulator
133
+ vm.jump adr
134
+ end
135
+ end
136
+
137
+ # Checks if a sequence must be aborted early because of a parse failure.
138
+ # Cleans up the stack and jumps after the sequence, having set error.
139
+ #
140
+ CheckSequence = Struct.new(:cleanup_items, :adr, :error) do
141
+ def run(vm)
142
+ unless vm.success?
143
+ vm.pop(cleanup_items)
144
+
145
+ cause = vm.source.error(error)
146
+ cause.children << vm.error
147
+ vm.set_error cause
148
+ vm.jump(adr)
149
+ end
150
+ end
151
+ def to_s
152
+ "CHKSQ #{cleanup_items}, #{adr}, #{error[0,50] + "..."}"
153
+ end
154
+ end
155
+
156
+ # Packs size stack elements into an array that is prefixed with the
157
+ # :sequence tag. This will later be converted by #flatten.
158
+ #
159
+ PackSequence = Struct.new(:size) do
160
+ def run(vm)
161
+ source = vm.source
162
+
163
+ fail "Sequence runs into PackSequence with error flag set!" \
164
+ unless vm.success?
165
+
166
+ elts = vm.pop(size)
167
+ vm.push [:sequence, *elts]
168
+ end
169
+ def to_s
170
+ "PACK #{size}"
171
+ end
172
+ end
173
+
174
+ # Enters a new stack frame that can be discarded with vm.discard_frame. This
175
+ # helps in situations where you need to pop a state that you don't know the
176
+ # size of.
177
+ #
178
+ EnterFrame = Class.new do
179
+ def run(vm)
180
+ vm.enter_frame
181
+ end
182
+ def to_s
183
+ "ENTER"
184
+ end
185
+ end
186
+
187
+ # Fails at this point with the given error message. Size indicates how many
188
+ # different alternatives should have generated an error message on the
189
+ # stack.
190
+ #
191
+ Fail = Struct.new(:message, :size) do
192
+ def run(vm)
193
+ children = vm.pop(size)
194
+ error = vm.source.error(message)
195
+ error.children.replace(children)
196
+
197
+ # Clean up the stack frames:
198
+ vm.discard_frame
199
+
200
+ vm.set_error error
201
+ end
202
+ def to_s
203
+ "FAIL #{message}, #{size}"
204
+ end
205
+ end
206
+
207
+ # If vm.success? is true, branches to the given address.
208
+ #
209
+ BranchOnSuccess = Struct.new(:adr, :pos_ptr) do
210
+ def run(vm)
211
+ source = vm.source
212
+ if vm.success?
213
+ # Stack will look like this:
214
+ # (n*) previous failures
215
+ # successful match
216
+ # So we pop the match, discard the failures and push the success
217
+ # again. This way, it looks like a success should look.
218
+ value = vm.pop
219
+ vm.discard_frame
220
+ vm.push value
221
+
222
+ vm.jump(adr)
223
+ else
224
+ # Otherwise, clear the error and try the alternative that comes
225
+ # right here in the byte code.
226
+
227
+ # We need to reset the source.pos to what it was before starting on
228
+ # one of several alternatives:
229
+ source.pos = vm.value_at(pos_ptr)
230
+
231
+ # Push the error as if it were a value. If all branches fail, this can
232
+ # be used to create a complete error trace. If not, VM#discard_frame
233
+ # will take care of those.
234
+ vm.push vm.error
235
+
236
+ vm.clear_error
237
+ end
238
+ end
239
+ def to_s
240
+ "BRSUC #{adr}, #{pos_ptr}"
241
+ end
242
+ end
243
+
244
+ # Boxes a value inside a name tag.
245
+ #
246
+ # Consumes: parslet result
247
+ # Pushes: boxed result
248
+ #
249
+ Box = Struct.new(:name) do
250
+ def run(vm)
251
+ if vm.success?
252
+ result = vm.pop
253
+ vm.push(name => result)
254
+ end
255
+ end
256
+ def to_s
257
+ "BOX #{name.inspect}"
258
+ end
259
+ end
260
+
261
+ # Pushes the current source pos to the stack.
262
+ #
263
+ # Consumes: Nothing
264
+ # Pushes: the current source.pos
265
+ #
266
+ PushPos = Class.new do
267
+ def run(vm)
268
+ source = vm.source
269
+ vm.push source.pos
270
+ end
271
+ def to_s
272
+ "PSHPS"
273
+ end
274
+ end
275
+
276
+ # Assumes that the stack contains the source position from before parsing
277
+ # a parslet (as per PushPos) and, if that parslet matched, its result on
278
+ # top. Removes both and leaves the vm in a state that indicates the result
279
+ # of the lookahead: on success, nil is pushed (no capture) and the error is
280
+ # cleared; otherwise the error is set.
281
+ #
282
+ # Consumes: VM state, source.pos
283
+ # Pushes: VM.state
284
+ # Effects: resets source.pos
285
+ #
286
+ CheckAndReset = Struct.new(:positive, :parslet) do
287
+ def run(vm)
288
+ source = vm.source
289
+
290
+ vm.pop if vm.success?
291
+
292
+ # Retrieve the parse position from before attempting to match the
293
+ # parslet.
294
+ start_pos = vm.pop
295
+ source.pos = start_pos
296
+
297
+ if positive && vm.success? || !positive && !vm.success?
298
+ vm.clear_error
299
+ vm.push nil
300
+ else
301
+ error_msg = positive ?
302
+ ["Input should start with ", parslet] :
303
+ ["Input should not start with ", parslet]
304
+ vm.set_error source.error(error_msg, start_pos)
305
+ end
306
+ end
307
+ def to_s
308
+ "CHKRS #{positive ? ':&' : ':!'}, #{parslet.inspect}"
309
+ end
310
+ end
311
+
312
+ # Compiles the block or 'calls' the subroutine that was compiled earlier.
313
+ #
314
+ CallBlock = Struct.new(:block) do
315
+ def run(vm)
316
+ vm.call(block.address)
317
+ end
318
+ def to_s
319
+ "LCALL #{block.address} (was atom<#{block.atom}>)"
320
+ end
321
+ end
322
+ Return = Class.new do
323
+ def run(vm)
324
+ vm.call_ret
325
+ end
326
+ def to_s
327
+ "RETRN"
328
+ end
329
+ end
330
+ Stop = Class.new do
331
+ def run(vm)
332
+ vm.stop
333
+ end
334
+ def to_s
335
+ "STPVM"
336
+ end
337
+ end
338
+
339
+ # Caching
340
+ CheckCache = Struct.new(:skip_adr) do
341
+ def run(vm)
342
+ return if vm.access_cache(skip_adr)
343
+
344
+ vm.push vm.source.pos
345
+ end
346
+ def to_s
347
+ "RETCA #{skip_adr}"
348
+ end
349
+ end
350
+ StoreResult = Struct.new(:adr) do
351
+ def run(vm)
352
+ vm.store_cache(adr)
353
+ end
354
+ def to_s
355
+ "STOCA #{adr}"
356
+ end
357
+ end
358
+ end