parslet 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
@@ -1,358 +0,0 @@
1
- module Parslet::Bytecode
2
# Instruction: compares the upcoming input against the literal +str+.
#
# On a match, the text that was read is pushed onto the vm stack. On any
# failure the source is rewound to where reading started and an error is
# stored in the vm instead.
#
Match = Struct.new(:str) do
  def initialize(str)
    super
    # Built once, since it only depends on the literal.
    @mismatch_error_prefix = "Expected #{str.inspect}, but got "
  end

  def to_s
    "MATCH #{str.inspect}"
  end

  # Reads as many units as +str+ has bytes and compares the result.
  def run(vm)
    input = vm.source
    start = input.pos
    chunk = input.read(str.bytesize)
    complete = chunk.size == str.size

    # Happy path: right length and identical content.
    return vm.push(chunk) if complete && chunk == str

    # Every failure rewinds to where this instruction started reading.
    input.pos = start
    message = complete ? [@mismatch_error_prefix, chunk] : "Premature end of input"
    vm.set_error input.error(message)
  end
end
33
-
34
# Instruction: reads +size+ units of input and tests them against the regular
# expression +re+. Pushes the text read on success; on failure rewinds the
# source and stores an error in the vm.
#
Re = Struct.new(:re, :size) do
  def initialize(re, size)
    super
    # re.inspect yields "/.../"; the slice drops the surrounding slashes.
    @failure = "Failed to match #{re.inspect[1..-2]}"
  end

  def to_s
    "RE #{re.inspect}, #{size}"
  end

  def run(vm)
    input = vm.source
    start = input.pos
    chunk = input.read(size)

    # Work out which failure applies, if any; a short read wins over a
    # pattern mismatch.
    problem =
      if chunk.size != size
        "Premature end of input"
      elsif !chunk.match(re)
        @failure
      end

    if problem
      input.pos = start
      vm.set_error input.error(problem)
      return
    end

    vm.push chunk
  end
end
65
-
66
# Instruction: pushes three values onto the vm stack, in this order: the
# current source position, an occurrence counter starting at 0, and a result
# accumulator array that begins with +tag+.
#
SetupRepeat = Struct.new(:tag) do
  def to_s
    "STPRE #{tag.inspect}"
  end

  def run(vm)
    start = vm.source.pos
    vm.push start
    vm.push 0       # occurrences so far
    vm.push [tag]   # accumulator for the collected results
  end
end
77
-
78
# Instruction: repeat matching with a minimum of +min+ and a maximum of +max+
# repetitions (+max+ may be nil for "no upper bound").
#
# Expects three values on the stack below the latest attempt's result:
# source position, occurrence count and result accumulator. On a successful
# attempt the result itself sits on top of those three.
#
# +adr+ is the address to jump to for another round; +parslet+ is only used
# in the error message and in #to_s.
#
Repeat = Struct.new(:min, :max, :adr, :parslet) do
  def initialize(*args)
    super

    @minrep_error = ["Expected at least #{min} of ", parslet]
  end

  def to_s
    "RPEAT #{min || 'n/a'}, #{max || 'n/a'}, #{adr}, #{parslet}"
  end

  def run(vm)
    source = vm.source

    unless vm.success?
      # The last attempt failed: rewind to where that attempt started.
      pos, occurrences, accumulator = vm.pop(3)
      source.pos = pos

      # Still below the minimum number of matches? Then the repetition as a
      # whole fails; the attempt's error becomes a child of the new one.
      if occurrences < min
        error = source.error(@minrep_error, pos)
        error.children << vm.error
        vm.set_error error
        return
      end

      # assert: occurrences >= min
      # The minimum was reached, so the failed attempt just ends the loop.
      vm.clear_error
      vm.push accumulator
      return
    end

    # assert: vm.success?
    result = vm.pop
    # The stored position is only needed on the failure path above.
    _pos, occurrences, accumulator = vm.pop(3)

    accumulator << result
    occurrences += 1

    # All went well but we have reached our maximum? Then we're done.
    if max && occurrences >= max
      vm.push accumulator
      return
    end

    # No maximum was set or it was not reached: push fresh state and go
    # around again.
    vm.push source.pos
    vm.push occurrences
    vm.push accumulator
    vm.jump adr
  end
end
136
-
137
# Instruction: checks if a sequence must be aborted early because of a parse
# failure. If the vm holds an error, pops +cleanup_items+ values off the
# stack, wraps the current error as a child of a new error built from
# +error+, and jumps to +adr+. Does nothing when the vm is in success state.
#
CheckSequence = Struct.new(:cleanup_items, :adr, :error) do
  def run(vm)
    return if vm.success?

    vm.pop(cleanup_items)

    cause = vm.source.error(error)
    cause.children << vm.error
    vm.set_error cause
    vm.jump(adr)
  end

  # Shows at most the first 50 characters of the error message. The message
  # is coerced first, because +error+ goes to source.error and other
  # instructions pass arrays of message parts there; slicing an array and
  # appending "..." would raise TypeError.
  def to_s
    preview = Array(error).join[0, 50]
    "CHKSQ #{cleanup_items}, #{adr}, #{preview}..."
  end
end
155
-
156
# Instruction: packs the top +size+ stack elements into a single array that
# is prefixed with the :sequence tag and pushes that array. The original
# comment notes that the tagged array is later converted by #flatten.
#
# Raises RuntimeError when the vm is in error state, since a failed sequence
# is not expected to reach this instruction.
#
PackSequence = Struct.new(:size) do
  def run(vm)
    fail "Sequence runs into PackSequence with error flag set!" \
      unless vm.success?

    elts = vm.pop(size)
    vm.push [:sequence, *elts]
  end

  def to_s
    "PACK #{size}"
  end
end
173
-
174
# Instruction: opens a new stack frame on the vm. The frame can later be
# dropped with vm.discard_frame, which is useful when the number of values
# to pop is not known in advance.
#
EnterFrame = Class.new do
  def to_s
    "ENTER"
  end

  def run(vm)
    vm.enter_frame
  end
end
186
-
187
# Instruction: fails unconditionally with +message+. +size+ is the number of
# values to take from the top of the stack; they become the children of the
# new error (the errors the individual alternatives produced).
#
Fail = Struct.new(:message, :size) do
  def to_s
    "FAIL #{message}, #{size}"
  end

  def run(vm)
    alternatives = vm.pop(size)

    cause = vm.source.error(message)
    cause.children.replace(alternatives)

    # Drop whatever is left of the current stack frame before reporting.
    vm.discard_frame

    vm.set_error cause
  end
end
206
-
207
# Instruction: if the vm is in success state, branches to +adr+; otherwise
# prepares the vm for trying the next alternative, which follows directly in
# the byte code. +pos_ptr+ locates the saved source position on the stack
# (as understood by vm.value_at).
#
BranchOnSuccess = Struct.new(:adr, :pos_ptr) do
  def to_s
    "BRSUC #{adr}, #{pos_ptr}"
  end

  def run(vm)
    if vm.success?
      # The stack holds the errors of earlier failed alternatives with the
      # successful match on top. Keep the match, discard the frame with the
      # failures, and put the match back so the stack looks like a plain
      # success.
      match = vm.pop
      vm.discard_frame
      vm.push match

      return vm.jump(adr)
    end

    # Rewind the source to where it was before the alternatives started.
    vm.source.pos = vm.value_at(pos_ptr)

    # Keep the error on the stack as if it were a value. If every branch
    # fails it can be used for a complete error trace; if not,
    # VM#discard_frame gets rid of it.
    vm.push vm.error

    vm.clear_error
  end
end
243
-
244
# Instruction: wraps the value on top of the stack in a single-entry hash
# keyed by +name+. Leaves the vm untouched when it is in error state.
#
# Consumes: parslet result
# Pushes:   { name => result }
#
Box = Struct.new(:name) do
  def to_s
    "BOX #{name.inspect}"
  end

  def run(vm)
    return unless vm.success?

    vm.push(name => vm.pop)
  end
end
260
-
261
# Instruction: records the current source position on the stack.
#
# Consumes: nothing
# Pushes:   the current source.pos
#
PushPos = Class.new do
  def to_s
    "PSHPS"
  end

  def run(vm)
    vm.push vm.source.pos
  end
end
275
-
276
# Instruction: finishes a lookahead. Expects the source position saved
# before the lookahead's parslet was tried (as per PushPos) on the stack,
# with the parslet's result above it when the parslet matched.
#
# Removes those values and rewinds the source to the saved position. If the
# outcome agrees with +positive+ (match for a positive lookahead, failure
# for a negative one), any error is cleared and nil is pushed as the result.
# Otherwise an error naming +parslet+ is set.
#
# Consumes: parslet result (on success), source.pos
# Pushes:   nil when the lookahead holds
# Effects:  resets source.pos
#
CheckAndReset = Struct.new(:positive, :parslet) do
  def to_s
    "CHKRS #{positive ? ':&' : ':!'}, #{parslet.inspect}"
  end

  def run(vm)
    input = vm.source
    matched = vm.success?

    # A successful parslet left its result on top of the saved position.
    vm.pop if matched

    # Go back to where the source was before the parslet was attempted.
    start = vm.pop
    input.pos = start

    lookahead_holds = positive ? matched : !matched

    if lookahead_holds
      vm.clear_error
      vm.push nil
    else
      prefix = positive ? "Input should start with " : "Input should not start with "
      vm.set_error input.error([prefix, parslet], start)
    end
  end
end
311
-
312
# Instruction: 'calls' the subroutine that was compiled earlier for +block+
# by handing the block's address to vm.call.
#
CallBlock = Struct.new(:block) do
  def to_s
    "LCALL #{block.address} (was atom<#{block.atom}>)"
  end

  def run(vm)
    target = block.address
    vm.call(target)
  end
end
322
# Instruction: returns from a subroutine by delegating to vm.call_ret.
#
Return = Class.new do
  def to_s
    "RETRN"
  end

  def run(vm)
    vm.call_ret
  end
end
330
# Instruction: asks the vm to stop executing by delegating to vm.stop.
#
Stop = Class.new do
  def to_s
    "STPVM"
  end

  def run(vm)
    vm.stop
  end
end
338
-
339
- # Caching
340
# Instruction: consults the vm cache via vm.access_cache(skip_adr). When
# that call returns a truthy value nothing else happens here; otherwise the
# current source position is pushed onto the stack.
#
CheckCache = Struct.new(:skip_adr) do
  def to_s
    "RETCA #{skip_adr}"
  end

  def run(vm)
    vm.push vm.source.pos unless vm.access_cache(skip_adr)
  end
end
350
# Instruction: records the current outcome in the vm cache by delegating to
# vm.store_cache(adr).
#
StoreResult = Struct.new(:adr) do
  def to_s
    "STOCA #{adr}"
  end

  def run(vm)
    vm.store_cache(adr)
  end
end
358
- end
@@ -1,209 +0,0 @@
1
- module Parslet::Bytecode
2
# Virtual machine that executes a program of bytecode instructions against
# an input. Instructions are objects responding to #run(vm); they manipulate
# the machine through the interface methods defined further down (#push,
# #pop, #jump, #set_error, ...).
#
class VM
  include Parslet::Atoms::CanFlatten

  # debug - when truthy, every executed instruction and the machine state
  #         after it are printed to stdout.
  def initialize(debug=false)
    @debug = debug
  end

  def debug?
    @debug
  end

  # Runs +program+ on +io+ and returns the flattened result when the program
  # succeeds and consumes all input. Otherwise raises: through a source
  # error with Parslet::UnconsumedInput when input is left over, or through
  # the stored error when matching failed.
  #
  def run(program, io)
    init(program, io)

    loop do
      old_ip = @ip
      instruction = fetch
      break unless instruction

      # Diagnostics
      printf("executing %5d: %s\n", old_ip, instruction) if debug?

      # Run the current instruction
      instruction.run(self)

      # Diagnostics
      dump_state(0) if debug?
      break if @stop
    end

    fail "Stack contains too many values." if @values.size>1

    # In the best case, we have successfully matched and consumed all input.
    # This is what we want, from now on down it's all error cases.
    return flatten(@values.last) if success? && source.eof?

    # Maybe we've matched some, but not all of the input? In parslets books,
    # this is an error as well.
    if success?
      # assert: not source.eof?
      current_pos = source.pos
      source.error(
        "Don't know what to do with #{source.read(100)}", current_pos).
        raise(Parslet::UnconsumedInput)
    end

    # assert: ! @error.nil?

    # And maybe we just could not do it for a reason. Raise that.
    @error.raise

  rescue => ex
    # Parse failures are expected outcomes; anything else gets a state dump
    # (only printed in debug mode) before being re-raised.
    dump_state(-1) unless ex.kind_of?(Parslet::ParseFailed)
    raise
  end

  attr_reader :source
  attr_reader :context

  # Resets all machine state for a fresh run of +program+ on +io+.
  def init(program, io)
    @ip = 0
    @program = program
    @source = Parslet::Source.new(io)
    @context = Parslet::Atoms::Context.new
    @values = []
    @calls = []
    @frames = []
    @cache = {}
    # The error register and stop flag must not leak from a previous run:
    # a leftover error makes #success? false from the first instruction on,
    # and a leftover stop flag ends the loop after a single instruction.
    @error = nil
    @stop = false
  end

  # Returns the instruction at the instruction pointer (nil past the end of
  # the program) and advances the pointer by one.
  def fetch
    @program.at(@ip).tap { @ip += 1 }
  end

  # Dumps the VM state so that the user can track errors down. Prints
  # nothing unless debug mode is on. +ip_offset+ selects which program line
  # gets the '->' marker, relative to the instruction pointer.
  #
  def dump_state(ip_offset)
    return unless debug?
    puts "\nVM STATE -------------------------------------------- "

    # Show up to 10 characters before the current position and mark the
    # position itself with a caret; the source position is restored after.
    old_pos = source.pos
    debug_pos = old_pos - 10
    source.pos = debug_pos < 0 ? 0 : debug_pos
    puts "Source: #{source.read(20)}"
    caret_indent = 10 + (debug_pos < 0 ? debug_pos : 0)
    puts((" " * "Source: ".size) << (" " * caret_indent) << '^')
    source.pos = old_pos

    if @error
      puts "Error register: #{@error}"
    else
      puts "Error register: EMPTY"
    end

    puts "Program: "
    ((@ip-5)..(@ip+5)).each do |adr|
      next unless adr >= 0 && @program.at(adr)

      printf("%s%5d: %s\n",
        adr == @ip+ip_offset ? '->' : '  ',
        adr,
        @program.at(adr))
    end

    puts "\nStack(#{@values.size}): (last 5, top is top of stack)"
    @values.last(5).reverse.each_with_index do |v,i|
      printf("  %5d: %s\n", i, v.inspect)
    end

    puts "\nStack Frames(#{@frames.size}): (last 5, top is top of stack)"
    @frames.last(5).reverse.each_with_index do |v,i|
      printf("  %5d: trunc stack at %s\n", i, v)
    end

    puts "\nCall Stack(#{@calls.size}): (last 5, top is top of stack)"
    @calls.last(5).reverse.each_with_index do |v,i|
      printf("  %5d: return to @%s\n", i, v)
    end
    puts "---------------------- -------------------------------- "
  end

  # --------------------------------------------- interface for instructions

  # Looks up a cached outcome for the current source position and the
  # instruction that is executing (@ip was already advanced by #fetch, hence
  # the -1). On a hit the cached value or error is restored, the source is
  # advanced by the cached amount, the vm jumps to +skip_adr+ and true is
  # returned. Returns false on a miss.
  def access_cache(skip_adr)
    key = [source.pos, @ip-1]

    # Is the given vm state in the cache yet?
    if @cache[key]
      # Restore state
      success, value, advance = @cache[key]

      if success
        push value
      else
        set_error value
      end

      source.pos += advance

      # Skip to skip_adr
      jump skip_adr
      return true
    end

    return false
  end

  # Stores the current outcome under [start position, adr.address]. Expects
  # the start position on the stack, below the result when in success state.
  # The result is pushed back afterwards; the start position is consumed.
  def store_cache(adr)
    if success?
      pos, result = pop(2)
      key = [pos, adr.address]
      @cache[key] = [true, result, source.pos-pos]
      push result
    else
      pos = pop
      key = [pos, adr.address]
      @cache[key] = [false, @error, source.pos-pos]
    end
  end

  def push(value)
    @values.push value
  end

  # Pops one value, or the top +n+ values as an array when +n+ is given.
  # Fails when the stack holds fewer values than requested.
  def pop(n=nil)
    if n
      fail "Stack corruption detected, popping too many values (#{n}/#{@values.size})." \
        if n>@values.size

      @values.pop(n)
    else
      fail "Stack corruption detected, popping too many values. (stack is empty)" \
        if @values.empty?

      @values.pop
    end
  end

  # Returns the value +ptr+ positions below the top of the stack without
  # removing it (0 is the top).
  def value_at(ptr)
    @values.at(-ptr-1)
  end

  # Remembers the current stack height as a new frame.
  def enter_frame
    @frames.push @values.size
  end

  # Truncates the stack to the height remembered by the innermost frame and
  # removes that frame.
  def discard_frame
    size = @frames.pop
    fail "No stack frame." unless size
    fail "Stack frame larger than the current stack." if size > @values.size
    @values = @values[0,size]
  end

  # Continues execution at +address+, an object responding to #address.
  def jump(address)
    @ip = address.address
  end

  def success?
    !@error
  end

  # Saves the current instruction pointer on the call stack and jumps.
  def call(adr)
    @calls.push @ip
    jump(adr)
  end

  # Resumes at the instruction pointer saved by the matching #call.
  def call_ret
    @ip = @calls.pop
    fail "One pop too many - empty call stack in #call_ret." unless @ip
  end

  def set_error(error)
    @error = error
  end

  def clear_error
    @error = nil
  end
  attr_reader :error

  # Makes #run leave its loop after the current instruction.
  def stop
    @stop = true
  end
end
209
- end