parslet 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
@@ -1,358 +0,0 @@
1
- module Parslet::Bytecode
2
# Instruction that compares upcoming input against a fixed string. On a match
# the text returned by source.read is pushed onto the VM stack; on any
# failure the source position is restored and the VM error is set.
#
Match = Struct.new(:str) do
  def initialize(str)
    super
    # Built once up front so the mismatch path only has to append the input.
    @mismatch_error_prefix = "Expected #{str.inspect}, but got "
  end

  def to_s
    "MATCH #{str.inspect}"
  end

  # Reads from vm.source and either pushes the text read or sets an error.
  #
  # NOTE(review): the read length is str.bytesize while the length check uses
  # #size; confirm which unit source.read expects for multibyte strings.
  def run(vm)
    input = vm.source
    start = input.pos
    chunk = input.read(str.bytesize)

    complete = chunk.size == str.size
    return vm.push(chunk) if complete && chunk == str

    # Failure: rewind first, then build the error at the restored position.
    input.pos = start
    message =
      if complete
        [@mismatch_error_prefix, chunk]
      else
        "Premature end of input"
      end
    vm.set_error input.error(message)
  end
end
33
-
34
# Instruction that reads `size` units from the source and tests them against
# the regular expression `re`. The text read is pushed on success; on failure
# the source position is restored and the VM error is set.
#
Re = Struct.new(:re, :size) do
  def initialize(re, size)
    super
    # [1..-2] drops the first and last character of re.inspect.
    @failure = "Failed to match #{re.inspect[1..-2]}"
  end

  def to_s
    "RE #{re.inspect}, #{size}"
  end

  def run(vm)
    input = vm.source
    start = input.pos
    chunk = input.read(size)

    # Decide which failure applies, if any; the length check comes first.
    problem =
      if chunk.size != size
        "Premature end of input"
      elsif !chunk.match(re)
        @failure
      end

    if problem
      input.pos = start
      vm.set_error input.error(problem)
      return
    end

    vm.push chunk
  end
end
65
-
66
# Prepares the stack for a repetition by pushing three values: the current
# source position, an occurrence counter starting at 0, and an accumulator
# array that starts out holding only `tag`.
#
SetupRepeat = Struct.new(:tag) do
  def to_s
    "STPRE #{tag.inspect}"
  end

  def run(vm)
    initial_state = [
      vm.source.pos, # position before the first attempt
      0,             # occurrences so far
      [tag]          # collects the results
    ]
    initial_state.each { |item| vm.push item }
  end
end
77
-
78
# Repeat matching with a minimum of min and a maximum of max times.
#
# Expects the stack layout produced by SetupRepeat (position, occurrence
# count, accumulator), with the result of the latest attempt on top when that
# attempt succeeded. A nil max means "no upper bound"; a nil min is treated
# as 0 so that #run agrees with #to_s, which already tolerates a nil min.
#
Repeat = Struct.new(:min, :max, :adr, :parslet) do
  def initialize(*args)
    super

    @minrep_error = ["Expected at least #{min} of ", parslet]
  end

  def to_s
    "RPEAT #{min || 'n/a'}, #{max || 'n/a'}, #{adr}, #{parslet}"
  end

  # Folds the latest attempt into the accumulator and either loops back to
  # `adr`, or finishes by pushing the accumulator or setting an error.
  def run(vm)
    return finish_after_failure(vm) unless vm.success?

    result = vm.pop
    _pos, occurrences, accumulator = vm.pop(3)

    accumulator << result
    occurrences += 1

    # Maximum reached: the accumulator is the final result.
    if max && occurrences >= max
      vm.push accumulator
      return
    end

    # No maximum, or not reached yet: save state and try another round.
    vm.push vm.source.pos
    vm.push occurrences
    vm.push accumulator
    vm.jump adr
  end

  # Handles the round in which the repeated parslet failed: rewinds to the
  # position saved before that round, then either reports "too few matches"
  # (keeping the inner error as a child) or clears the error and succeeds.
  def finish_after_failure(vm)
    source = vm.source
    pos, occurrences, accumulator = vm.pop(3)
    source.pos = pos

    if occurrences < (min || 0)
      error = source.error(@minrep_error, pos)
      error.children << vm.error
      vm.set_error error
      return
    end

    vm.clear_error
    vm.push accumulator
    nil
  end
  private :finish_after_failure
end
136
-
137
# Aborts a sequence early when the VM is in the error state: pops
# `cleanup_items` values off the stack, wraps the current VM error as a child
# of a new error built from `error`, and jumps to `adr`. Does nothing when the
# VM reports success.
#
CheckSequence = Struct.new(:cleanup_items, :adr, :error) do
  def to_s
    # The suffix is appended unconditionally, whatever the message length.
    excerpt = error[0,50] + "..."
    "CHKSQ #{cleanup_items}, #{adr}, #{excerpt}"
  end

  def run(vm)
    return if vm.success?

    vm.pop(cleanup_items)

    wrapped = vm.source.error(error)
    wrapped.children << vm.error
    vm.set_error wrapped
    vm.jump(adr)
  end
end
155
-
156
# Packs `size` stack elements into a single array prefixed with the
# :sequence tag and pushes that array back onto the stack.
#
# Raises a RuntimeError when reached with the VM error flag set.
#
PackSequence = Struct.new(:size) do
  def run(vm)
    # The source is not needed here; only the value stack is touched.
    fail "Sequence runs into PackSequence with error flag set!" \
      unless vm.success?

    elts = vm.pop(size)
    vm.push [:sequence, *elts]
  end

  def to_s
    "PACK #{size}"
  end
end
173
-
174
# Opens a new stack frame on the VM (vm.enter_frame). A frame can later be
# thrown away with vm.discard_frame, which is useful when the amount of state
# to pop is not known in advance.
#
EnterFrame = Class.new do
  def to_s
    "ENTER"
  end

  def run(vm)
    vm.enter_frame
  end
end
186
-
187
# Unconditionally fails with `message`. The top `size` stack values are
# popped and become the children of the new error, the current stack frame
# is discarded, and the error is stored on the VM.
#
Fail = Struct.new(:message, :size) do
  def to_s
    "FAIL #{message}, #{size}"
  end

  def run(vm)
    collected = vm.pop(size)

    failure = vm.source.error(message).tap do |cause|
      cause.children.replace(collected)
    end

    # Drop whatever else was pushed inside the current frame.
    vm.discard_frame
    vm.set_error failure
  end
end
206
-
207
# Branches to `adr` when the VM is in the success state. Otherwise the error
# is set aside on the stack and cleared so that the next alternative, which
# follows directly in the byte code, can be tried.
#
BranchOnSuccess = Struct.new(:adr, :pos_ptr) do
  def to_s
    "BRSUC #{adr}, #{pos_ptr}"
  end

  def run(vm)
    unless vm.success?
      # Rewind to the position stored before the alternatives started.
      vm.source.pos = vm.value_at(pos_ptr)

      # Keep the error as a stack value: if every branch fails these values
      # can be gathered into one error; if not, discard_frame removes them.
      vm.push vm.error
      return vm.clear_error
    end

    # The stack holds the errors of earlier alternatives with the successful
    # match on top. Keep only the match.
    matched = vm.pop
    vm.discard_frame
    vm.push matched

    vm.jump(adr)
  end
end
243
-
244
# Wraps the value on top of the stack in a single-entry hash keyed by `name`.
# Leaves the stack untouched when the VM is in the error state.
#
# Consumes: parslet result
# Pushes:   { name => result }
#
Box = Struct.new(:name) do
  def to_s
    "BOX #{name.inspect}"
  end

  def run(vm)
    return unless vm.success?

    inner = vm.pop
    vm.push(name => inner)
  end
end
260
-
261
# Saves the current source position on the stack.
#
# Consumes: nothing
# Pushes:   vm.source.pos
#
PushPos = Class.new do
  def to_s
    "PSHPS"
  end

  def run(vm)
    vm.push vm.source.pos
  end
end
275
-
276
# Finishes a lookahead. Pops the parslet result if there is one, then pops
# the source position saved before the parslet ran and rewinds to it.
#
# The lookahead holds when `positive` and the VM success state agree. In that
# case the error is cleared and nil is pushed (a lookahead captures nothing).
# Otherwise the VM error is replaced by a lookahead error at the saved
# position.
#
# Consumes: parslet result (only on success), saved source.pos
# Pushes:   nil when the lookahead holds
# Effects:  resets source.pos
#
CheckAndReset = Struct.new(:positive, :parslet) do
  def to_s
    "CHKRS #{positive ? ':&' : ':!'}, #{parslet.inspect}"
  end

  def run(vm)
    input = vm.source
    matched = vm.success?

    # A result is only on the stack when the parslet matched.
    vm.pop if matched

    origin = vm.pop
    input.pos = origin

    if positive ? matched : !matched
      vm.clear_error
      vm.push nil
    else
      prefix =
        if positive
          "Input should start with "
        else
          "Input should not start with "
        end
      vm.set_error input.error([prefix, parslet], origin)
    end
  end
end
311
-
312
# Calls the subroutine stored at block.address through vm.call.
#
CallBlock = Struct.new(:block) do
  def to_s
    "LCALL #{block.address} (was atom<#{block.atom}>)"
  end

  def run(vm)
    target = block.address
    vm.call(target)
  end
end
322
# Returns from a subroutine by delegating to vm.call_ret.
#
Return = Class.new do
  def to_s
    "RETRN"
  end

  def run(vm)
    vm.call_ret
  end
end
330
# Asks the VM to stop executing by calling vm.stop.
#
Stop = Class.new do
  def to_s
    "STPVM"
  end

  def run(vm)
    vm.stop
  end
end
338
-
339
# Caching: asks the VM for a cached outcome via vm.access_cache(skip_adr).
# When that call returns a truthy value nothing more is done here; otherwise
# the current source position is pushed onto the stack.
#
CheckCache = Struct.new(:skip_adr) do
  def to_s
    "RETCA #{skip_adr}"
  end

  def run(vm)
    vm.push vm.source.pos unless vm.access_cache(skip_adr)
  end
end
350
# Caching: hands `adr` to vm.store_cache so the VM can record the outcome.
#
StoreResult = Struct.new(:adr) do
  def to_s
    "STOCA #{adr}"
  end

  def run(vm)
    vm.store_cache(adr)
  end
end
358
- end
@@ -1,209 +0,0 @@
1
- module Parslet::Bytecode
2
# Virtual machine that executes a compiled program (an indexable list of
# instruction objects responding to #run) against an input.
#
# State kept per run: the instruction pointer, a value stack, a call stack
# of return addresses, a stack of frame markers, a result cache and an error
# register. All of it, including the error register and the stop flag, is
# reset at the start of every #run so that a VM instance can be reused.
#
class VM
  include Parslet::Atoms::CanFlatten

  attr_reader :source
  attr_reader :context
  attr_reader :error

  # debug - when truthy, every executed instruction and the VM state are
  #         printed to standard output.
  def initialize(debug=false)
    @debug = debug
  end

  def debug?
    @debug
  end

  # Executes program against io.
  #
  # Returns the flattened top of the stack when the program succeeded and
  # the whole input was consumed. Raises through the error object otherwise:
  # with Parslet::UnconsumedInput when input is left over, or via
  # @error.raise when the program ended in the error state. Fails with a
  # RuntimeError when more than one value is left on the stack.
  def run(program, io)
    init(program, io)

    loop do
      old_ip = @ip
      instruction = fetch
      break unless instruction

      # Diagnostics
      printf("executing %5d: %s\n", old_ip, instruction) if debug?

      instruction.run(self)

      # Diagnostics
      dump_state(0) if debug?
      break if @stop
    end

    fail "Stack contains too many values." if @values.size > 1

    # Best case: matched successfully and consumed all input.
    return flatten(@values.last) if success? && source.eof?

    # Matching only part of the input counts as an error as well.
    if success?
      current_pos = source.pos
      source.error(
        "Don't know what to do with #{source.read(100)}", current_pos).
        raise(Parslet::UnconsumedInput)
    end

    # Otherwise the error register holds the reason. Raise that.
    @error.raise
  rescue => ex
    # Unexpected exceptions get a state dump (printed only in debug mode).
    dump_state(-1) unless ex.kind_of?(Parslet::ParseFailed)
    raise
  end

  # Resets all per-run state. The error register and the stop flag are reset
  # too; otherwise a second #run on the same instance would start out failed
  # or would stop after its first instruction.
  def init(program, io)
    @ip = 0
    @program = program
    @source = Parslet::Source.new(io)
    @context = Parslet::Atoms::Context.new
    @values = []
    @calls = []
    @frames = []
    @cache = {}
    @error = nil
    @stop = false
  end

  # Returns the instruction at the instruction pointer (nil past the end of
  # the program) and advances the pointer by one.
  def fetch
    @program.at(@ip).tap { @ip += 1 }
  end

  # Dumps the VM state so that the user can track errors down. Prints
  # nothing unless debugging is enabled.
  #
  # ip_offset - offset from @ip of the instruction to mark with '->'.
  def dump_state(ip_offset)
    return unless debug?
    puts "\nVM STATE -------------------------------------------- "

    # Show up to 10 units before the current position and mark the position
    # itself with a caret on the following line.
    old_pos = source.pos
    window_start = [old_pos - 10, 0].max
    source.pos = window_start
    puts "Source: #{source.read(20)}"
    puts "#{' ' * 'Source: '.size}#{' ' * (old_pos - window_start)}^"
    source.pos = old_pos

    if @error
      puts "Error register: #{@error}"
    else
      puts "Error register: EMPTY"
    end

    puts "Program: "
    ((@ip-5)..(@ip+5)).each do |adr|
      next unless adr >= 0 && @program.at(adr)

      printf("%s%5d: %s\n",
        adr == @ip+ip_offset ? '->' : ' ',
        adr,
        @program.at(adr))
    end

    puts "\nStack(#{@values.size}): (last 5, top is top of stack)"
    @values.last(5).reverse.each_with_index do |v,i|
      printf(" %5d: %s\n", i, v.inspect)
    end

    puts "\nStack Frames(#{@frames.size}): (last 5, top is top of stack)"
    @frames.last(5).reverse.each_with_index do |v,i|
      printf(" %5d: trunc stack at %s\n", i, v)
    end

    puts "\nCall Stack(#{@calls.size}): (last 5, top is top of stack)"
    @calls.last(5).reverse.each_with_index do |v,i|
      printf(" %5d: return to @%s\n", i, v)
    end
    puts "---------------------- -------------------------------- "
  end

  # --------------------------------------------- interface for instructions

  # Looks up a cached outcome for the current source position and the
  # instruction that is executing (@ip was already advanced, hence @ip-1).
  # On a hit the outcome is replayed (value pushed or error set), the source
  # is advanced by the recorded amount and execution jumps to skip_adr.
  #
  # Returns true on a cache hit, false otherwise.
  def access_cache(skip_adr)
    entry = @cache[[source.pos, @ip-1]]
    return false unless entry

    success, value, advance = entry
    if success
      push value
    else
      set_error value
    end

    source.pos += advance

    jump skip_adr
    true
  end

  # Records the outcome of the attempt that started at the position saved on
  # the stack, keyed by that position and adr.address. A successful result
  # is put back on the stack afterwards.
  def store_cache(adr)
    if success?
      pos, result = pop(2)
      @cache[[pos, adr.address]] = [true, result, source.pos-pos]
      push result
    else
      pos = pop
      @cache[[pos, adr.address]] = [false, @error, source.pos-pos]
    end
  end

  def push(value)
    @values.push value
  end

  # Pops one value, or an array of n values when n is given. Fails when the
  # stack does not hold enough values.
  def pop(n=nil)
    if n
      fail "Stack corruption detected, popping too many values (#{n}/#{@values.size})." \
        if n > @values.size

      @values.pop(n)
    else
      fail "Stack corruption detected, popping too many values. (stack is empty)" \
        if @values.empty?

      @values.pop
    end
  end

  # Returns the value ptr entries below the top of the stack (0 is the top).
  def value_at(ptr)
    @values.at(-ptr-1)
  end

  # Remembers the current stack height as a new frame.
  def enter_frame
    @frames.push @values.size
  end

  # Truncates the stack to the height remembered by the innermost frame.
  def discard_frame
    size = @frames.pop
    fail "No stack frame." unless size
    fail "Stack frame larger than the current stack." if size > @values.size
    @values = @values[0,size]
  end

  # Continues execution at address.address.
  def jump(address)
    @ip = address.address
  end

  def success?
    !@error
  end

  # Saves the return address and jumps to adr.
  def call(adr)
    @calls.push @ip
    jump(adr)
  end

  # Resumes execution at the most recently saved return address.
  def call_ret
    @ip = @calls.pop
    fail "One pop too many - empty call stack in #call_ret." unless @ip
  end

  def set_error(error)
    @error = error
  end

  def clear_error
    @error = nil
  end

  # Makes the main loop in #run exit after the current instruction.
  def stop
    @stop = true
  end
end
209
- end