parslet 1.2.3 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +21 -0
- data/README +1 -1
- data/example/ignore_whitespace.rb +66 -0
- data/example/mathn.rb +44 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +2 -2
- data/example/output/mathn.out +4 -0
- data/lib/parslet.rb +8 -1
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/alternative.rb +1 -1
- data/lib/parslet/atoms/base.rb +26 -157
- data/lib/parslet/atoms/can_flatten.rb +132 -0
- data/lib/parslet/atoms/lookahead.rb +5 -8
- data/lib/parslet/atoms/str.rb +1 -1
- data/lib/parslet/atoms/visitor.rb +23 -9
- data/lib/parslet/bytecode.rb +6 -0
- data/lib/parslet/bytecode/compiler.rb +138 -0
- data/lib/parslet/bytecode/instructions.rb +358 -0
- data/lib/parslet/bytecode/vm.rb +209 -0
- data/lib/parslet/cause.rb +62 -0
- data/lib/parslet/export.rb +2 -2
- data/lib/parslet/rig/rspec.rb +18 -17
- data/lib/parslet/source.rb +66 -48
- data/lib/parslet/source/line_cache.rb +7 -1
- data/lib/parslet/transform/context.rb +15 -7
- metadata +57 -16
- data/Gemfile +0 -16
- data/lib/parslet/atoms/transform.rb +0 -75
@@ -0,0 +1,209 @@
|
|
1
|
+
module Parslet::Bytecode
  # A stack based virtual machine that executes a compiled program (a list of
  # instructions answering to #run(vm)) against an input source.
  #
  # State kept while running:
  #
  # * @ip      - address of the next instruction to fetch from @program
  # * @values  - the value stack
  # * @frames  - saved value stack sizes (see #enter_frame, #discard_frame)
  # * @calls   - return addresses (see #call, #call_ret)
  # * @cache   - memoized results, keyed by [source position, address]
  # * @error   - the error register; nil means 'currently successful'
  # * @stop    - set by #stop to end the main loop
  #
  class VM
    # NOTE(review): #flatten used in #run presumably comes from this mixin;
    # it is not defined in this class.
    include Parslet::Atoms::CanFlatten

    attr_reader :source
    attr_reader :context
    attr_reader :error

    # Creates a VM. If +debug+ is true, every executed instruction and the
    # VM state after it are printed to stdout.
    #
    def initialize(debug=false)
      @debug = debug
    end

    def debug?
      @debug
    end

    # Runs +program+ against +io+ and returns the flattened result value if
    # the program succeeded and consumed all of the input.
    #
    # Raises Parslet::UnconsumedInput if the program succeeded but input is
    # left over. Otherwise raises whatever the error register raises. Any
    # exception that is not a Parslet::ParseFailed triggers a state dump
    # (only visible in debug mode) before it is re-raised.
    #
    def run(program, io)
      init(program, io)

      loop do
        old_ip = @ip
        instruction = fetch
        # Running off the end of the program ends execution.
        break unless instruction

        # Diagnostics
        printf("executing %5d: %s\n", old_ip, instruction) if debug?

        # Run the current instruction
        instruction.run(self)

        # Diagnostics
        dump_state(0) if debug?
        break if @stop
      end

      fail "Stack contains too many values." if @values.size>1

      # In the best case, we have successfully matched and consumed all input.
      # This is what we want, from now on down it's all error cases.
      return flatten(@values.last) if success? && source.eof?

      # Maybe we've matched some, but not all of the input? In parslets books,
      # this is an error as well.
      if success?
        # assert: not source.eof?
        current_pos = source.pos
        source.error(
          "Don't know what to do with #{source.read(100)}", current_pos).
          raise(Parslet::UnconsumedInput)
      end

      # assert: ! @error.nil?

      # And maybe we just could not do it for a reason. Raise that.
      @error.raise

    rescue => ex
      dump_state(-1) unless ex.kind_of?(Parslet::ParseFailed)
      raise
    end

    # Resets all VM state for a fresh run of +program+ on +io+.
    #
    def init(program, io)
      @ip = 0
      @program = program
      @source = Parslet::Source.new(io)
      @context = Parslet::Atoms::Context.new
      @values = []
      @calls = []
      @frames = []
      @cache = {}
      # Also reset the error register and the stop flag, so that a VM
      # instance that is run more than once does not start out with the
      # outcome of the previous run.
      @error = nil
      @stop = false
    end

    # Returns the instruction at the current address (nil if there is none)
    # and advances the instruction pointer by one.
    #
    def fetch
      @program.at(@ip).tap { @ip += 1 }
    end

    # Dumps the VM state so that the user can track errors down. Does nothing
    # unless the VM is in debug mode. +ip_offset+ is added to the instruction
    # pointer to decide which line of the program listing gets the '->'
    # marker.
    #
    def dump_state(ip_offset)
      return unless debug?
      puts "\nVM STATE -------------------------------------------- "

      # Show up to 20 bytes of input around the current position, then put
      # the source position back where it was.
      old_pos = source.pos
      debug_pos = old_pos - 10
      source.pos = debug_pos < 0 ? 0 : debug_pos
      puts "Source: #{source.read(20)}"
      # When fewer than 10 bytes precede the current position, debug_pos is
      # negative and shortens the padding in front of the caret accordingly.
      caret_padding = 10 + (debug_pos < 0 ? debug_pos : 0)
      puts((" " * "Source: ".size) << (" " * caret_padding) << '^')
      source.pos = old_pos

      if @error
        puts "Error register: #{@error}"
      else
        puts "Error register: EMPTY"
      end

      puts "Program: "
      ((@ip-5)..(@ip+5)).each do |adr|
        next if adr < 0
        instruction = @program.at(adr)
        next unless instruction

        # The blank marker is as wide as '->' to keep the listing aligned.
        marker = (adr == @ip+ip_offset) ? '->' : '  '
        printf("%s%5d: %s\n", marker, adr, instruction)
      end

      puts "\nStack(#{@values.size}): (last 5, top is top of stack)"
      @values.last(5).reverse.each_with_index do |v,i|
        printf(" %5d: %s\n", i, v.inspect)
      end

      puts "\nStack Frames(#{@frames.size}): (last 5, top is top of stack)"
      @frames.last(5).reverse.each_with_index do |v,i|
        printf(" %5d: trunc stack at %s\n", i, v)
      end

      puts "\nCall Stack(#{@calls.size}): (last 5, top is top of stack)"
      @calls.last(5).reverse.each_with_index do |v,i|
        printf(" %5d: return to @%s\n", i, v)
      end
      puts "---------------------- -------------------------------- "
    end

    # --------------------------------------------- interface for instructions

    # Looks up a cached result for the current source position and the
    # address of the instruction that is currently executing (#fetch has
    # already advanced @ip, hence @ip-1). On a hit, the cached value is pushed
    # (or the cached error is set), the source is advanced by the cached
    # amount and execution continues at +skip_adr+. Returns true on a cache
    # hit, false otherwise.
    #
    def access_cache(skip_adr)
      key = [source.pos, @ip-1]
      entry = @cache[key]

      # Is the given vm state in the cache yet?
      return false unless entry

      # Restore state
      success, value, advance = entry

      if success
        push value
      else
        set_error value
      end

      source.pos += advance

      # Skip to skip_adr
      jump skip_adr
      true
    end

    # Stores the current outcome in the cache under [pos, adr.address]. On
    # success the stack top must be [pos, result]; both are popped and result
    # is pushed back. On failure only pos is popped and the error register is
    # stored. The third cache element is the number of bytes the source has
    # advanced since pos.
    #
    def store_cache(adr)
      if success?
        pos, result = pop(2)
        key = [pos, adr.address]
        @cache[key] = [true, result, source.pos-pos]
        push result
      else
        pos = pop
        key = [pos, adr.address]
        @cache[key] = [false, @error, source.pos-pos]
      end
    end

    # Pushes +value+ onto the value stack.
    #
    def push(value)
      @values.push value
    end

    # Pops the top value off the value stack. If +n+ is given, pops +n+ values
    # and returns them as an array (bottom-most first). Fails if the stack
    # does not hold enough values.
    #
    def pop(n=nil)
      if n
        fail "Stack corruption detected, popping too many values (#{n}/#{@values.size})." \
          if n>@values.size

        @values.pop(n)
      else
        fail "Stack corruption detected, popping too many values. (stack is empty)" \
          if @values.empty?

        @values.pop
      end
    end

    # Returns the value +ptr+ elements below the top of the value stack
    # without removing it; 0 is the top itself.
    #
    def value_at(ptr)
      @values.at(-ptr-1)
    end

    # Remembers the current size of the value stack.
    #
    def enter_frame
      @frames.push @values.size
    end

    # Truncates the value stack to the size remembered by the most recent
    # #enter_frame. Fails if there is no frame or if the stack has shrunk
    # below the remembered size.
    #
    def discard_frame
      size = @frames.pop
      fail "No stack frame." unless size
      fail "Stack frame larger than the current stack." if size > @values.size
      @values = @values[0,size]
    end

    # Continues execution at +address+ (an object answering to #address).
    #
    def jump(address)
      @ip = address.address
    end

    # True as long as the error register is empty.
    #
    def success?
      !@error
    end

    # Saves the address of the next instruction on the call stack and jumps
    # to +adr+.
    #
    def call(adr)
      @calls.push @ip
      jump(adr)
    end

    # Returns to the address saved by the most recent #call. Fails if the call
    # stack is empty.
    #
    def call_ret
      @ip = @calls.pop
      fail "One pop too many - empty call stack in #call_ret." unless @ip
    end

    # Stores +error+ in the error register.
    #
    def set_error(error)
      @error = error
    end

    # Empties the error register.
    #
    def clear_error
      @error = nil
    end

    # Makes the main loop in #run stop after the current instruction.
    #
    def stop
      @stop = true
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'stringio'

module Parslet
  # An internal class that allows delaying the construction of error messages
  # (as strings) until we really need to print them.
  #
  class Cause < Struct.new(:message, :source, :pos) # :nodoc:
    # Appends 'at line ... char ...' to the string given. Use +pos+ to
    # override the position of the +source+. This method returns an object
    # that can be turned into a string using #to_s.
    #
    def self.format(source, pos, str)
      self.new(str, source, pos)
    end

    # Builds the message string. Line and column are looked up from +source+
    # for +pos+ only now, when the string is actually needed.
    #
    def to_s
      line, column = source.line_and_column(pos)

      # Allow message to be a list of objects. Join them here, since we now
      # really need it. Parts that answer to #to_slice are shown as their
      # inspected string content, everything else via #to_s.
      parts = Array(message).map do |part|
        part.respond_to?(:to_slice) ? part.str.inspect : part.to_s
      end

      "#{parts.join} at line #{line} char #{column}."
    end

    # Signals to the outside that the parse has failed. Use this in
    # conjunction with .format for nice error messages.
    #
    # The exception is constructed with the message string and this cause.
    # Kernel.raise is called explicitly because this method shadows #raise
    # inside instances of this class.
    #
    def raise(exception_klass=Parslet::ParseFailed)
      exception = exception_klass.new(self.to_s, self)
      Kernel.raise exception
    end

    # Returns an ascii tree representation of the causes of this node and its
    # children.
    #
    def ascii_tree
      io = StringIO.new
      recursive_ascii_tree(self, io, [true])
      io.string
    end

    # The list of child causes; created empty on first access.
    #
    def children
      @children ||= Array.new
    end

  private

    # Prints +node+ and, recursively, its children to +stream+. +curved+
    # holds one boolean per tree level, true if the node on that level is the
    # last child of its parent.
    #
    def recursive_ascii_tree(node, stream, curved) # :nodoc:
      append_prefix(stream, curved)
      stream.puts node.to_s

      # Decide 'last child' by position. Comparing with == would compare
      # struct members, so two siblings with the same message, source and
      # position would both be drawn as the last branch.
      last_index = node.children.size - 1
      node.children.each_with_index do |child, index|
        recursive_ascii_tree(child, stream, curved + [index == last_index])
      end
    end

    # Prints the indentation and branch marker for one line of the tree.
    # Every ancestor level contributes three characters, the same width as
    # the branch markers, so that the columns line up.
    #
    def append_prefix(stream, curved) # :nodoc:
      curved[0..-2].each do |c|
        stream.print c ? "   " : "|  "
      end
      stream.print curved.last ? "`- " : "|- "
    end
  end
end
|
data/lib/parslet/export.rb
CHANGED
@@ -34,7 +34,7 @@ class Parslet::Parser
|
|
34
34
|
join(' ') <<
|
35
35
|
')'
|
36
36
|
end
|
37
|
-
def visit_repetition(min, max, parslet)
|
37
|
+
def visit_repetition(tag, min, max, parslet)
|
38
38
|
parslet.accept(self) << "#{min}*#{max}"
|
39
39
|
end
|
40
40
|
def visit_alternative(alternatives)
|
@@ -52,7 +52,7 @@ class Parslet::Parser
|
|
52
52
|
end
|
53
53
|
|
54
54
|
class Treetop < Citrus
|
55
|
-
def visit_repetition(min, max, parslet)
|
55
|
+
def visit_repetition(tag, min, max, parslet)
|
56
56
|
parslet.accept(self) << "#{min}..#{max}"
|
57
57
|
end
|
58
58
|
|
data/lib/parslet/rig/rspec.rb
CHANGED
@@ -1,40 +1,42 @@
|
|
1
1
|
RSpec::Matchers.define(:parse) do |input, opts|
|
2
|
+
as = block = nil
|
3
|
+
result = trace = nil
|
2
4
|
match do |parser|
|
3
5
|
begin
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
(
|
6
|
+
result = parser.parse(input)
|
7
|
+
block ?
|
8
|
+
block.call(result) :
|
9
|
+
(as == result || as.nil?)
|
8
10
|
rescue Parslet::ParseFailed
|
9
|
-
|
11
|
+
trace = parser.error_tree.ascii_tree if opts && opts[:trace]
|
10
12
|
false
|
11
13
|
end
|
12
14
|
end
|
13
15
|
|
14
16
|
failure_message_for_should do |is|
|
15
|
-
if
|
17
|
+
if block
|
16
18
|
"expected output of parsing #{input.inspect}" <<
|
17
19
|
" with #{is.inspect} to meet block conditions, but it didn't"
|
18
20
|
else
|
19
21
|
"expected " <<
|
20
|
-
(
|
22
|
+
(as ?
|
21
23
|
"output of parsing #{input.inspect}"<<
|
22
|
-
" with #{is.inspect} to equal #{
|
24
|
+
" with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" :
|
23
25
|
"#{is.inspect} to be able to parse #{input.inspect}") <<
|
24
|
-
(
|
25
|
-
"\n"
|
26
|
+
(trace ?
|
27
|
+
"\n"+trace :
|
26
28
|
'')
|
27
29
|
end
|
28
30
|
end
|
29
31
|
|
30
32
|
failure_message_for_should_not do |is|
|
31
|
-
if
|
33
|
+
if block
|
32
34
|
"expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
|
33
35
|
else
|
34
36
|
"expected " <<
|
35
|
-
(
|
37
|
+
(as ?
|
36
38
|
"output of parsing #{input.inspect}"<<
|
37
|
-
" with #{is.inspect} not to equal #{
|
39
|
+
" with #{is.inspect} not to equal #{as.inspect}" :
|
38
40
|
|
39
41
|
"#{is.inspect} to not parse #{input.inspect}, but it did")
|
40
42
|
end
|
@@ -42,9 +44,8 @@ RSpec::Matchers.define(:parse) do |input, opts|
|
|
42
44
|
|
43
45
|
# NOTE: This has a nodoc tag since the rdoc parser puts this into
|
44
46
|
# Object, a thing I would never allow.
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
self
|
47
|
+
chain :as do |expected_output, &my_block|
  # The block parameter must not be called +block+: block parameters are
  # always local to the block, so `block = block` would only assign the
  # parameter to itself and the checker block would never reach the +block+
  # variable of the enclosing matcher definition.
  as = expected_output
  block = my_block
end
|
50
51
|
end
|
data/lib/parslet/source.rb
CHANGED
@@ -3,65 +3,83 @@ require 'stringio'
|
|
3
3
|
|
4
4
|
require 'parslet/source/line_cache'
|
5
5
|
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
io
|
14
|
-
|
6
|
+
module Parslet
  # Wraps the input IO to parslet. The interface defined by this class is
  # smaller than what IO offers, but enhances it with a #line_and_column
  # method for the current position.
  #
  class Source
    # Wraps +io+. Anything that answers to #to_str is wrapped in a StringIO
    # first, so plain strings can be passed as well.
    #
    def initialize(io)
      if io.respond_to? :to_str
        io = StringIO.new(io)
      end

      @io = io
      @line_cache = LineCache.new
    end

    # Reads n bytes from the input and returns them as a Parslet::Slice. If
    # the n bytes end in the middle of a multibyte representation of a char,
    # that char is returned fully.
    #
    # Raises ArgumentError if n is smaller than 1.
    #
    # Example:
    #   source.read(1)  # always returns at least one valid char
    #   source.read(7)  # reads 7 bytes, then to the next char boundary.
    #
    def read(n)
      raise ArgumentError, "Cannot read < 1 characters at a time." if n < 1
      read_slice(n)
    end

    def eof?
      @io.eof?
    end
    def pos
      @io.pos
    end
    def pos=(new_pos)
      @io.pos = new_pos
    end

    # Returns a <line, column> tuple for the given position. If no position is
    # given, line/column information is returned for the current position given
    # by #pos.
    #
    def line_and_column(position=nil)
      @line_cache.line_and_column(position || self.pos)
    end

    # Formats an error cause at the current position or at the position given
    # by pos. If pos is nil, the current source position will be the error
    # position.
    #
    def error(message, error_pos=nil)
      real_pos = (error_pos||self.pos)

      Cause.format(self, real_pos, message)
    end

  private
    # Reads up to +needed+ bytes starting at the current position, records
    # the line endings found in them and returns the result as a slice. At
    # the end of input the slice is empty.
    #
    def read_slice(needed)
      start = @io.pos
      # gets with a nil separator and a limit does not cut multibyte
      # characters in half.
      buf = @io.gets(nil, needed)

      # cache line ends
      @line_cache.scan_for_line_endings(start, buf)

      Parslet::Slice.new(buf || '', start, @line_cache)
    end

    # Only Ruby 1.8 needs the byte based fallback. Testing for 'not 1.9'
    # instead would also select it on every later Ruby version, where it
    # could split multibyte characters.
    if RUBY_VERSION =~ /\A1\.8\./
      def read_slice(needed)
        start = @io.pos
        buf = @io.read(needed)

        # cache line ends
        @line_cache.scan_for_line_endings(start, buf)

        Parslet::Slice.new(buf || '', start, @line_cache)
      end
    end
  end
end
|