ruby_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ruby_parser might be problematic. Click here for more details.
- data/.autotest +21 -0
- data/History.txt +5 -0
- data/Manifest.txt +9 -0
- data/README.txt +64 -0
- data/Rakefile +56 -0
- data/lib/ruby_lexer.rb +2751 -0
- data/lib/ruby_parser.rb +5987 -0
- data/lib/ruby_parser.y +1648 -0
- data/test/test_ruby_lexer.rb +398 -0
- data/test/test_ruby_parser.rb +326 -0
- metadata +83 -0
data/.autotest
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
Autotest.add_hook :initialize do |at|
|
4
|
+
at.extra_files << "../../ParseTree/dev/test/pt_testcase.rb"
|
5
|
+
at.libs << ":../../ParseTree/dev/lib:../../ParseTree/dev/test"
|
6
|
+
at.exceptions << 'unit'
|
7
|
+
|
8
|
+
at.unit_diff = "unit_diff -u -b"
|
9
|
+
|
10
|
+
at.add_mapping(/^lib\/.*\.y$/) do |f, _|
|
11
|
+
at.files_matching %r%^test/.*#{File.basename(f, '.y').gsub '_', '_?'}.rb$%
|
12
|
+
end
|
13
|
+
|
14
|
+
at.add_mapping(/pt_testcase.rb/) do |f, _|
|
15
|
+
at.files_matching(/^test.*rb$/)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
Autotest.add_hook :run_command do |at|
|
20
|
+
system "rake parser"
|
21
|
+
end
|
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
ruby_parser
|
2
|
+
by Ryan Davis
|
3
|
+
http://parsetree.rubyforge.org/
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
ruby_parser (RP) is a ruby parser written in pure ruby (utilizing
|
8
|
+
racc--which does by default use a C extension). RP's output is
|
9
|
+
the same as ParseTree's output: s-expressions using ruby's arrays and
|
10
|
+
base types.
|
11
|
+
|
12
|
+
== FEATURES/PROBLEMS:
|
13
|
+
|
14
|
+
* Pure ruby, no compiles.
|
15
|
+
* Incredibly simple interface.
|
16
|
+
* Output is 100% equivalent to ParseTree.
|
17
|
+
* Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
|
18
|
+
* Known Issue: Speed sucks currently. 5500 tests currently run in 21 min.
|
19
|
+
* Known Issue: Code is waaay ugly. Port of a port. Not my fault. Will fix RSN.
|
20
|
+
* Known Issue: I don't currently support newline nodes.
|
21
|
+
* Known Issue: Totally awesome.
|
22
|
+
* Known Issue: dasgn_curr decls can be out of order from ParseTree's.
|
23
|
+
* TODO: Add comment nodes.
|
24
|
+
|
25
|
+
== SYNOPSIS:
|
26
|
+
|
27
|
+
RubyParser.new.parse "1+1"
|
28
|
+
# => s(:call, s(:lit, 1), :+, s(:array, s(:lit, 1)))
|
29
|
+
|
30
|
+
== REQUIREMENTS:
|
31
|
+
|
32
|
+
* ruby. woot.
|
33
|
+
* ParseTree is needed for Sexp class... crap. I might break that out.
|
34
|
+
* ParseTree for testing.
|
35
|
+
* racc full package for parser development.
|
36
|
+
|
37
|
+
== INSTALL:
|
38
|
+
|
39
|
+
* sudo gem install ruby_parser
|
40
|
+
|
41
|
+
== LICENSE:
|
42
|
+
|
43
|
+
(The MIT License)
|
44
|
+
|
45
|
+
Copyright (c) 2007 Ryan Davis
|
46
|
+
|
47
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
48
|
+
a copy of this software and associated documentation files (the
|
49
|
+
'Software'), to deal in the Software without restriction, including
|
50
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
51
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
52
|
+
permit persons to whom the Software is furnished to do so, subject to
|
53
|
+
the following conditions:
|
54
|
+
|
55
|
+
The above copyright notice and this permission notice shall be
|
56
|
+
included in all copies or substantial portions of the Software.
|
57
|
+
|
58
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
59
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
60
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
61
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
62
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
63
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
64
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require './lib/ruby_lexer.rb'
|
6
|
+
|
7
|
+
hoe = Hoe.new('ruby_parser', RubyParser::VERSION) do |p|
|
8
|
+
p.rubyforge_name = 'parsetree'
|
9
|
+
p.author = 'Ryan Davis'
|
10
|
+
p.email = 'ryand-ruby@zenspider.com'
|
11
|
+
p.summary = p.paragraphs_of('README.txt', 2).join("\n\n")
|
12
|
+
p.description = p.paragraphs_of('README.txt', 2..6).join("\n\n")
|
13
|
+
p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[-1]
|
14
|
+
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
|
15
|
+
p.extra_deps << 'ParseTree'
|
16
|
+
end
|
17
|
+
|
18
|
+
hoe.spec.files += ['lib/ruby_parser.rb'] # jim.... cmon man
|
19
|
+
|
20
|
+
module Rake::TaskManager
|
21
|
+
def all_tasks
|
22
|
+
@tasks
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Rake.application.all_tasks["default"].prerequisites.clear
|
27
|
+
|
28
|
+
task :default => :parser
|
29
|
+
task :test => :parser
|
30
|
+
|
31
|
+
path = "pkg/ruby_parser-#{RubyParser::VERSION}"
|
32
|
+
task path => :parser do
|
33
|
+
Dir.chdir path do
|
34
|
+
sh "rake parser"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
task :parser => ["lib/ruby_parser.rb"]
|
39
|
+
|
40
|
+
rule '.rb' => '.y' do |t|
|
41
|
+
sh "racc -g -o #{t.name} #{t.source}"
|
42
|
+
end
|
43
|
+
|
44
|
+
task :clean do
|
45
|
+
rm_f(Dir["**/*~"] +
|
46
|
+
Dir["**/*.diff"] +
|
47
|
+
Dir["lib/ruby_parser.rb"] +
|
48
|
+
Dir["lib/*.output"])
|
49
|
+
end
|
50
|
+
|
51
|
+
# require 'rcov/rcovtask'
|
52
|
+
# Rcov::RcovTask.new do |t|
|
53
|
+
# t.test_files = FileList['test/test_ruby_lexer.rb']
|
54
|
+
# end
|
55
|
+
|
56
|
+
# vim: syntax=Ruby
|
data/lib/ruby_lexer.rb
ADDED
@@ -0,0 +1,2751 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'stringio'
|
3
|
+
require 'racc/parser'
|
4
|
+
$: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
|
5
|
+
require 'sexp'
|
6
|
+
|
7
|
+
############################################################
|
8
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
9
|
+
|
10
|
+
class Module
|
11
|
+
def kill *methods
|
12
|
+
methods.each do |method|
|
13
|
+
define_method method do |*args|
|
14
|
+
c = caller
|
15
|
+
raise "#{method} is dead - called from #{c[0]}"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
# END HACK
|
22
|
+
############################################################
|
23
|
+
|
24
|
+
class RubyParser < Racc::Parser
|
25
|
+
VERSION = '1.0.0'
|
26
|
+
|
27
|
+
attr_accessor :lexer, :in_def, :in_single, :file
|
28
|
+
attr_reader :env, :warnings
|
29
|
+
|
30
|
+
def initialize
|
31
|
+
super
|
32
|
+
self.lexer = RubyLexer.new
|
33
|
+
self.in_def = false
|
34
|
+
self.in_single = 0
|
35
|
+
@env = Environment.new
|
36
|
+
end
|
37
|
+
|
38
|
+
alias :old_yyerror :yyerror
|
39
|
+
def yyerror msg=nil
|
40
|
+
warn msg if msg
|
41
|
+
old_yyerror
|
42
|
+
end
|
43
|
+
|
44
|
+
def parse(str, file = "(string)")
|
45
|
+
raise "bad val: #{str.inspect}" unless String === str
|
46
|
+
|
47
|
+
self.file = file
|
48
|
+
self.lexer.src = StringIO.new(str)
|
49
|
+
|
50
|
+
@yydebug = ENV.has_key? 'DEBUG'
|
51
|
+
|
52
|
+
do_parse
|
53
|
+
end
|
54
|
+
|
55
|
+
def do_parse
|
56
|
+
_racc_do_parse_rb(_racc_setup, false)
|
57
|
+
end
|
58
|
+
|
59
|
+
def yyparse(recv, mid)
|
60
|
+
_racc_yyparse_rb(recv, mid, _racc_setup, true)
|
61
|
+
end
|
62
|
+
|
63
|
+
def on_error( error_token_id, error_value, value_stack )
|
64
|
+
p :error => [ error_token_id, error_value, value_stack ]
|
65
|
+
raise "boom"
|
66
|
+
end if ENV["DEBUG"]
|
67
|
+
|
68
|
+
def next_token
|
69
|
+
if self.lexer.advance then
|
70
|
+
[self.lexer.token, self.lexer.yacc_value]
|
71
|
+
else
|
72
|
+
return [false, '$end']
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def assignable(lhs, value = nil)
|
77
|
+
id = lhs.to_sym
|
78
|
+
id = id.to_sym if Token === id
|
79
|
+
|
80
|
+
raise SyntaxError, "Can't change the value of #{id}" if
|
81
|
+
id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
|
82
|
+
|
83
|
+
result = case id.to_s
|
84
|
+
when /^@@/ then
|
85
|
+
asgn = in_def || in_single > 0
|
86
|
+
s((asgn ? :cvasgn : :cvdecl), id)
|
87
|
+
when /^@/ then
|
88
|
+
s(:iasgn, id)
|
89
|
+
when /^\$/ then
|
90
|
+
s(:gasgn, id)
|
91
|
+
when /^[A-Z]/ then
|
92
|
+
s(:cdecl, id)
|
93
|
+
else
|
94
|
+
|
95
|
+
case self.env[id]
|
96
|
+
when :lvar then
|
97
|
+
s(:lasgn, id)
|
98
|
+
when :dvar, nil then
|
99
|
+
if self.env.current[id] == :dvar then
|
100
|
+
s(:dasgn_curr, id)
|
101
|
+
elsif self.env[id] == :dvar then
|
102
|
+
self.env.use(id)
|
103
|
+
s(:dasgn, id)
|
104
|
+
elsif ! self.env.dynamic? then
|
105
|
+
s(:lasgn, id)
|
106
|
+
else
|
107
|
+
s(:dasgn_curr, id)
|
108
|
+
end
|
109
|
+
# if env.dynamic? then
|
110
|
+
# if env.dasgn_curr? id then
|
111
|
+
# s(:dasgn_curr, id)
|
112
|
+
# else
|
113
|
+
# s(:dasgn, id)
|
114
|
+
# end
|
115
|
+
# else
|
116
|
+
# s(:lasgn, id)
|
117
|
+
# end
|
118
|
+
else
|
119
|
+
raise "wtf?"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
self.env[id] = (self.env.dynamic? ? :dvar : :lvar) unless self.env[id]
|
124
|
+
|
125
|
+
result << value if value
|
126
|
+
|
127
|
+
return result
|
128
|
+
end
|
129
|
+
|
130
|
+
def warnings= warnings
|
131
|
+
@warnings = warnings
|
132
|
+
|
133
|
+
self.lexer.warnings = warnings
|
134
|
+
end
|
135
|
+
|
136
|
+
def arg_add(node1, node2)
|
137
|
+
return s(:array, node2) unless node1
|
138
|
+
return node1 << node2 if node1[0] == :array
|
139
|
+
return s(:argspush, node1, node2)
|
140
|
+
end
|
141
|
+
|
142
|
+
def node_assign(lhs, rhs)
|
143
|
+
return nil unless lhs
|
144
|
+
|
145
|
+
rhs = value_expr rhs
|
146
|
+
|
147
|
+
case lhs[0]
|
148
|
+
when :gasgn, :iasgn, :lasgn, :dasgn, :dasgn_curr,
|
149
|
+
:masgn, :cdecl, :cvdecl, :cvasgn then
|
150
|
+
lhs << rhs
|
151
|
+
when :attrasgn, :call then
|
152
|
+
args = lhs.array(true) || lhs.argscat(true) || lhs.splat(true) # FIX: fragile
|
153
|
+
# args = case lhs[1][1]
|
154
|
+
# when :array, :argscat, :splat then
|
155
|
+
# lhs.delete_at 1
|
156
|
+
# else
|
157
|
+
# nil # TODO: check - no clue what it should be, or even if
|
158
|
+
# end
|
159
|
+
|
160
|
+
lhs << arg_add(args, rhs)
|
161
|
+
end
|
162
|
+
|
163
|
+
lhs
|
164
|
+
end
|
165
|
+
|
166
|
+
# Builds the s-expression for reading the identifier +id+.
#
# +id+ may be a Symbol, a String, a Token (converted with to_sym) or a Sexp
# (its last element is converted with to_sym).
#
# Returns:
# * s(:self) / s(:nil) / s(:true) / s(:false) for those keywords
# * s(:str, file) for __FILE__ and s(:lit, <current line>) for __LINE__
# * s(:cvar, id), s(:ivar, id), s(:gvar, id) or s(:const, id) depending on
#   the name's prefix ("@@", "@", "$", or a leading capital letter)
# * s(<type>, id) when env knows the name, where <type> is whatever env
#   stored for it
# * s(:vcall, id) for any other name
def gettable(id)
  id = id.to_sym      if Token  === id # HACK
  id = id.last.to_sym if Sexp   === id # HACK
  id = id.to_sym      if String === id # HACK

  return s(:self)                         if id == :self
  return s(:nil)                          if id == :nil
  return s(:true)                         if id == :true
  return s(:false)                        if id == :false
  return s(:str, file)                    if id == :"__FILE__"
  return s(:lit, lexer.src.current_line)  if id == :"__LINE__"

  case id.to_s
  when /^@@/ then
    s(:cvar, id)
  when /^@/ then
    s(:ivar, id)
  when /^\$/ then
    s(:gvar, id)
  when /^[A-Z]/ then
    s(:const, id)
  else
    # The original also tested `env.dynamic? and :dvar == env[id]` after this
    # lookup had come back empty; that arm could never run, so a name unknown
    # to env is simply a vcall.
    type = env[id]
    type ? s(type, id) : s(:vcall, id)
  end
end
|
202
|
+
|
203
|
+
def block_append(head, tail, strip_tail_block=false)
|
204
|
+
return head unless tail
|
205
|
+
return tail unless head
|
206
|
+
|
207
|
+
case head[0]
|
208
|
+
when :lit, :str then
|
209
|
+
return tail
|
210
|
+
end
|
211
|
+
|
212
|
+
head = remove_begin(head)
|
213
|
+
head = s(:block, head) unless head[0] == :block
|
214
|
+
|
215
|
+
if strip_tail_block and Sexp === tail and tail[0] == :block then
|
216
|
+
head.push(*tail.values)
|
217
|
+
else
|
218
|
+
head << tail
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
def new_yield(node)
|
223
|
+
if node then
|
224
|
+
raise SyntaxError, "Block argument should not be given." if
|
225
|
+
node.node_type == :block_pass
|
226
|
+
|
227
|
+
node = node.last if node.node_type == :array and node.size == 2
|
228
|
+
end
|
229
|
+
|
230
|
+
return s(:yield, node)
|
231
|
+
end
|
232
|
+
|
233
|
+
def logop(type, left, right)
|
234
|
+
left = value_expr left
|
235
|
+
|
236
|
+
if left and left[0] == type and not left.paren then
|
237
|
+
node, second = left, nil
|
238
|
+
|
239
|
+
while (second = node[2]) && second[0] == type and not second.paren do
|
240
|
+
node = second
|
241
|
+
end
|
242
|
+
|
243
|
+
node[2] = s(type, second, right)
|
244
|
+
|
245
|
+
return left
|
246
|
+
end
|
247
|
+
|
248
|
+
return s(type, left, right)
|
249
|
+
end
|
250
|
+
|
251
|
+
def new_call recv, meth, args = nil # REFACTOR - merge with fcall
|
252
|
+
if args && args[0] == :block_pass then
|
253
|
+
new_args = args.array(true) || args.argscat(true) || args.splat(true)
|
254
|
+
call = s(:call, recv, meth)
|
255
|
+
call << new_args if new_args
|
256
|
+
args << call
|
257
|
+
|
258
|
+
return args
|
259
|
+
end
|
260
|
+
result = s(:call, recv, meth)
|
261
|
+
result << args if args
|
262
|
+
result
|
263
|
+
end
|
264
|
+
|
265
|
+
def new_fcall meth, args
|
266
|
+
if args and args[0] == :block_pass then
|
267
|
+
new_args = args.array(true) || args.argscat(true) || args.splat(true)
|
268
|
+
call = s(:fcall, meth)
|
269
|
+
call << new_args if new_args
|
270
|
+
args << call
|
271
|
+
return args
|
272
|
+
end
|
273
|
+
|
274
|
+
r = s(:fcall, meth)
|
275
|
+
r << args if args and args != s(:array)
|
276
|
+
r
|
277
|
+
end
|
278
|
+
|
279
|
+
def arg_blk_pass node1, node2
|
280
|
+
if node2 then
|
281
|
+
node2.insert 1, node1
|
282
|
+
return node2
|
283
|
+
else
|
284
|
+
node1
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
def get_match_node lhs, rhs
|
289
|
+
if lhs then
|
290
|
+
case lhs[0]
|
291
|
+
when :dregx, :dregx_once then
|
292
|
+
return s(:match2, lhs, rhs)
|
293
|
+
when :lit then
|
294
|
+
return s(:match2, lhs, rhs) if Regexp === lhs.last
|
295
|
+
end
|
296
|
+
end
|
297
|
+
|
298
|
+
if rhs then
|
299
|
+
case rhs[0]
|
300
|
+
when :dregx, :dregx_once then
|
301
|
+
return s(:match3, rhs, lhs)
|
302
|
+
when :lit then
|
303
|
+
return s(:match3, rhs, lhs) if Regexp === rhs.last
|
304
|
+
end
|
305
|
+
end
|
306
|
+
|
307
|
+
return s(:call, lhs, :"=~", s(:array, rhs))
|
308
|
+
end
|
309
|
+
|
310
|
+
# Rewrites +node+ for use as the test of a conditional.
#
# * nil is returned unchanged.
# * A dynamic regexp node becomes s(:match2, node, s(:gvar, :$_)).
# * A :lit node holding a Regexp becomes s(:match, node); other literals
#   are returned as they are.
# * :and / :or nodes are rebuilt with both operands passed through cond.
# * :dot2 / :dot3 range nodes become :flip2 / :flip3, and a "flip<hash>"
#   entry is recorded in env (as :dvar when env is dynamic, else :lvar).
# * Anything else is returned unchanged (after value_expr).
def cond node
  return nil if node.nil?
  node = value_expr node

  case node.first
  when :dregx, :dregx_once, :dregex then
    # The original only listed :dregex, a spelling nothing else in this file
    # uses (get_match_node matches :dregx / :dregx_once), so this branch was
    # dead. The old spelling is kept alongside the real node types.
    return s(:match2, node, s(:gvar, "$_".to_sym))
  when :regex then
    return s(:match, node)
  when :lit then
    return Regexp === node.last ? s(:match, node) : node
  when :and, :or then
    return s(node.first, cond(node[1]), cond(node[2]))
  when :dot2, :dot3 then
    label = "flip#{node.hash}"
    env[label] = env.dynamic? ? :dvar : :lvar
    flip = node.first == :dot2 ? :flip2 : :flip3
    return s(flip, node[1], node[2])
  else
    return node
  end
end
|
341
|
+
|
342
|
+
def append_to_block head, tail # FIX: wtf is this?!? switch to block_append
|
343
|
+
return head if tail.nil?
|
344
|
+
return tail if head.nil?
|
345
|
+
|
346
|
+
head = s(:block, head) unless head.first == :block
|
347
|
+
head << tail
|
348
|
+
end
|
349
|
+
|
350
|
+
def new_super args
|
351
|
+
if args && args.first == :block_pass then
|
352
|
+
t, body, bp = args
|
353
|
+
result = s(t, bp, s(:super, body))
|
354
|
+
else
|
355
|
+
result = s(:super)
|
356
|
+
result << args if args and args != s(:array)
|
357
|
+
end
|
358
|
+
result
|
359
|
+
end
|
360
|
+
|
361
|
+
def aryset receiver, index
|
362
|
+
s(:attrasgn, receiver, :"[]=", index)
|
363
|
+
end
|
364
|
+
|
365
|
+
def arg_concat node1, node2
|
366
|
+
return node2.nil? ? node1 : s(:argscat, node1, node2)
|
367
|
+
end
|
368
|
+
|
369
|
+
def list_append list, item # TODO: nuke me *sigh*
|
370
|
+
return s(:array, item) unless list
|
371
|
+
list << item
|
372
|
+
end
|
373
|
+
|
374
|
+
def literal_concat head, tail
|
375
|
+
return tail unless head
|
376
|
+
return head unless tail
|
377
|
+
|
378
|
+
htype, ttype = head[0], tail[0]
|
379
|
+
|
380
|
+
head = s(:dstr, '', head) if htype == :evstr
|
381
|
+
|
382
|
+
case ttype
|
383
|
+
when :str then
|
384
|
+
if htype == :str
|
385
|
+
head[-1] << tail[-1]
|
386
|
+
elsif htype == :dstr and head.size == 2 then
|
387
|
+
head[-1] << tail[-1]
|
388
|
+
else
|
389
|
+
head << tail
|
390
|
+
end
|
391
|
+
when :dstr then
|
392
|
+
if htype == :str then
|
393
|
+
tail[1] = head[-1] + tail[1]
|
394
|
+
head = tail
|
395
|
+
else
|
396
|
+
tail[0] = :array
|
397
|
+
tail[1] = s(:str, tail[1])
|
398
|
+
tail.delete_at 1 if tail[1] == s(:str, '')
|
399
|
+
|
400
|
+
head.push(*tail[1..-1])
|
401
|
+
end
|
402
|
+
when :evstr then
|
403
|
+
head[0] = :dstr if htype == :str
|
404
|
+
if head.size == 2 and tail[1][0] == :str then
|
405
|
+
head[-1] << tail[1][-1]
|
406
|
+
head[0] = :str if head.size == 2 # HACK ?
|
407
|
+
else
|
408
|
+
head.push(tail)
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
return head
|
413
|
+
end
|
414
|
+
|
415
|
+
def remove_begin node
|
416
|
+
node = node[-1] if node and node[0] == :begin and node.size == 2
|
417
|
+
node
|
418
|
+
end
|
419
|
+
|
420
|
+
def ret_args node
|
421
|
+
if node then
|
422
|
+
if node[0] == :block_pass then
|
423
|
+
raise SyntaxError, "block argument should not be given"
|
424
|
+
end
|
425
|
+
|
426
|
+
node = node.last if node[0] == :array && node.size == 2
|
427
|
+
node = s(:svalue, node) if node[0] == :splat and not node.paren # HACK matz wraps ONE of the FOUR splats in a newline to distinguish. I use paren for now. ugh
|
428
|
+
end
|
429
|
+
|
430
|
+
node
|
431
|
+
end
|
432
|
+
|
433
|
+
def value_expr node # HACK
|
434
|
+
node = remove_begin node
|
435
|
+
node[2] = value_expr(node[2]) if node and node[0] == :if
|
436
|
+
node
|
437
|
+
end
|
438
|
+
|
439
|
+
def void_stmts node
|
440
|
+
return nil unless node
|
441
|
+
return node unless node[0] == :block
|
442
|
+
|
443
|
+
node[1..-2] = node[1..-2].map { |n| remove_begin(n) }
|
444
|
+
node
|
445
|
+
end
|
446
|
+
|
447
|
+
############################################################
|
448
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
449
|
+
|
450
|
+
def dyna_init body, known_vars = []
|
451
|
+
var = nil
|
452
|
+
vars = self.env.dynamic.keys - known_vars
|
453
|
+
|
454
|
+
vars.each do |id|
|
455
|
+
if self.env.used? id then
|
456
|
+
var = s(:dasgn_curr, id, var).compact
|
457
|
+
end
|
458
|
+
end
|
459
|
+
|
460
|
+
self.block_append(var, body, body && body[0] == :block)
|
461
|
+
end
|
462
|
+
|
463
|
+
def warning s
|
464
|
+
# do nothing for now
|
465
|
+
end
|
466
|
+
|
467
|
+
kill :is_in_def, :is_in_single, :push_local_scope, :pop_local_scope, :support
|
468
|
+
|
469
|
+
# END HACK
|
470
|
+
############################################################
|
471
|
+
|
472
|
+
end
|
473
|
+
|
474
|
+
class RubyLexer
|
475
|
+
attr_accessor :command_start
|
476
|
+
attr_accessor :cmdarg
|
477
|
+
attr_accessor :cond
|
478
|
+
attr_accessor :nest
|
479
|
+
|
480
|
+
# Additional context surrounding tokens that both the lexer and
|
481
|
+
# grammar use.
|
482
|
+
attr_reader :lex_state
|
483
|
+
|
484
|
+
def lex_state= o
|
485
|
+
raise "wtf?" unless Symbol === o
|
486
|
+
@lex_state = o
|
487
|
+
end
|
488
|
+
|
489
|
+
attr_accessor :end_seen # TODO: figure out if I really need this
|
490
|
+
|
491
|
+
attr_accessor :lex_strterm
|
492
|
+
|
493
|
+
# Used for tiny smidgen of grammar in lexer
|
494
|
+
attr_accessor :parser_support # TODO: remove
|
495
|
+
|
496
|
+
# Stream of data that yylex examines.
|
497
|
+
attr_accessor :src
|
498
|
+
|
499
|
+
# Last token read via yylex.
|
500
|
+
attr_accessor :token
|
501
|
+
|
502
|
+
# Temporary buffer to build up a potential token. Consumer takes
|
503
|
+
# responsibility to reset this before use.
|
504
|
+
attr_accessor :token_buffer
|
505
|
+
|
506
|
+
# Value of last token which had a value associated with it.
|
507
|
+
attr_accessor :yacc_value
|
508
|
+
|
509
|
+
# What handles warnings
|
510
|
+
attr_accessor :warnings
|
511
|
+
|
512
|
+
# TODO: remove all of these
|
513
|
+
alias :source= :src=
|
514
|
+
alias :str_term :lex_strterm
|
515
|
+
alias :str_term= :lex_strterm=
|
516
|
+
alias :state :lex_state
|
517
|
+
alias :state= :lex_state=
|
518
|
+
alias :value :yacc_value
|
519
|
+
alias :value= :yacc_value=
|
520
|
+
alias :getCmdArgumentState :cmdarg
|
521
|
+
|
522
|
+
# Give a name to a value. Enebo: This should be used more.
|
523
|
+
# HACK OMG HORRIBLE KILL ME NOW. Enebo, no. this shouldn't be used more
|
524
|
+
EOF = nil # was 0... ugh
|
525
|
+
|
526
|
+
# ruby constants for strings (should this be moved somewhere else?)
|
527
|
+
STR_FUNC_ESCAPE=0x01
|
528
|
+
STR_FUNC_EXPAND=0x02
|
529
|
+
STR_FUNC_REGEXP=0x04
|
530
|
+
STR_FUNC_QWORDS=0x08
|
531
|
+
STR_FUNC_SYMBOL=0x10
|
532
|
+
STR_FUNC_INDENT=0x20 # <<-HEREDOC
|
533
|
+
|
534
|
+
STR_SQUOTE = 0
|
535
|
+
STR_DQUOTE = STR_FUNC_EXPAND
|
536
|
+
STR_XQUOTE = STR_FUNC_EXPAND
|
537
|
+
STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
|
538
|
+
STR_SSYM = STR_FUNC_SYMBOL
|
539
|
+
STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
|
540
|
+
|
541
|
+
def initialize
|
542
|
+
self.parser_support = nil
|
543
|
+
self.token_buffer = []
|
544
|
+
self.cond = StackState.new(:cond)
|
545
|
+
self.cmdarg = StackState.new(:cmdarg)
|
546
|
+
self.nest = 0
|
547
|
+
self.end_seen = false
|
548
|
+
|
549
|
+
reset
|
550
|
+
end
|
551
|
+
|
552
|
+
def reset
|
553
|
+
self.token = nil
|
554
|
+
self.yacc_value = nil
|
555
|
+
self.src = nil
|
556
|
+
@lex_state = nil
|
557
|
+
self.lex_strterm = nil
|
558
|
+
self.command_start = true
|
559
|
+
end
|
560
|
+
|
561
|
+
# How the parser advances to the next token.
|
562
|
+
#
|
563
|
+
# @return true if not at end of file (EOF).
|
564
|
+
|
565
|
+
def advance
|
566
|
+
r = yylex
|
567
|
+
self.token = r
|
568
|
+
return r != RubyLexer::EOF
|
569
|
+
end
|
570
|
+
|
571
|
+
def parse_string(quote)
|
572
|
+
_, string_type, term, open = quote
|
573
|
+
|
574
|
+
space = false # FIX: remove these
|
575
|
+
func = string_type
|
576
|
+
paren = open
|
577
|
+
|
578
|
+
return :tSTRING_END unless func
|
579
|
+
|
580
|
+
c = src.read
|
581
|
+
|
582
|
+
if (func & STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
|
583
|
+
begin
|
584
|
+
c = src.read
|
585
|
+
break if c == RubyLexer::EOF # HACK UGH
|
586
|
+
end while String === c and c =~ /\s/
|
587
|
+
space = true
|
588
|
+
end
|
589
|
+
|
590
|
+
if c == term && self.nest == 0 then
|
591
|
+
if func & STR_FUNC_QWORDS != 0 then
|
592
|
+
quote[1] = nil
|
593
|
+
return ' '
|
594
|
+
end
|
595
|
+
unless func & STR_FUNC_REGEXP != 0 then
|
596
|
+
self.yacc_value = t(term)
|
597
|
+
return :tSTRING_END
|
598
|
+
end
|
599
|
+
self.yacc_value = self.regx_options
|
600
|
+
return :tREGEXP_END
|
601
|
+
end
|
602
|
+
|
603
|
+
if space then
|
604
|
+
src.unread c
|
605
|
+
return ' '
|
606
|
+
end
|
607
|
+
|
608
|
+
self.token_buffer = []
|
609
|
+
|
610
|
+
if (func & STR_FUNC_EXPAND) != 0 && c == '#' then
|
611
|
+
case c = src.read
|
612
|
+
when '$', '@' then
|
613
|
+
src.unread c
|
614
|
+
return :tSTRING_DVAR
|
615
|
+
when '{' then
|
616
|
+
return :tSTRING_DBEG
|
617
|
+
end
|
618
|
+
token_buffer << '#'
|
619
|
+
end
|
620
|
+
|
621
|
+
src.unread c
|
622
|
+
|
623
|
+
if tokadd_string(func, term, paren, token_buffer) == RubyLexer::EOF then
|
624
|
+
# HACK ruby_sourceline = nd_line(quote)
|
625
|
+
raise "unterminated string meets end of file"
|
626
|
+
return :tSTRING_END
|
627
|
+
end
|
628
|
+
|
629
|
+
self.yacc_value = s(:str, token_buffer.join)
|
630
|
+
return :tSTRING_CONTENT
|
631
|
+
end
|
632
|
+
|
633
|
+
def regx_options
|
634
|
+
options = []
|
635
|
+
bad = []
|
636
|
+
|
637
|
+
while c = src.read and c =~ /[a-z]/ do
|
638
|
+
case c
|
639
|
+
when /^[ixmonesu]$/ then
|
640
|
+
options << c
|
641
|
+
else
|
642
|
+
bad << c
|
643
|
+
end
|
644
|
+
end
|
645
|
+
|
646
|
+
src.unread c
|
647
|
+
|
648
|
+
rb_compile_error("unknown regexp option%s - %s" %
|
649
|
+
[(bad.size > 1 ? "s" : ""), bad.join.inspect]) unless bad.empty?
|
650
|
+
|
651
|
+
return options.join
|
652
|
+
end
|
653
|
+
|
654
|
+
# Copies one backslash escape into the current token verbatim, backslash
# included, using tokadd. tokadd_string calls this for regexp bodies, after
# pushing the character that followed the backslash back onto +src+.
#
# term:: the terminator of the literal being lexed; only consulted in the
#        fallback branch.
#
# Returns true when end of input is hit inside the escape (after reporting
# it through yyerror), false otherwise. A backslash-newline adds nothing.
# The \M- and \C- forms are not implemented and raise "not yet".
def tokadd_escape term
  case c = src.read
  when "\n" then
    return false # just ignore
  when /[0-7]/ then # octal constant (was /0-7/, which never matched a digit)
    tokadd "\\"
    tokadd c

    2.times do
      c = src.read
      # Regexp test instead of `c < "0" || "7" < c` so EOF (nil) ends the
      # digits instead of raising; unread matches the hex branch below.
      unless c =~ /[0-7]/ then
        src.unread c
        break
      end
      tokadd c
    end

    return false
  when "x" then # hex constant
    tokadd "\\"
    tokadd c

    2.times do
      c = src.read
      unless c =~ /[0-9a-f]/i then # TODO error case? empty?
        src.unread c
        break
      end
      tokadd c
    end

    return false
  when "M" then
    if (c = src.read) != "-" then
      yyerror "Invalid escape character syntax"
      src.unread c
      return false
    end
    tokadd "\\"
    tokadd "M"
    tokadd "-"
    raise "not yet"
    # goto escaped;
  when "C" then
    if (c = src.read) != "-" then
      yyerror "Invalid escape character syntax"
      src.unread c
      return false
    end
    tokadd "\\"
    tokadd "C"
    tokadd "-"
    raise "not yet"
    # HACK goto escaped;
  when "c" then
    tokadd "\\"
    tokadd "c"
    # HACK escaped:
    c = src.read
    return tokadd_escape(term) if c == "\\"

    # EOF is nil here, so the original `c == -1` test never fired and nil
    # was added to the token. Report it like the top-level EOF branch.
    if c == RubyLexer::EOF then
      yyerror "Invalid escape character syntax"
      return true
    end

    tokadd c
    return false
  when RubyLexer::EOF then
    yyerror "Invalid escape character syntax"
    return true
  else
    # NOTE(review): unless term is itself a backslash this test is always
    # true, so the backslash is kept. Left as written.
    if (c != "\\" || c != term)
      tokadd "\\"
    end
    tokadd c
  end
  return false
end
|
733
|
+
|
734
|
+
# Decodes one backslash escape and returns the character it denotes as a
# String. The backslash itself has already been consumed by the caller;
# the rest of the escape is read from +src+.
#
# Recognised forms: \\ \n \t \r \f \v \a \e \b \s, octal (one to three
# digits), hex (\x followed by up to two digits), meta (\M-x) and control
# (\C-x or \cx). Any other character is returned unchanged.
#
# A malformed or truncated escape is reported through yyerror with
# "Invalid escape character syntax" and "\0" is returned.
#
# Every path returns a String. The original returned the Integer 0177 for
# \c? and a two-character '\0' on one of the error paths.
def read_escape
  case c = src.read
  when "\\" then # Backslash
    return c
  when "n" then # newline
    return "\n"
  when "t" then # horizontal tab
    return "\t"
  when "r" then # carriage-return
    return "\r"
  when "f" then # form-feed
    return "\f"
  when "v" then # vertical tab
    return "\13"
  when "a" then # alarm(bell)
    return "\007"
  when 'e' then # escape
    return "\033"
  when /[0-7]/ then # octal constant
    src.unread c # put the first digit back so the loop below reads them all

    n = 0

    3.times do
      c = src.read
      unless c =~ /[0-7]/ then
        src.unread c
        break
      end
      n = (n << 3) | c.to_i(8)
    end

    # Three octal digits can exceed one byte (e.g. \777); keep the low byte
    # so chr cannot raise.
    return (n & 0xff).chr
  when "x" then # hex constant
    n = 0

    2.times do
      # Test before touching the character: at EOF src.read is nil, and a
      # rejected character must be pushed back exactly as it was read (the
      # original down-cased it first).
      c = src.read
      unless c =~ /[0-9a-f]/i then
        src.unread c
        break
      end
      n = (n << 4) | c.hex
    end

    return n.chr
  when "b" then # backspace
    return "\010"
  when "s" then # space
    return " "
  when "M" then
    c = src.read
    if c != "-" then
      yyerror("Invalid escape character syntax")
      src.unread c
      return "\0"
    end

    case c = src.read
    when "\\" then
      c = read_escape
    when RubyLexer::EOF then
      yyerror("Invalid escape character syntax")
      return "\0"
    end

    # Set the meta bit on the byte. pack/unpack is used because String#[]
    # yields an Integer on Ruby 1.8 but a String on later versions.
    return [c.unpack("C").first | 0x80].pack("C")
  when "C", "c" then
    # Only the \C form requires a "-" before the controlled character.
    if c == "C" then
      c = src.read
      if c != "-" then
        yyerror("Invalid escape character syntax")
        src.unread c
        return "\0"
      end
    end

    case c = src.read
    when "\\" then
      c = read_escape
    when "?" then
      return "\177" # DEL
    when RubyLexer::EOF then
      yyerror("Invalid escape character syntax")
      return "\0"
    end

    # Mask down to the control-character range (same mask as the original).
    return [c.unpack("C").first & 0x9f].pack("C")
  when RubyLexer::EOF then
    yyerror("Invalid escape character syntax")
    return "\0"
  else
    return c
  end
end
|
841
|
+
|
842
|
+
def tokadd_string(func, term, paren, buffer)
|
843
|
+
until (c = src.read) == RubyLexer::EOF do
|
844
|
+
if c == paren then
|
845
|
+
self.nest += 1
|
846
|
+
elsif c == term then
|
847
|
+
if self.nest == 0 then
|
848
|
+
src.unread c
|
849
|
+
break
|
850
|
+
end
|
851
|
+
self.nest -= 1
|
852
|
+
elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek("\n") then
|
853
|
+
c2 = src.read
|
854
|
+
|
855
|
+
if c2 == '$' || c2 == '@' || c2 == '{' then
|
856
|
+
src.unread c2
|
857
|
+
src.unread c
|
858
|
+
break
|
859
|
+
end
|
860
|
+
src.unread(c2)
|
861
|
+
elsif c == "\\" then
|
862
|
+
c = src.read
|
863
|
+
case c
|
864
|
+
when "\n" then
|
865
|
+
break if ((func & RubyLexer::STR_FUNC_QWORDS) != 0) # TODO: check break
|
866
|
+
next if ((func & RubyLexer::STR_FUNC_EXPAND) != 0)
|
867
|
+
|
868
|
+
buffer << "\\"
|
869
|
+
when "\\" then
|
870
|
+
buffer << c if (func & RubyLexer::STR_FUNC_ESCAPE) != 0
|
871
|
+
else
|
872
|
+
if (func & RubyLexer::STR_FUNC_REGEXP) != 0 then
|
873
|
+
src.unread c
|
874
|
+
tokadd_escape term
|
875
|
+
next
|
876
|
+
elsif (func & RubyLexer::STR_FUNC_EXPAND) != 0 then
|
877
|
+
src.unread c
|
878
|
+
if (func & RubyLexer::STR_FUNC_ESCAPE) != 0 then
|
879
|
+
buffer << "\\"
|
880
|
+
end
|
881
|
+
c = read_escape
|
882
|
+
elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
|
883
|
+
# ignore backslashed spaces in %w
|
884
|
+
elsif c != term && !(paren && c == paren) then
|
885
|
+
buffer << "\\"
|
886
|
+
end
|
887
|
+
end
|
888
|
+
# else if (ismbchar(c)) {
|
889
|
+
# int i, len = mbclen(c)-1;
|
890
|
+
# for (i = 0; i < len; i++) {
|
891
|
+
# tokadd(c);
|
892
|
+
# c = nextc();
|
893
|
+
# }
|
894
|
+
# }
|
895
|
+
elsif (func & RubyLexer::STR_FUNC_QWORDS) != 0 && c =~ /\s/ then
|
896
|
+
src.unread c
|
897
|
+
break
|
898
|
+
end
|
899
|
+
|
900
|
+
if c == "\0" && (func & RubyLexer::STR_FUNC_SYMBOL) != 0 then
|
901
|
+
raise SyntaxError, "symbol cannot contain '\\0'"
|
902
|
+
end
|
903
|
+
|
904
|
+
buffer << c # unless c == "\r"
|
905
|
+
end # while
|
906
|
+
|
907
|
+
return c
|
908
|
+
end
|
909
|
+
|
910
|
+
##
# Lexes the next piece of a here document.
#
# @param here the heredoc strterm; its elements after the first are the
#        terminator string, the string-function flags and the remainder of
#        the line that introduced the heredoc.
# @return :tSTRING_END when the terminator is found at the start of a line,
#         :tSTRING_DVAR / :tSTRING_DBEG when an interpolation starts, and
#         :tSTRING_CONTENT otherwise.
# Raises SyntaxError when the input ends before the terminator is seen.

def heredoc here
  _, eos, func, last_line = here

  terminator = eos + "\n"
  eof_error  = "can't find string #{eos.inspect} anywhere before EOF"
  indented   = (func & RubyLexer::STR_FUNC_INDENT) != 0

  raise SyntaxError, eof_error if src.peek == RubyLexer::EOF

  # Terminator right away: push back the rest of the introducing line so
  # lexing resumes there, and close the string.
  if src.begin_of_line? && src.match_string(terminator, indented) then
    src.unread_many last_line
    self.yacc_value = t(eos)
    return :tSTRING_END
  end

  pieces = []

  if (func & RubyLexer::STR_FUNC_EXPAND) != 0 then
    first = src.read

    if first == "#" then
      first = src.read

      if first == "$" || first == "@" then
        src.unread first
        self.yacc_value = t("#" + first)
        return :tSTRING_DVAR
      elsif first == "{" then
        self.yacc_value = t("#" + first)
        return :tSTRING_DBEG
      end

      pieces << "#"
    end

    src.unread first

    loop do
      stop = tokadd_string func, "\n", nil, pieces

      raise SyntaxError, eof_error if stop == RubyLexer::EOF

      # tokadd_string stopped before the end of the line: hand back what
      # has been collected so far.
      unless stop == "\n" then
        self.yacc_value = s(:str, pieces.join)
        return :tSTRING_CONTENT
      end

      pieces << src.read

      raise SyntaxError, eof_error if src.peek == RubyLexer::EOF

      break if src.match_string(terminator, indented)
    end
  else
    # Non-expanding heredoc: collect whole lines verbatim.
    loop do
      pieces << src.read_line
      raise SyntaxError, eof_error if src.peek == RubyLexer::EOF
      break if src.match_string(terminator, indented)
    end
  end

  # Put the terminator back so the next call sees it and emits tSTRING_END.
  src.unread_many terminator

  self.lex_strterm = s(:heredoc, eos, func, last_line)
  self.yacc_value = s(:str, pieces.join)

  :tSTRING_CONTENT
end
|
976
|
+
|
977
|
+
##
# Lexes the opening of a percent literal (%q, %Q, %w, %W, %x, %r, %s or the
# short-hand %<delimiter> form) and installs the matching string terminator.
#
# @param c the character read right after '%': either the literal type
#        letter (long-hand) or the opening delimiter itself (short-hand).
# @return the token type that opens the literal (:tSTRING_BEG, :tWORDS_BEG,
#         :tQWORDS_BEG, :tXSTRING_BEG, :tREGEXP_BEG or :tSYMBEG).
# Raises SyntaxError for an unknown type letter or when the input ends
# before a delimiter is read.

def parse_quote(c)
  short_hand = c !~ /[a-z0-9]/i

  if short_hand then
    # Short-hand (e.g. %{,%.,%!,... versus %Q{).
    beg, c = c, 'Q'
  else
    # Long-hand (e.g. %Q{}).
    beg = src.read
    if beg =~ /[a-z0-9]/i then
      raise SyntaxError, "unknown type of %string"
    end
  end

  if c == RubyLexer::EOF or beg == RubyLexer::EOF then
    raise SyntaxError, "unterminated quoted string meets end of file"
  end

  # Figure out the closing delimiter. For non-bracket delimiters the opener
  # is replaced by "\0", which is what gets stored as the "paren" element.
  nnd = case beg
        when '(' then
          ')'
        when '[' then
          ']'
        when '{' then
          '}'
        when '<' then
          '>'
        else
          nnd, beg = beg, "\0"
          nnd
        end

  string_type, token_type = STR_DQUOTE, :tSTRING_BEG
  self.yacc_value = t("%#{c}#{beg}")

  case c
  when 'Q' then
    self.yacc_value = t("%#{short_hand ? nnd : c + beg}")
  when 'q' then
    string_type, token_type = STR_SQUOTE, :tSTRING_BEG
  when 'W' then
    string_type, token_type = STR_DQUOTE | STR_FUNC_QWORDS, :tWORDS_BEG
    # skip leading whitespace inside the word list
    begin c = src.read end while c =~ /\s/
    src.unread(c)
  when 'w' then
    string_type, token_type = STR_SQUOTE | STR_FUNC_QWORDS, :tQWORDS_BEG
    # skip leading whitespace inside the word list
    begin c = src.read end while c =~ /\s/
    src.unread(c)
  when 'x' then
    string_type, token_type = STR_XQUOTE, :tXSTRING_BEG
  when 'r' then
    string_type, token_type = STR_REGEXP, :tREGEXP_BEG
  when 's' then
    string_type, token_type = STR_SSYM, :tSYMBEG
    self.lex_state = :expr_fname
  else
    raise SyntaxError, "Unknown type of %string. Expected 'Q', 'q', 'W', 'w', 'x', 'r', 's' or any non letter character, but found '" + c + "'."
  end

  self.lex_strterm = s(:strterm, string_type, nnd, beg)

  return token_type
end
|
1043
|
+
|
1044
|
+
##
# Lexes a heredoc identifier; the caller invokes this after reading "<<".
# Handles the optional '-' (indented terminator) and the quoted or bare
# identifier forms, then records a :heredoc strterm holding the identifier,
# the string-function flags and the rest of the current line.
#
# @return :tXSTRING_BEG for a backtick-quoted identifier, :tSTRING_BEG for
#         the other forms, or 0 when what follows is not a heredoc
#         identifier (the characters read are pushed back in that case).
# Raises SyntaxError when a quoted identifier is not closed before EOF.

def heredoc_identifier
  first = src.read
  func  = 0

  if first == '-' then
    first = src.read
    func = STR_FUNC_INDENT
  end

  case first
  when "\'", '"', '`' then
    term = first

    func |= case term
            when "\'" then STR_SQUOTE
            when '"'  then STR_DQUOTE
            else           STR_XQUOTE
            end

    token_buffer.clear

    # collect everything up to the closing quote
    loop do
      ch = src.read
      if ch == RubyLexer::EOF then
        raise SyntaxError, "unterminated here document identifier"
      end
      break if ch == term
      token_buffer << ch
    end
  else
    unless first =~ /\w/ then
      # not a heredoc after all: restore the input and tell the caller
      src.unread first
      src.unread '-' if (func & STR_FUNC_INDENT) != 0
      return 0 # TODO: RubyLexer::EOF?
    end

    token_buffer.clear
    term = '"'
    func |= STR_DQUOTE

    # bare identifier: a run of word characters
    ch = first
    loop do
      token_buffer << ch
      ch = src.read
      break if ch == RubyLexer::EOF || ch !~ /\w/
    end
    src.unread ch
  end

  rest_of_line = src.read_line
  self.lex_strterm = s(:heredoc, token_buffer.join, func, rest_of_line)

  backtick = term == '`'
  self.yacc_value = t(backtick ? "`" : "\"")
  backtick ? :tXSTRING_BEG : :tSTRING_BEG
end
|
1100
|
+
|
1101
|
+
##
# Emits the warning used when a leading operator or '/' after a space could
# be read either as a binary operator or as the start of a first argument.

def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
|
1104
|
+
|
1105
|
+
##
|
1106
|
+
# Read a comment up to end of line. When found each comment will
|
1107
|
+
# get stored away into the parser result so that any interested
|
1108
|
+
# party can use them as they see fit. One idea is that IDE authors
|
1109
|
+
# can do distance based heuristics to associate these comments to
|
1110
|
+
# the AST node they think they belong to.
|
1111
|
+
#
|
1112
|
+
# @param c last character read from lexer source
|
1113
|
+
# @return newline or eof value
|
1114
|
+
|
1115
|
+
def read_comment c
  # Collects the comment text into token_buffer, starting with the character
  # passed in, and stops at the newline or at EOF. The stopping character is
  # pushed back onto the source and also returned.
  token_buffer.clear
  token_buffer << c

  loop do
    c = src.read
    break if c == "\n" || c == RubyLexer::EOF
    token_buffer << c
  end

  src.unread c

  # Store away each comment to parser result so IDEs can do whatever
  # they want with them.
  # HACK parser_support.result.add_comment(Node.comment(token_buffer.join))

  c
end
|
1131
|
+
|
1132
|
+
##
|
1133
|
+
# Returns the next token. Also sets yy_val if needed.
|
1134
|
+
#
|
1135
|
+
# @return Description of the Returned Value
|
1136
|
+
# TODO: remove ALL sexps coming from here and move up to grammar
|
1137
|
+
# TODO: only literal values should come up from the lexer.
|
1138
|
+
|
1139
|
+
def yylex
  # Reads characters from src and returns the next token type, storing the
  # token's value through yacc_value=.
  #
  # * While a string/heredoc terminator is active (lex_strterm), lexing is
  #   delegated to heredoc / parse_string.
  # * Otherwise one character is read and dispatched on; whitespace,
  #   escaped newlines and =begin/=end blocks restart the loop.
  # * Anything that is not punctuation falls through to the identifier /
  #   keyword handling at the bottom of the loop.
  #
  # Returns RubyLexer::EOF at end of input and raises SyntaxError on
  # malformed input.
  c = ''
  space_seen = false
  command_state = false

  if lex_strterm then
    token = nil

    if lex_strterm[0] == :heredoc then
      token = self.heredoc(lex_strterm)
      if token == :tSTRING_END then
        self.lex_strterm = nil
        self.lex_state = :expr_end
      end
    else
      token = self.parse_string(lex_strterm)

      if token == :tSTRING_END || token == :tREGEXP_END then
        self.lex_strterm = nil
        self.lex_state = :expr_end
      end
    end

    return token
  end

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do
    c = src.read
    case c
    when /\004|\032|\000/, RubyLexer::EOF then # ^D, ^Z, EOF
      return RubyLexer::EOF
    when /\ |\t|\f|\r|\13/ then # white spaces, 13 = '\v
      space_seen = true
      next
    when /#|\n/ then
      return 0 if c == '#' and read_comment(c) == 0 # FIX 0?
      # Replace a string of newlines with a single one
      while (c = src.read) == "\n"
        # do nothing
      end

      src.unread c

      # A newline is insignificant while an expression is still expected.
      if (lex_state == :expr_beg ||
          lex_state == :expr_fname ||
          lex_state == :expr_dot ||
          lex_state == :expr_class) then
        next
      end

      self.command_start = true
      self.lex_state = :expr_beg
      return "\n"
    when '*' then
      c = src.read
      if c == '*' then
        c = src.read
        if c == '=' then
          self.lex_state = :expr_beg
          self.yacc_value = t("**")
          return :tOP_ASGN
        end
        src.unread c
        self.yacc_value = t("**")
        c = :tPOW
      else
        if c == '=' then
          self.lex_state = :expr_beg
          self.yacc_value = t("*")
          return :tOP_ASGN
        end
        src.unread c
        if lex_state.is_argument && space_seen && c !~ /\s/ then
          warning("`*' interpreted as argument prefix")
          c = :tSTAR
        elsif lex_state == :expr_beg || lex_state == :expr_mid then
          c = :tSTAR
        else
          c = :tSTAR2
        end
        self.yacc_value = t("*")
      end

      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end

      return c
    when '!' then
      self.lex_state = :expr_beg
      if (c = src.read) == '=' then
        self.yacc_value = t("!=")
        return :tNEQ
      end
      if c == '~' then
        self.yacc_value = t("!~")
        return :tNMATCH
      end
      src.unread(c)
      self.yacc_value = t("!")
      return :tBANG
    when '=' then
      # documentation nodes - FIX: cruby much cleaner w/ lookahead
      if src.was_begin_of_line and src.match_string("begin") then
        self.token_buffer.clear
        self.token_buffer << "begin"
        c = src.read

        if c =~ /\s/ then
          # In case last next was the newline.
          src.unread(c)

          loop do
            c = src.read
            token_buffer << c

            # If a line is followed by a blank line put it back.
            while c == "\n"
              c = src.read
              token_buffer << c
            end

            if c == RubyLexer::EOF then
              raise SyntaxError, "embedded document meets end of file"
            end

            next unless c == '='

            if src.was_begin_of_line && src.match_string("end") then
              token_buffer << "end"
              token_buffer << src.read_line
              src.unread "\n"
              break
            end
          end

          # parser_support.result.add_comment(Node.comment(token_buffer.join))
          next
        end
        src.unread(c)
      end

      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end

      c = src.read
      if c == '=' then
        c = src.read
        if c == '=' then
          self.yacc_value = t("===")
          return :tEQQ
        end
        src.unread(c)
        self.yacc_value = t("==")
        return :tEQ
      end
      if c == '~' then
        self.yacc_value = t("=~")
        return :tMATCH
      elsif c == '>' then
        self.yacc_value = t("=>")
        return :tASSOC
      end
      src.unread(c)
      self.yacc_value = t("=")
      return '='
    when '<' then
      c = src.read
      if (c == '<' &&
          lex_state != :expr_end &&
          lex_state != :expr_dot &&
          lex_state != :expr_endarg &&
          lex_state != :expr_class &&
          (!lex_state.is_argument || space_seen)) then
        # heredoc_identifier returns 0 when this is not a heredoc after all
        tok = self.heredoc_identifier
        return tok unless tok == 0
      end
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end
      if c == '=' then
        if (c = src.read) == '>' then
          self.yacc_value = t("<=>")
          return :tCMP
        end
        src.unread c
        self.yacc_value = t("<=")
        return :tLEQ
      end
      if c == '<' then
        if (c = src.read) == '=' then
          self.lex_state = :expr_beg
          self.yacc_value = t("\<\<")
          return :tOP_ASGN
        end
        src.unread(c)
        self.yacc_value = t("<<")
        return :tLSHFT
      end
      self.yacc_value = t("<")
      src.unread(c)
      return :tLT
    when '>' then
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end

      if (c = src.read) == '=' then
        self.yacc_value = t(">=")
        return :tGEQ
      end
      if c == '>' then
        if (c = src.read) == '=' then
          self.lex_state = :expr_beg
          self.yacc_value = t(">>")
          return :tOP_ASGN
        end
        src.unread c
        self.yacc_value = t(">>")
        return :tRSHFT
      end
      src.unread c
      self.yacc_value = t(">")
      return :tGT
    when '"' then
      self.lex_strterm = s(:strterm, STR_DQUOTE, '"', "\0") # TODO: question this
      self.yacc_value = t("\"")
      return :tSTRING_BEG
    when '`' then
      self.yacc_value = t("`")
      if lex_state == :expr_fname then
        self.lex_state = :expr_end
        return :tBACK_REF2
      end
      if lex_state == :expr_dot then
        if command_state then
          self.lex_state = :expr_cmdarg
        else
          self.lex_state = :expr_arg
        end
        return :tBACK_REF2
      end
      self.lex_strterm = s(:strterm, STR_XQUOTE, '`', "\0")
      return :tXSTRING_BEG
    when "\'" then
      self.lex_strterm = s(:strterm, STR_SQUOTE, "\'", "\0")
      self.yacc_value = t("'")
      return :tSTRING_BEG
    when '?' then
      if lex_state == :expr_end || lex_state == :expr_endarg then
        self.lex_state = :expr_beg
        self.yacc_value = t("?")
        return '?'
      end

      c = src.read

      raise SyntaxError, "incomplete character syntax" if c == RubyLexer::EOF

      if c =~ /\s/ then
        if !lex_state.is_argument then
          c2 = 0
          c2 = case c
               when ' ' then
                 's'
               when "\n" then
                 'n'
               when "\t" then
                 't'
               when "\v" then
                 'v'
               when "\r" then
                 'r'
               when "\f" then
                 'f'
               end

          if c2 != 0 then
            warning("invalid character syntax; use ?\\" + c2)
          end
        end

        # ternary
        src.unread c
        self.lex_state = :expr_beg
        self.yacc_value = t("?")
        return '?'
        # elsif ismbchar(c) then # ternary, also
        #   rb_warn("multibyte character literal not supported yet; use ?\\" + c)
        #   support.unread c
        #   self.lex_state = :expr_beg
        #   return '?'
      elsif c =~ /\w/ && ! src.peek("\n") && self.is_next_identchar then
        # ternary, also
        src.unread c
        self.lex_state = :expr_beg
        self.yacc_value = t("?")
        return '?'
      elsif c == "\\" then
        c = self.read_escape
      end
      c[0] &= 0xff
      self.lex_state = :expr_end
      self.yacc_value = c[0]
      return :tINTEGER
    when '&' then
      if (c = src.read) == '&' then
        self.lex_state = :expr_beg
        if (c = src.read) == '=' then
          self.yacc_value = t("&&")
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end
        src.unread c
        self.yacc_value = t("&&")
        return :tANDOP
      elsif c == '=' then
        self.yacc_value = t("&")
        self.lex_state = :expr_beg
        return :tOP_ASGN
      end

      src.unread c

      if lex_state.is_argument && space_seen && c !~ /\s/ then
        warning("`&' interpreted as argument prefix")
        c = :tAMPER
      elsif lex_state == :expr_beg || lex_state == :expr_mid then
        c = :tAMPER
      else
        c = :tAMPER2
      end

      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end
      self.yacc_value = t("&")
      return c
    when '|' then
      if (c = src.read) == '|' then
        self.lex_state = :expr_beg
        if (c = src.read) == '=' then
          self.lex_state = :expr_beg
          self.yacc_value = t("||")
          return :tOP_ASGN
        end
        src.unread c
        self.yacc_value = t("||")
        return :tOROP
      end
      if c == '=' then
        self.lex_state = :expr_beg
        self.yacc_value = t("|")
        return :tOP_ASGN
      end
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end
      src.unread c
      self.yacc_value = t("|")
      return :tPIPE
    when '+' then
      c = src.read
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
        if c == '@' then
          self.yacc_value = t("+@")
          return :tUPLUS
        end
        src.unread c
        self.yacc_value = t("+")
        return :tPLUS
      end

      if c == '=' then
        self.lex_state = :expr_beg
        self.yacc_value = t("+")
        return :tOP_ASGN
      end

      if (lex_state == :expr_beg || lex_state == :expr_mid ||
          (lex_state.is_argument && space_seen && c !~ /\s/)) then
        arg_ambiguous if lex_state.is_argument
        self.lex_state = :expr_beg
        src.unread c
        if c =~ /\d/ then
          c = '+'
          return parse_number(c)
        end
        self.yacc_value = t("+")
        return :tUPLUS
      end
      self.lex_state = :expr_beg
      src.unread c
      self.yacc_value = t("+")
      return :tPLUS
    when '-' then
      c = src.read
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
        if c == '@' then
          self.yacc_value = t("-@")
          return :tUMINUS
        end
        src.unread c
        self.yacc_value = t("-")
        return :tMINUS
      end
      if c == '=' then
        self.lex_state = :expr_beg
        self.yacc_value = t("-")
        return :tOP_ASGN
      end
      if (lex_state == :expr_beg || lex_state == :expr_mid ||
          (lex_state.is_argument && space_seen && c !~ /\s/)) then
        arg_ambiguous if lex_state.is_argument
        self.lex_state = :expr_beg
        src.unread c
        self.yacc_value = t("-")
        if c =~ /\d/ then
          return :tUMINUS_NUM
        end
        return :tUMINUS
      end
      self.lex_state = :expr_beg
      src.unread c
      self.yacc_value = t("-")
      return :tMINUS
    when '.' then
      self.lex_state = :expr_beg
      if (c = src.read) == '.' then
        if (c = src.read) == '.' then
          self.yacc_value = t("...")
          return :tDOT3
        end
        src.unread c
        self.yacc_value = t("..")
        return :tDOT2
      end
      src.unread c
      if c =~ /\d/ then
        raise SyntaxError, "no .<digit> floating literal anymore put 0 before dot"
      end
      self.lex_state = :expr_dot
      self.yacc_value = t(".")
      return :tDOT
    when /[0-9]/ then
      return parse_number(c)
    when ')' then # REFACTOR: omg this is lame... next 3 are all the same
      cond.lexpop
      cmdarg.lexpop
      self.lex_state = :expr_end
      self.yacc_value = t(")")
      return :tRPAREN
    when ']' then
      cond.lexpop
      cmdarg.lexpop
      self.lex_state = :expr_end
      self.yacc_value = t("]")
      return :tRBRACK
    when '}' then
      cond.lexpop
      cmdarg.lexpop
      self.lex_state = :expr_end
      self.yacc_value = t("end")
      return :tRCURLY
    when ':' then
      c = src.read
      if c == ':' then
        if (lex_state == :expr_beg ||
            lex_state == :expr_mid ||
            lex_state == :expr_class ||
            (lex_state.is_argument && space_seen)) then
          self.lex_state = :expr_beg
          self.yacc_value = t("::")
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = t(":")
        return :tCOLON2
      end

      if lex_state == :expr_end || lex_state == :expr_endarg || c =~ /\s/ then
        src.unread c
        self.lex_state = :expr_beg
        self.yacc_value = t(":")
        return ':'
      end

      case c
      when "\'" then
        self.lex_strterm = s(:strterm, STR_SSYM, c, "\0")
      when '"' then
        self.lex_strterm = s(:strterm, STR_DSYM, c, "\0")
      else
        src.unread c
      end

      self.lex_state = :expr_fname
      self.yacc_value = t(":")
      return :tSYMBEG
    when '/' then
      if lex_state == :expr_beg || lex_state == :expr_mid then
        self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
        self.yacc_value = t("/")
        return :tREGEXP_BEG
      end

      if (c = src.read) == '=' then
        self.yacc_value = t("/")
        self.lex_state = :expr_beg
        return :tOP_ASGN
      end

      src.unread c

      if lex_state.is_argument && space_seen then
        unless c =~ /\s/ then
          arg_ambiguous
          self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
          self.yacc_value = t("/")
          return :tREGEXP_BEG
        end
      end

      self.lex_state = if (lex_state == :expr_fname ||
                           lex_state == :expr_dot) then
                         :expr_arg
                       else
                         :expr_beg
                       end

      self.yacc_value = t("/")
      return :tDIVIDE
    when '^' then
      if (c = src.read) == '=' then
        self.lex_state = :expr_beg
        self.yacc_value = t("^")
        return :tOP_ASGN
      end
      if lex_state == :expr_fname || self.lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end
      src.unread c
      self.yacc_value = t("^")
      return :tCARET
    when ';' then
      self.command_start = true
      self.lex_state = :expr_beg
      self.yacc_value = t(";")
      return c
    when ',' then
      self.lex_state = :expr_beg
      self.yacc_value = t(",")
      return c
    when '~' then
      if lex_state == :expr_fname || lex_state == :expr_dot then
        if (c = src.read) != '@' then
          src.unread c
        end
      end
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
      else
        self.lex_state = :expr_beg
      end
      self.yacc_value = t("~")
      return :tTILDE
    when '(' then
      c = :tLPAREN2
      self.command_start = true
      if lex_state == :expr_beg || lex_state == :expr_mid then
        c = :tLPAREN
      elsif space_seen then
        if lex_state == :expr_cmdarg then
          c = :tLPAREN_ARG
        elsif lex_state == :expr_arg then
          warning("don't put space before argument parentheses")
          c = :tLPAREN2
        end
      end
      cond.push false
      cmdarg.push false
      self.lex_state = :expr_beg
      self.yacc_value = t("(")
      return c
    when '[' then
      if lex_state == :expr_fname || lex_state == :expr_dot then
        self.lex_state = :expr_arg
        if (c = src.read) == ']' then
          if src.peek('=') then
            c = src.read
            self.yacc_value = t("[]=")
            return :tASET
          end
          self.yacc_value = t("[]")
          return :tAREF
        end
        src.unread c
        self.yacc_value = t("[")
        return '['
      elsif lex_state == :expr_beg || lex_state == :expr_mid then
        c = :tLBRACK
      elsif lex_state.is_argument && space_seen then
        c = :tLBRACK
      end
      self.lex_state = :expr_beg
      cond.push false
      cmdarg.push false
      self.yacc_value = t("[")
      return c
    when '{' then
      c = :tLCURLY

      if lex_state.is_argument || lex_state == :expr_end then
        c = :tLCURLY      #  block (primary)
      elsif lex_state == :expr_endarg then
        c = :tLBRACE_ARG  #  block (expr)
      else
        c = :tLBRACE      #  hash
      end
      cond.push false
      cmdarg.push false
      self.lex_state = :expr_beg
      self.yacc_value = t("{")
      return c
    when "\\" then
      c = src.read
      if c == "\n" then
        space_seen = true
        next # skip \\n
      end
      src.unread c
      self.yacc_value = t("\\")
      return "\\"
    when '%' then
      if lex_state == :expr_beg || lex_state == :expr_mid then
        return parse_quote(src.read)
      end

      c = src.read
      if c == '=' then
        self.lex_state = :expr_beg
        self.yacc_value = t("%")
        return :tOP_ASGN
      end

      return parse_quote(c) if lex_state.is_argument && space_seen && c !~ /\s/

      self.lex_state = case lex_state
                       when :expr_fname, :expr_dot then
                         :expr_arg
                       else
                         :expr_beg
                       end

      src.unread c
      self.yacc_value = t("%")

      return :tPERCENT
    when '$' then
      last_state = lex_state
      self.lex_state = :expr_end
      token_buffer.clear
      c = src.read
      case c
      when '_' then # $_: last read line string
        c = src.read

        token_buffer << '$'
        token_buffer << '_'

        unless c =~ /\w/ then
          src.unread c
          self.yacc_value = t(token_buffer.join)
          return :tGVAR
        end
      when /[~*$?!@\/\\;,.=:<>\"]/ then
        token_buffer << '$'
        token_buffer << c
        self.yacc_value = t(token_buffer.join)
        return :tGVAR
      when '-' then
        token_buffer << '$'
        token_buffer << c
        c = src.read
        if c =~ /\w/ then
          token_buffer << c
        else
          src.unread c
        end
        self.yacc_value = t(token_buffer.join)
        # xxx shouldn't check if valid option variable
        return :tGVAR
      when /[\&\`\'\+]/ then
        # Explicit reference to these vars as symbols...
        if last_state == :expr_fname then
          token_buffer << '$'
          token_buffer << c
          self.yacc_value = t(token_buffer.join)
          return :tGVAR
        end

        self.yacc_value = s(:back_ref, c.to_sym)
        return :tBACK_REF
      when /[1-9]/ then
        token_buffer << '$'
        begin
          token_buffer << c
          c = src.read
        end while c =~ /\d/
        src.unread c
        if last_state == :expr_fname then
          self.yacc_value = t(token_buffer.join)
          return :tGVAR
        else
          self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
          return :tNTH_REF
        end
      when '0' then
        token_buffer << '$'
      else
        unless c =~ /\w/ then
          src.unread c
          self.yacc_value = t("$")
          return '$'
        end
        token_buffer << '$'
      end
    when '@' then
      c = src.read
      token_buffer.clear
      token_buffer << '@'
      if c == '@' then
        token_buffer << '@'
        c = src.read
      end
      if c =~ /\d/ then
        if token_buffer.length == 1 then
          raise SyntaxError, "`@" + c + "' is not allowed as an instance variable name"
        else
          raise SyntaxError, "`@@" + c + "' is not allowed as a class variable name"
        end
      end
      unless c =~ /\w/ then
        src.unread c
        self.yacc_value = t("@")
        return '@'
      end
    when '_' then
      if src.was_begin_of_line && src.match_string("_END__\n", false) then
        self.end_seen = true
        return RubyLexer::EOF
      end
      token_buffer.clear
    else
      unless c =~ /\w/ then
        raise SyntaxError, "Invalid char '#{c.inspect}' in expression"
      end
      token_buffer.clear
    end

    # Identifier-like token: the branches above that fall through have left
    # the sigil (if any) in token_buffer and the first name character in c.
    begin
      token_buffer << c
      # if ismbchar(c) then
      #   len = mbclen(c) - 1
      #   (0..len).each do
      #     c = src.read;
      #     token_buffer << c
      #   end
      # end
      c = src.read
    end while c =~ /\w/

    if c =~ /\!|\?/ && token_buffer[0] =~ /\w/ && src.peek != '=' then
      token_buffer << c
    else
      src.unread c
    end

    result = nil
    last_state = lex_state

    case token_buffer[0]
    when '$' then
      self.lex_state = :expr_end
      result = :tGVAR
    when '@' then
      self.lex_state = :expr_end
      if token_buffer[1] == '@' then
        result = :tCVAR
      else
        result = :tIVAR
      end
    else
      if token_buffer[-1] =~ /[!?]/ then
        result = :tFID
      else
        if lex_state == :expr_fname then
          if (c = src.read) == '=' then
            c2 = src.read

            # A trailing '=' belongs to the name (setter, e.g. :foo=) unless
            # it starts '=~', '=>' or '=='. The one '==' exception is '==>',
            # which reads as the setter name followed by '=>' (this is the
            # rule MRI's parse.y applies). c2 has already been consumed, so
            # peek looks at the character after it.
            if c2 != '~' && c2 != '>' && (c2 != '=' || src.peek('>')) then
              result = :tIDENTIFIER
              token_buffer << c
              src.unread c2
            else
              src.unread c2
              src.unread c
            end
          else
            src.unread c
          end
        end
        if result.nil? && token_buffer[0] =~ /[A-Z]/ then
          result = :tCONSTANT
        else
          result = :tIDENTIFIER
        end
      end

      unless lex_state == :expr_dot then
        # See if it is a reserved word.
        keyword = Keyword.keyword(token_buffer.join, token_buffer.length)

        unless keyword.nil? then
          state = lex_state
          self.lex_state = keyword.state

          if state == :expr_fname then
            self.yacc_value = t(keyword.name)
          else
            self.yacc_value = t(token_buffer.join)
          end

          if keyword.id0 == :kDO then
            self.command_start = true
            return :kDO_COND if cond.is_in_state
            return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
            return :kDO_BLOCK if state == :expr_endarg
            return :kDO
          end

          return keyword.id0 if state == :expr_beg

          self.lex_state = :expr_beg unless keyword.id0 == keyword.id1

          return keyword.id1
        end
      end

      if (lex_state == :expr_beg ||
          lex_state == :expr_mid ||
          lex_state == :expr_dot ||
          lex_state == :expr_arg ||
          lex_state == :expr_cmdarg) then
        if command_state then
          self.lex_state = :expr_cmdarg
        else
          self.lex_state = :expr_arg
        end
      else
        self.lex_state = :expr_end
      end
    end


    temp_val = token_buffer.join

    # Lame: parsing logic made it into lexer in ruby...So we
    # are emulating
    # FIXME:  I believe this is much simpler now...
    # HACK
    # scope = parser_support.current_scope
    # if (IdUtil.var_type(temp_val) == IdUtil.LOCAL_VAR &&
    #     last_state != :expr_dot &&
    #     (BlockStaticScope === scope && (scope.is_defined(temp_val) >= 0)) ||
    #     (scope.local_scope.is_defined(temp_val) >= 0)) then
    #   self.lex_state = :expr_end
    # end

    self.yacc_value = t(temp_val)

    return result
  end
end
|
2047
|
+
|
2048
|
+
##
|
2049
|
+
# Parse a number from the input stream.
|
2050
|
+
#
|
2051
|
+
# @param c The first character of the number.
|
2052
|
+
# @return An int constant which represents a token.
|
2053
|
+
|
2054
|
+
# Lex one numeric literal and publish its value through +yacc_value+.
#
# The first character of the literal has already been consumed by the caller
# and is passed in as +c+; every further character is pulled from +src+ with
# +read+, and the first character that does not belong to the number is handed
# back with +unread+.  +token_buffer+ is cleared and used as scratch space.
#
# Recognised forms: optional sign, "0x"/"0b"/"0d"/"0o" prefixed integers,
# plain "0NNN" octal, decimal integers, and floats with a fraction and/or an
# exponent.  A single '_' is allowed between digits.
#
# c:: the first character of the number ('-', '+' or a digit).
#
# Returns :tINTEGER or :tFLOAT.
# Raises SyntaxError for a prefix without digits, an illegal octal digit, or
# a misplaced '_'.
def parse_number c
  self.lex_state = :expr_end

  token_buffer.clear

  if c == '-' then
    token_buffer << c
    c = src.read
  elsif c == '+' then
    # A leading '+' is not stored; only '-' is kept for the conversion below.
    c = src.read
  end

  # "\0" means "the previous character was a digit"; anything else records
  # the non-digit ('_', 'e', sign) that still needs a digit after it.
  nondigit = "\0"

  if c == '0' then
    start_len = token_buffer.length
    c = src.read

    case c
    when /x/i then # hexadecimal
      return parse_prefixed_integer(/[a-f0-9]/i, 16, start_len,
                                    "Hexadecimal number without hex-digits.")
    when /b/i then # binary
      return parse_prefixed_integer(/[01]/, 2, start_len,
                                    "Binary number without digits.")
    when /d/i then # decimal
      return parse_prefixed_integer(/\d/, 10, start_len,
                                    "Decimal number without digits.")
    when /o/i, /[0-7_]/ then # octal
      c = src.read if c =~ /o/i # explicit "0o" prefix: skip the letter

      loop do
        if c == '_' then
          break if nondigit != "\0"
          nondigit = c
        elsif c =~ /[0-7]/ then # regexp test is safe even when c is EOF
          nondigit = "\0"
          token_buffer << c
        else
          break
        end
        c = src.read
      end

      if token_buffer.length > start_len then
        src.unread c

        raise SyntaxError, "Trailing '_' in number." if nondigit != "\0"

        self.yacc_value = token_buffer.join.to_i(8)
        return :tINTEGER
      end
      # No octal digit was read: execution continues with the generic
      # decimal/float loop below, still holding the current c and nondigit.
    when /[89]/ then
      raise SyntaxError, "Illegal octal digit."
    when /[\.eE]/ then
      token_buffer << '0'
    else
      src.unread c
      self.yacc_value = 0
      return :tINTEGER
    end
  end

  seen_point = false
  seen_e = false

  loop do
    case c
    when /\d/ then
      nondigit = "\0"
      token_buffer << c
    when '.' then
      if nondigit != "\0" then
        src.unread c
        raise SyntaxError, "Trailing '_' in number."
      elsif seen_point or seen_e then
        # A second '.' (or one after the exponent) ends the literal.
        src.unread c
        return number_token(token_buffer.join, true, nondigit)
      else
        c2 = src.read
        if c2 =~ /\d/ then
          token_buffer << '.'
          token_buffer << c2
          seen_point = true
          nondigit = "\0"
        else
          # "1.foo": the '.' is not part of the number, give both chars back.
          src.unread c2
          src.unread '.'
          self.yacc_value = token_buffer.join.to_i(10)
          return :tINTEGER
        end
      end
    when /e/i then
      if nondigit != "\0" then
        raise SyntaxError, "Trailing '_' in number."
      elsif seen_e then
        src.unread c
        return number_token(token_buffer.join, true, nondigit)
      else
        token_buffer << c
        seen_e = true
        nondigit = c
        c = src.read
        if c == '-' or c == '+' then
          token_buffer << c
          nondigit = c
        else
          src.unread c
        end
      end
    when '_' then # '_' between digits is skipped, but may not repeat or trail
      if nondigit != "\0" then
        raise SyntaxError, "Trailing '_' in number."
      end
      nondigit = c
    else
      src.unread c
      return number_token(token_buffer.join, seen_e || seen_point, nondigit)
    end
    c = src.read
  end
end

# Helper for parse_number: scan the digits following a radix prefix
# ("0x", "0b", "0d") into +token_buffer+ and convert them.
#
# digit::              Regexp matching one valid digit of the radix.
# base::               radix handed to String#to_i.
# start_len::          token_buffer length before any digit was added.
# missing_digits_msg:: SyntaxError message used when no digit follows.
#
# Returns :tINTEGER after storing the value in +yacc_value+.
def parse_prefixed_integer digit, base, start_len, missing_digits_msg
  nondigit = "\0"
  c = src.read

  if c =~ digit then
    loop do
      if c == '_' then
        break unless nondigit == "\0"
        nondigit = c
      elsif c =~ digit then
        nondigit = "\0"
        token_buffer << c
      else
        break
      end
      c = src.read
    end
  end

  src.unread c

  if token_buffer.length == start_len then
    raise SyntaxError, missing_digits_msg
  elsif nondigit != "\0" then
    raise SyntaxError, "Trailing '_' in number."
  end

  self.yacc_value = token_buffer.join.to_i(base)
  return :tINTEGER
end
|
2254
|
+
|
2255
|
+
# TODO: remove me
|
2256
|
+
# Convert the collected digit string and store it in +yacc_value+.
#
# number::   text of the literal as accumulated by the lexer.
# is_float:: true to convert with to_f, false to convert with to_i.
# nondigit:: "\0" when the literal ended on a digit; any other value means a
#            non-digit was still pending and the literal is rejected.
#
# Returns :tFLOAT or :tINTEGER.  Raises SyntaxError on a pending non-digit.
def number_token(number, is_float, nondigit)
  raise SyntaxError, "Trailing '_' in number." unless nondigit == "\0"

  token, value = is_float ? [:tFLOAT, number.to_f] : [:tINTEGER, number.to_i]
  self.yacc_value = value
  token
end
|
2269
|
+
|
2270
|
+
############################################################
|
2271
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
2272
|
+
|
2273
|
+
# Append +s+ to the lexer's token buffer and return the buffer. # HACK
def tokadd(s)
  buffer = token_buffer
  buffer << s
end
|
2276
|
+
|
2277
|
+
# Placeholder for lexer warnings: the message +s+ is currently discarded.
# Always returns nil.
def warning(s)
  nil # do nothing for now
end
|
2280
|
+
|
2281
|
+
# Abort lexing by raising +msg+ (a String becomes a RuntimeError message).
def rb_compile_error(msg)
  raise(msg)
end
|
2284
|
+
|
2285
|
+
# Look at the next character of +src+ without consuming it (it is read and
# immediately unread) and tell whether it is a word character.
#
# Returns false at RubyLexer::EOF, otherwise the result of matching /\w/
# (0 for a word character, nil for anything else). # TODO: ?
def is_next_identchar
  upcoming = src.read
  src.unread upcoming

  return false if upcoming == RubyLexer::EOF

  upcoming =~ /\w/
end
|
2291
|
+
|
2292
|
+
# Case-insensitively test whether the upcoming input spells +s+.
#
# Characters are consumed from +src+ one per byte of +s+.  On a match they
# stay consumed and the text that was actually read is returned.  On the
# first mismatch -- including reaching end of input before +s+ is complete --
# +src.pos+ is restored to where it was and nil is returned.
#
# FIX: replace this whole thing with something clean
def is_next_no_case(s)
  buf = []
  old_pos = src.pos

  s.each_byte do |b|
    expected = b.chr
    actual = src.read
    buf << actual

    # End of input can never match; checking it first also avoids calling
    # downcase on an EOF marker that is not a String.
    if actual == RubyLexer::EOF || expected.downcase != actual.downcase then
      src.pos = old_pos
      return nil
    end
  end

  return buf.join
end
|
2309
|
+
|
2310
|
+
# Hand the listed legacy helper names to the `kill` macro.  `kill` is defined
# elsewhere; presumably it disables these methods so stray callers are
# noticed -- TODO confirm against its definition.
kill :is_hex_char, :is_oct_char, :is_identifier_char, :nextc, :pushback
|
2311
|
+
|
2312
|
+
# END HACK
|
2313
|
+
############################################################$
|
2314
|
+
|
2315
|
+
end
|
2316
|
+
|
2317
|
+
# Reserved-word lookup backed by a perfect-hash table: hash_keyword maps a
# word to a slot of WORDLIST, and keyword confirms the slot really holds
# that word.
class Keyword
  # One reserved word: its spelling, the pair of token ids it can produce,
  # and the lexer state to enter after it.
  class KWtable
    attr_accessor :name, :id, :state

    def initialize(name, id=[], state=nil)
      @name  = name
      @id    = id
      @state = state
    end

    # First token id of the pair.
    def id0
      self.id.first
    end

    # Second token id of the pair (the *_MOD form where the two differ).
    def id1
      self.id.last
    end
  end

  TOTAL_KEYWORDS  = 40
  MIN_WORD_LENGTH = 2
  MAX_WORD_LENGTH = 8
  MIN_HASH_VALUE  = 6
  MAX_HASH_VALUE  = 55
  # maximum key range = 50, duplicates = 0

  # Per-byte weights of the perfect hash, indexed by byte value (0..255).
  ASSO_VALUES = [
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 11, 56, 56, 36, 56,  1, 37,
    31,  1, 56, 56, 56, 56, 29, 56,  1, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56,  1, 56, 32,  1,  2,
     1,  1,  4, 23, 56, 17, 56, 20,  9,  2,
     9, 26, 14, 56,  5,  1,  1, 16, 56, 21,
    20,  9, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56, 56, 56, 56, 56,
    56, 56, 56, 56, 56, 56
  ].freeze

  ##
  # :expr_beg    = ignore newline, +/- is a sign.
  # :expr_end    = newline significant, +/- is an operator.
  # :expr_arg    = newline significant, +/- is an operator.
  # :expr_cmdarg = newline significant, +/- is an operator.
  # :expr_endarg = newline significant, +/- is an operator.
  # :expr_mid    = newline significant, +/- is an operator.
  # :expr_fname  = ignore newline, no reserved words.
  # :expr_dot    = right after . or ::, no reserved words.
  # :expr_class  = immediate after class, no here document.
  #
  # Rows are [name, [id0, id1], state], positioned at the index that
  # hash_keyword yields for the name; [""] marks an unused slot.
  WORDLIST = [
    [""], [""], [""], [""], [""], [""],
    ["end",      [:kEND,      :kEND        ], :expr_end   ],
    ["else",     [:kELSE,     :kELSE       ], :expr_beg   ],
    ["case",     [:kCASE,     :kCASE       ], :expr_beg   ],
    ["ensure",   [:kENSURE,   :kENSURE     ], :expr_beg   ],
    ["module",   [:kMODULE,   :kMODULE     ], :expr_beg   ],
    ["elsif",    [:kELSIF,    :kELSIF      ], :expr_beg   ],
    ["def",      [:kDEF,      :kDEF        ], :expr_fname ],
    ["rescue",   [:kRESCUE,   :kRESCUE_MOD ], :expr_mid   ],
    ["not",      [:kNOT,      :kNOT        ], :expr_beg   ],
    ["then",     [:kTHEN,     :kTHEN       ], :expr_beg   ],
    ["yield",    [:kYIELD,    :kYIELD      ], :expr_arg   ],
    ["for",      [:kFOR,      :kFOR        ], :expr_beg   ],
    ["self",     [:kSELF,     :kSELF       ], :expr_end   ],
    ["false",    [:kFALSE,    :kFALSE      ], :expr_end   ],
    ["retry",    [:kRETRY,    :kRETRY      ], :expr_end   ],
    ["return",   [:kRETURN,   :kRETURN     ], :expr_mid   ],
    ["true",     [:kTRUE,     :kTRUE       ], :expr_end   ],
    ["if",       [:kIF,       :kIF_MOD     ], :expr_beg   ],
    ["defined?", [:kDEFINED,  :kDEFINED    ], :expr_arg   ],
    ["super",    [:kSUPER,    :kSUPER      ], :expr_arg   ],
    ["undef",    [:kUNDEF,    :kUNDEF      ], :expr_fname ],
    ["break",    [:kBREAK,    :kBREAK      ], :expr_mid   ],
    ["in",       [:kIN,       :kIN         ], :expr_beg   ],
    ["do",       [:kDO,       :kDO         ], :expr_beg   ],
    ["nil",      [:kNIL,      :kNIL        ], :expr_end   ],
    ["until",    [:kUNTIL,    :kUNTIL_MOD  ], :expr_beg   ],
    ["unless",   [:kUNLESS,   :kUNLESS_MOD ], :expr_beg   ],
    ["or",       [:kOR,       :kOR         ], :expr_beg   ],
    ["next",     [:kNEXT,     :kNEXT       ], :expr_mid   ],
    ["when",     [:kWHEN,     :kWHEN       ], :expr_beg   ],
    ["redo",     [:kREDO,     :kREDO       ], :expr_end   ],
    ["and",      [:kAND,      :kAND        ], :expr_beg   ],
    ["begin",    [:kBEGIN,    :kBEGIN      ], :expr_beg   ],
    ["__LINE__", [:k__LINE__, :k__LINE__   ], :expr_end   ],
    ["class",    [:kCLASS,    :kCLASS      ], :expr_class ],
    ["__FILE__", [:k__FILE__, :k__FILE__   ], :expr_end   ],
    ["END",      [:klEND,     :klEND       ], :expr_end   ],
    ["BEGIN",    [:klBEGIN,   :klBEGIN     ], :expr_end   ],
    ["while",    [:kWHILE,    :kWHILE_MOD  ], :expr_beg   ],
    [""], [""], [""], [""], [""], [""], [""], [""], [""],
    [""],
    ["alias",    [:kALIAS,    :kALIAS      ], :expr_fname ],
  ].freeze

  # Perfect-hash value of the first +len+ bytes of +str+: the length plus the
  # weights of selected bytes (first and last; also the third when len > 2).
  #
  # Bytes are obtained with unpack so the lookup works whether String#[]
  # returns an Integer (Ruby 1.8) or a one-character String (1.9 and later).
  def self.hash_keyword(str, len)
    bytes = str.unpack('C*')
    hval = len

    case hval
    when 2, 1 then
      hval += ASSO_VALUES[bytes[0]]
    else
      hval += ASSO_VALUES[bytes[2]]
      hval += ASSO_VALUES[bytes[0]]
    end

    hval += ASSO_VALUES[bytes[len - 1]]
    return hval
  end

  # Look +str+ up in the reserved-word table.
  #
  # Returns a freshly built KWtable for a reserved word, nil for anything
  # else.  +len+ defaults to the size of +str+.
  def self.keyword(str, len = str.size)
    return nil unless len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH

    key = hash_keyword(str, len)
    return nil unless key <= MAX_HASH_VALUE && key >= 0

    name, ids, state = WORDLIST[key]
    return nil unless str == name

    # Copies keep the shared table untouched if a caller mutates the result.
    KWtable.new(name.dup, (ids || []).dup, state)
  end
end
|
2455
|
+
|
2456
|
+
# A stack of variable scopes, innermost first.
#
# Three parallel arrays are kept in step: @env holds one Hash of bindings per
# scope, @dyn holds the flag each scope was opened with (false marks a static
# scope), and @use holds one Hash per scope recording names marked as used.
# Lookups see the scopes from the innermost one out to, and including, the
# nearest static scope.
class Environment
  attr_reader :env, :dyn
  attr_accessor :init

  # Starts with a single static scope.  The +dyn+ argument is accepted but
  # not consulted.
  def initialize dyn = false
    @dyn = []
    @env = []
    @use = []
    @init = false
    self.extend
  end

  # Mark +id+ as used in every scope that currently binds it.
  def use id
    @env.each_with_index do |env, i|
      @use[i][id] = true if env[id]
    end
  end

  # Whether +id+ has been marked used in one of the dynamic scopes above the
  # nearest static scope.
  def used? id
    (merge_scopes(@use[0...static_index]) || {})[id] # REFACTOR
  end

  def [] k
    self.all[k]
  end

  # Bind +k+ to +v+ in the innermost scope.  The literal +true+ is rejected.
  def []= k, v
    raise "no" if v == true
    self.current[k] = v
  end

  def has_key? k
    self.all.has_key? k
  end

  # Visible bindings: every scope up to and including the nearest static one,
  # inner scopes overriding outer ones.
  def all
    merge_scopes @env[0..static_index]
  end

  # Bindings of the dynamic scopes only (excludes the nearest static scope).
  def dynamic
    merge_scopes(@env[0...static_index]) || {}
  end

  def current
    @env.first
  end

  def dynamic?
    @dyn[0] != false
  end

  def dasgn_curr? name # TODO: I think this is wrong - nuke
    (! has_key?(name) && dynamic?) || current.has_key?(name)
  end

  # Open a new innermost scope.  NOTE: this shadows Object#extend.
  def extend dyn = false
    @dyn.unshift dyn
    @env.unshift({})
    @use.unshift({})
  end

  # Close the innermost scope.  The outermost scope can not be closed; the
  # check happens before anything is removed, so a failed call leaves the
  # environment intact.
  def unextend
    raise "You went too far unextending env" if @env.size <= 1

    @dyn.shift
    @env.shift
    @use.shift
    nil
  end

  private

  # Index of the nearest static scope.
  def static_index
    @dyn.index false
  end

  # Fold +scopes+ (innermost first) into one Hash with inner entries winning.
  # Returns nil for an empty list and the Hash itself for a single scope.
  def merge_scopes scopes
    scopes.reverse.inject { |merged, scope| merged.merge scope }
  end
end
|
2530
|
+
|
2531
|
+
# A named stack of booleans that is never left empty: it starts as [false]
# and pop refills it with false when the last entry is removed.
class StackState
  attr_reader :stack

  def inspect
    "StackState(#{@name}, #{@stack.inspect})"
  end

  def initialize(name)
    @name = name
    @stack = [false]
  end

  # Remove and return the top entry, putting false back if that empties the
  # stack.
  def pop
    # raise "#{@name} empty" if @stack.size <= 1
    r = @stack.pop
    @stack.push false if @stack.size == 0
    r
  end

  # Replace the top two entries with their logical OR.  With a single entry
  # the missing one counts as false, so the stack keeps holding booleans.
  def lexpop
    raise "#{@name} stack is empty" if @stack.size == 0
    a = @stack.pop
    b = @stack.pop
    @stack.push(a || b || false)
  end

  # Push +val+, which must be exactly true or false.
  def push val
    unless val == true || val == false then
      raise "#{@name} stack only takes true or false, got #{val.inspect}"
    end
    @stack.push val
  end

  # The entry on top of the stack.
  def is_in_state
    @stack.last
  end
end
|
2566
|
+
|
2567
|
+
# Shorthand constructor: wrap +str+ in a Token.
def t(str)
  Token.new(str)
end
|
2570
|
+
|
2571
|
+
# Thin wrapper around the value(s) of a lexed token.
# TODO: nuke this and use sexps
class Token
  attr_accessor :args

  # +token+ is coerced with Kernel#Array, so a single value becomes a
  # one-element list.
  def initialize(token)
    @args = Array(token)
  end

  # First wrapped value.
  def value # TODO: eventually phase this out (or make it official)
    self.args.first
  end

  alias :first :value # HACK

  def inspect
    "t(#{args.join.inspect})"
  end

  def to_sym
    self.value.to_sym
  end

  # Tokens are equal when both are Tokens wrapping equal values.
  def == o
    Token === o && self.args == o.args
  end

  # eql? and hash follow ==, so equal tokens act as the same Hash key.
  def eql? o
    Token === o && self.args.eql?(o.args)
  end

  def hash
    self.args.hash
  end
end
|
2597
|
+
|
2598
|
+
############################################################
|
2599
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
2600
|
+
|
2601
|
+
class Symbol
  # True when this symbol is one of the two argument lexer states,
  # :expr_arg or :expr_cmdarg.  TODO: phase this out
  def is_argument
    [:expr_arg, :expr_cmdarg].include? self
  end
end
|
2606
|
+
|
2607
|
+
# Lexer-oriented extensions to StringIO.  HACK: everything in here is a hack.
#
# NOTE: +read+ is redefined below as a one-character reader; the stock
# StringIO#read stays reachable as +read_all+.
class StringIO
  attr_accessor :begin_of_line, :was_begin_of_line
  alias :begin_of_line? :begin_of_line
  alias :read_all :read

  alias :old_initialize :initialize

  def initialize(*args)
    self.begin_of_line = true
    self.was_begin_of_line = false
    old_initialize(*args)
    @original_string = self.string.dup # snapshot used by current_line
  end

  # Everything from the current position to the end of the string.
  def rest
    self.string[self.pos..-1]
  end

  # Line count of the original text up to the last "__LINE__" found within
  # its first pos + 1 characters.  Assumes such an occurrence exists; without
  # one the lookup yields nil and the call fails.
  def current_line
    @original_string[0..self.pos][/\A.*__LINE__/m].split(/\n/).size
  end

  # Read one character as a String.  "\r" and "\r\n" both come back as "\n".
  # Returns RubyLexer::EOF at end of input (or on a zero byte value), and
  # keeps begin_of_line / was_begin_of_line up to date.
  def read
    c = self.getc

    if c == ?\r then
      d = self.getc
      self.ungetc d if d and d != ?\n
      c = ?\n
    end

    self.was_begin_of_line = self.begin_of_line
    self.begin_of_line = c == ?\n
    if c and c != 0 then
      c.chr
    else
      ::RubyLexer::EOF
    end
  end

  # Try to consume +term+ at the current position, optionally skipping
  # leading whitespace (but not line ends) first when +indent+ is set.
  #
  # Returns true with the text consumed, or false after pushing everything
  # that was read back onto the stream.
  # TODO: add case insensitivity, or just remove
  def match_string term, indent=false
    buffer = []

    if indent
      while c = self.read do
        if c !~ /\s/ or c == "\n" or c == "\r" then
          self.unread c
          break
        end
        buffer << c
      end
    end

    term.each_byte do |c2|
      c = self.read
      c = self.read if c and c == "\r"
      buffer << c
      # Compare as one-character strings: String#[] yields an Integer on
      # Ruby 1.8 but a String on later versions, so a byte-vs-c[0] test would
      # never succeed there.
      if c and c2.chr != c then
        self.unread_many buffer.join # HACK omg
        return false
      end
    end

    return true
  end

  # Read through the end of the current line, normalising a trailing "\r" or
  # "\r\n" to "\n", and mark the stream as being at the start of a line.
  def read_line
    self.begin_of_line = true
    self.was_begin_of_line = false
    gets.sub(/\r\n?$/, "\n") # HACK
  end

  # Look at the next character without consuming it.  With +expected+ given,
  # returns whether the next character equals it.  Returns RubyLexer::EOF at
  # end of input in either mode.  FIX: barf
  def peek expected = nil
    c = self.getc
    return RubyLexer::EOF if c.nil?
    self.ungetc c if c
    c = c.chr if c
    if expected then
      c == expected
    else
      c
    end
  end

  # Push +c+ back onto the stream; nil is ignored.
  def unread(c)
    return if c.nil? # UGH

    # HACK: only depth is 2... who cares? really I want to remove all of this
    # NOTE(review): `x || true` is always true, so begin_of_line is set on
    # every unread -- confirm that this is intended.
    self.begin_of_line = self.was_begin_of_line || true
    self.was_begin_of_line = nil

    c = c[0] if String === c
    self.ungetc c
  end

  # Push back every character of +str+, last character first, so the text
  # will be read again in its original order.
  def unread_many str
    str.split(//).reverse.each do |c|
      unread c
    end
  end
end
|
2708
|
+
|
2709
|
+
# Parser-side additions to Sexp.
class Sexp
  attr_writer :paren

  # The paren flag, defaulting to (and then remembered as) false.
  def paren
    @paren = false unless @paren
    @paren
  end

  # The last element of a sexp holding at most two items; larger sexps are
  # rejected.
  def value
    raise "multi item sexp" unless size <= 2
    self.last
  end

  # Everything after the first element.
  def values
    self[1..-1]
  end

  # The first element.
  def node_type
    self.first
  end

  kill :add, :add_all
end
|
2731
|
+
|
2732
|
+
# Emit a warning naming the method that called this one (taken from the last
# whitespace-separated word of the first caller frame) and the frame that
# called it in turn.
def bitch
  offender, origin = caller
  culprit = offender.split.last
  warn "bitch: you shouldn't be doing #{culprit}: from #{origin}"
end
|
2737
|
+
|
2738
|
+
# class NilClass
|
2739
|
+
# def method_missing msg, *args
|
2740
|
+
# c = caller
|
2741
|
+
# warn "called #{msg} on nil (args = #{args.inspect}): from #{c[0]}"
|
2742
|
+
# nil
|
2743
|
+
# end
|
2744
|
+
# end
|
2745
|
+
|
2746
|
+
# def d s
|
2747
|
+
# warn s.inspect
|
2748
|
+
# end
|
2749
|
+
|
2750
|
+
# END HACK
|
2751
|
+
############################################################
|