rubylexer 0.6.2 → 0.7.0
- data/History.txt +55 -0
- data/Manifest.txt +67 -0
- data/README.txt +103 -0
- data/Rakefile +24 -0
- data/howtouse.txt +9 -6
- data/{assert.rb → lib/assert.rb} +11 -11
- data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
- data/lib/rubylexer/0.6.2.rb +39 -0
- data/lib/rubylexer/0.6.rb +5 -0
- data/lib/rubylexer/0.7.0.rb +2 -0
- data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
- data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
- data/{context.rb → lib/rubylexer/context.rb} +48 -18
- data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
- data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
- data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
- data/{token.rb → lib/rubylexer/token.rb} +72 -20
- data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
- data/lib/rubylexer/version.rb +3 -0
- data/{testcode → test/code}/deletewarns.rb +0 -0
- data/test/code/dl_all_gems.rb +43 -0
- data/{testcode → test/code}/dumptokens.rb +12 -9
- data/test/code/locatetest +30 -0
- data/test/code/locatetest.rb +49 -0
- data/test/code/rubylexervsruby.rb +173 -0
- data/{testcode → test/code}/tokentest.rb +62 -51
- data/{testcode → test/code}/torment +8 -8
- data/test/code/unpack_all_gems.rb +15 -0
- data/{testdata → test/data}/1.rb.broken +0 -0
- data/{testdata → test/data}/23.rb +0 -0
- data/test/data/__end__.rb +2 -0
- data/test/data/__end__2.rb +3 -0
- data/test/data/and.rb +5 -0
- data/test/data/blockassigntest.rb +23 -0
- data/test/data/chunky.plain.rb +75 -0
- data/test/data/chunky_bacon.rb +112 -0
- data/test/data/chunky_bacon2.rb +112 -0
- data/test/data/chunky_bacon3.rb +112 -0
- data/test/data/chunky_bacon4.rb +112 -0
- data/test/data/for.rb +45 -0
- data/test/data/format.rb +6 -0
- data/{testdata → test/data}/g.rb +0 -0
- data/test/data/gemlist.txt +280 -0
- data/test/data/heart.rb +7 -0
- data/test/data/if.rb +6 -0
- data/test/data/jarh.rb +369 -0
- data/test/data/lbrace.rb +4 -0
- data/test/data/lbrack.rb +4 -0
- data/{testdata → test/data}/newsyntax.rb +0 -0
- data/{testdata → test/data}/noeolatend.rb +0 -0
- data/test/data/p-op.rb +8 -0
- data/{testdata → test/data}/p.rb +671 -79
- data/{testdata → test/data}/pleac.rb.broken +0 -0
- data/{testdata → test/data}/pre.rb +0 -0
- data/{testdata → test/data}/pre.unix.rb +0 -0
- data/{testdata → test/data}/regtest.rb +0 -0
- data/test/data/rescue.rb +35 -0
- data/test/data/s.rb +186 -0
- data/test/data/strinc.rb +2 -0
- data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
- data/test/data/untermed_here.rb.broken +2 -0
- data/test/data/untermed_string.rb.broken +1 -0
- data/{testdata → test/data}/untitled1.rb +0 -0
- data/{testdata → test/data}/w.rb +0 -0
- data/{testdata → test/data}/wsdlDriver.rb +0 -0
- data/testing.txt +6 -4
- metadata +163 -59
- data/README +0 -134
- data/Rantfile +0 -37
- data/io.each_til_charset.rb +0 -247
- data/require.rb +0 -103
- data/rlold.rb +0 -12
- data/testcode/locatetest +0 -12
- data/testcode/rubylexervsruby.rb +0 -104
- data/testcode/rubylexervsruby.sh +0 -51
- data/testresults/placeholder +0 -0
data/lib/rubylexer/0.6.2.rb
@@ -0,0 +1,39 @@
+require 'rubylexer/0.7.0'
+
+#make ImplicitParamList Start and End tokens descend from IgnoreToken again
+class RubyLexer
+  remove_const :ImplicitParamListStartToken
+  remove_const :ImplicitParamListEndToken
+
+  class ImplicitParamListStartToken < IgnoreToken
+#    include StillIgnoreToken
+    def initialize(offset)
+      super("(",offset)
+    end
+    def to_s; '' end
+  end
+
+  class ImplicitParamListEndToken < IgnoreToken
+#    include StillIgnoreToken
+    def initialize(offset)
+      super(")",offset)
+    end
+    def to_s; '' end
+  end
+end
+
+RubyLexer.constants.map{|k|
+  k.name[/[^:]+$/] if Token>=k or Context>=k
+}.compact + %w[
+  RuLexer CharHandler CharSet SymbolTable
+  SimpleTokenPrinter KeepWsTokenPrinter
+].each{|name|
+  Object.const_set name, RubyLexer.const_get name
+}
+
+
+class RubyLexer
+  def merge_assignment_op_in_setter_callsites?
+    true
+  end
+end
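The new data/lib/rubylexer/0.6.2.rb above is a backward-compatibility shim: under 0.7.0 everything lives inside the RubyLexer namespace, and requiring 'rubylexer/0.6.2' copies the token, context, and helper classes back onto Object so 0.6.2-era code keeps resolving them at the top level. A minimal usage sketch (mine, not shipped with the gem):

require 'rubylexer/0.6.2'   # instead of require 'rubylexer'

# After the shim runs, these constants resolve without the RubyLexer::
# prefix, as they did in 0.6.2:
p SymbolTable    # same class as RubyLexer::SymbolTable
p CharSet        # same class as RubyLexer::CharSet
p CharHandler    # same class as RubyLexer::CharHandler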
data/{charhandler.rb → lib/rubylexer/charhandler.rb}
@@ -17,7 +17,8 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 =end
 
-require "charset"
+require "rubylexer/charset"
+class RubyLexer
 #------------------------------------
 class CharHandler
   #-----------------------------------
@@ -41,7 +42,7 @@ class CharHandler
       when Fixnum
         self[pattern]=action
       else
-        raise "invalid pattern class #{pattern.class}"
+        raise "invalid pattern class #{pattern.class}: #{pattern}"
       end
     }
 
@@ -80,5 +81,6 @@ class CharHandler
     end until go(mychar,*args)
   end
 end
+end
 
 
data/{charset.rb → lib/rubylexer/charset.rb}
@@ -17,6 +17,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 =end
 
+class RubyLexer
 class CharSet
   def initialize(*charss)
     clear
@@ -31,9 +32,9 @@ class CharSet
 
   def add(chars)
     case chars
-    when String
+    when ::String
       chars.each_byte {|c| @bitset |= (1<<c) }
-    when Fixnum then @bitset |= (1<<chars)
+    when ::Fixnum then @bitset |= (1<<chars)
     else chars.each {|c| @bitset |= (1<<c) }
     end
   end
@@ -72,5 +73,5 @@ class CharSet
     return result
   end
 end
-
+end
 
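A gloss on the ::String / ::Fixnum change above (my explanation, not part of the diff): now that CharSet is reopened inside class RubyLexer, a bare constant in a when clause is looked up through the enclosing namespace first and could be shadowed by a nested constant, so the :: prefix pins the comparison to the top-level classes. A small self-contained illustration with made-up names:

class Outer
  class String; end            # nested constant that shadows ::String inside Outer

  def self.classify(x)
    case x
    when ::String then :text   # explicitly the top-level String
    when String   then :inner  # this one is Outer::String
    else :other
    end
  end
end

p Outer.classify("hi")                 #=> :text
p Outer.classify(Outer::String.new)    #=> :inner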
data/{context.rb → lib/rubylexer/context.rb}
@@ -1,3 +1,4 @@
+class RubyLexer
 module NestedContexts
   class NestedContext
     attr :starter
@@ -13,7 +14,8 @@ module NestedContexts
       @ender==tok
     end
 
-    def see
+    def see lxr,msg; end
+    def lhs=*x; end #do nothing
   end
 
   class ListContext < NestedContext
@@ -30,6 +32,7 @@ module NestedContexts
     def initialize(linenum)
       super('(', ')' ,linenum)
     end
+    attr_accessor :lhs
   end
 
   class BlockContext < NestedContext
@@ -38,16 +41,31 @@ module NestedContexts
     end
   end
 
-  class BlockParamListContext < ListContext
-    def initialize(linenum)
-      super('|','|',linenum)
-    end
-  end
+#  class BlockParamListContext < ListContext
+#    def initialize(linenum)
+#      super('|','|',linenum)
+#    end
+#  end
 
   class ParamListContext < ListContext
     def initialize(linenum)
       super('(', ')',linenum)
     end
+    def lhs; false end
+  end
+
+  class ImplicitLhsContext < NestedContext
+    def initialize(linenum)
+      @linenum=linenum
+    end
+    def lhs; true end
+    def starter; nil end
+    def ender; '=' end
+  end
+
+  class BlockParamListLhsContext < ImplicitLhsContext
+    def starter; '|' end
+    def ender; '|' end
   end
 
   class ImplicitContext < ListContext
@@ -55,19 +73,26 @@ module NestedContexts
 
   class ParamListContextNoParen < ImplicitContext
     def initialize(linenum)
-
+      super(nil,nil,linenum)
     end
+    def lhs; false end
   end
 
-  class
+  class WhenParamListContext < ImplicitContext
     def initialize(starter,linenum)
-
+      super(starter,nil,linenum)
+    end
+  end
+
+  class AssignmentContext < NestedContext
+    def initialize(linenum)
+      super("assignment context", "=",linenum)
     end
   end
 
   class AssignmentRhsContext < ImplicitContext
     def initialize(linenum)
-
+      super(nil,nil,linenum)
     end
   end
 
@@ -76,8 +101,8 @@ module NestedContexts
       super(starter,'end',linenum)
     end
 
-    def see
-      msg==:rescue ?
+    def see lxr,msg
+      msg==:rescue ? lxr.parsestack.push_rescue_sm : super
     end
   end
 
@@ -115,7 +140,8 @@ module NestedContexts
       @state=:rescue
     end
 
-    def see(
+    def see(lxr,msg)
+      stack=lxr.parsestack
       case msg
       when :rescue:
         WantsEndContext===stack.last or
@@ -125,7 +151,7 @@ module NestedContexts
       when :arrow: #local var defined in this state
       when :then,:semi,:colon:
         msg=:then
-
+        self.equal? stack.pop or raise 'syntax error: then not expected at this time'
         #pop self off owning context stack
       else super
       end
@@ -149,16 +175,19 @@ module NestedContexts
       @state=:for
     end
 
-    def see(
+    def see(lxr,msg)
+      stack=lxr.parsestack
+      assert msg!=:for
       case msg
      when :for: WantsEndContext===stack.last or raise 'syntax error: for not expected at this time'
        #local var defined in this state
-
+        #never actually used?
+      when :in: self.equal? stack.pop or raise 'syntax error: in not expected at this time'
        stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum)
        #pop self off owning context stack and push ExpectDoOrNlContext
      else super
      end
-      LEGAL_SUCCESSORS[@state] == msg or raise
+      LEGAL_SUCCESSORS[@state] == msg or raise "for syntax error: #{msg} unexpected in #@state"
      @state=msg
    end
  end
@@ -171,4 +200,5 @@ module NestedContexts
      dflt_initialize('?',':',linenum)
    end
  end
-end
+end
+end
data/{rubycode.rb → lib/rubylexer/rubycode.rb}
@@ -19,8 +19,10 @@
 
 
 
-require "token.rb"
-require "tokenprinter.rb"
+#require "token.rb"
+#require "tokenprinter.rb"
+
+class RubyLexer
 
 class RubyCode < Token
   def initialize(tokens,filename,linenum)
@@ -40,5 +42,5 @@ class RubyCode < Token
     return result.to_s
   end
 end
-
+end
 
data/{rulexer.rb → lib/rubylexer/rulexer.rb}
@@ -20,14 +20,27 @@
 
 
 require "assert"
-require "charhandler"
+#require "charhandler"
 #require "term"
-require "rubycode"
-require "io.each_til_charset"
-
+#require "rubycode"
+#require "io.each_til_charset"
+#begin
+  require 'rubygems'
+#rescue Exception:
+#end
+#require 'sequence'
+require 'sequence/indexed'
+require 'sequence/file'
+#-----------------------------------
+assert !defined? ::RubyLexer
+$RuLexer=Class.new{}
+class RubyLexer < $RuLexer
+  RuLexer=$RuLexer
+end
+$RuLexer=nil
 #------------------------------------
-class
-
+class RubyLexer
+class RuLexer
   WHSP=" \t\r\v\f"
   WHSPLF=WHSP+"\n"
   #maybe \r should be in WHSPLF instead
@@ -41,25 +54,28 @@ class RuLexer
   #-----------------------------------
   def initialize(filename, file, line)
     @filename=filename
-
-    file.
-
+
+#    String===file && file=IOext::FakeFile.new(file)
+    file.binmode if File===file
+    @original_file=file
+    @file=file.to_sequence
     @linenum=line
     @toptable=nil #descendants must fill this out
-    @moretokens=[ FileAndLineToken.new(@filename, @linenum,
+    @moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
     @last_operative_token=nil
+    @endsets={}
   end
 
   #-----------------------------------
   def endoffile_detected s=''
-    EoiToken.new(s,@
+    EoiToken.new(s,@original_file, input_position-s.size)
   end
 
   #-----------------------------------
   def get1token
     @moretokens.empty? or return @moretokens.shift
 
-    if
+    if eof?
       #@moretokens<<nil
       return endoffile_detected()
     end
@@ -73,6 +89,14 @@ class RuLexer
     return true
   end
 
+  #-----------------------------------
+  def each
+    begin yield tok = get1token
+    end until tok.is_a? EoiToken
+  end
+
+  include Enumerable
+
 private
   #-----------------------------------
   def lexerror_errortoken(tok,str,file=@filename,line=@linenum)
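The each method and include Enumerable added above make a lexer instance directly enumerable: each yields every token up to and including the closing EoiToken. A usage sketch (mine; RuLexer#initialize(filename, file, line) is the signature shown in the hunk above, and I'm assuming RubyLexer.new accepts the same arguments):

require 'rubylexer'

File.open("some_ruby_file.rb") do |f|
  lexer = RubyLexer.new("some_ruby_file.rb", f, 1)
  # any Enumerable method now works on the token stream:
  newlines = lexer.select {|tok| RubyLexer::NewlineToken === tok }
  puts "#{newlines.size} newline tokens"
end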
@@ -99,16 +123,16 @@ private
   def regex(ch=nil)
     result=RenderExactlyStringToken.new('/').
       append_token double_quote("/")
-
+    result.line=@linenum
     return result
   end
 
   #-----------------------------------
-  def single_char_token(str)
+  def single_char_token(str) getchar end
 
   #-----------------------------------
   def illegal_char(ch)
-    pos
+    pos= input_position
     LEGALCHARS===ch and return( lexerror WsToken.new(getchar,pos), "legal (?!) bad char (code: #{ch[0]})" )
     lexerror WsToken.new(til_charset(LEGALCHARS),pos), "bad char (code: #{ch[0]})"
   end
@@ -116,7 +140,7 @@ private
   #-----------------------------------
   def fancy_quote (ch)
     assert ch=='%'
-    oldpos
+    oldpos= input_position
     eat_next_if(ch) or raise "fancy_quote, no "+ch
 
     ch=getchar
@@ -133,7 +157,9 @@ private
     when /^[a-z0-9]$/oi
       error= "unrecognized %string type: "+ch; '"'
     when ''
-
+      result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
+      result.line=@linenum
+      return result
     else back1char; '"' #no letter means string too
     end
 
@@ -144,8 +170,10 @@ private
 
     result=double_quote(beg, type, (PAIRS[beg] or beg))
     case ch
-    when /^[Wwr]
-
+    when /^[Wwr]$/;
+      result=RenderExactlyStringToken.new(type).append_token(result)
+      result.line=@linenum
+    when 's'; result=SymbolToken.new(result.to_s)
     end
     result.offset=oldpos
     return lexerror(result,error)
@@ -159,11 +187,15 @@ private
 
   #-----------------------------------
   def all_quote(nester, type, delimiter, bs_handler=nil)
+    endset="\r\n\\\\"
+
     #string must start with nester
     if nester==INET_NL_REX
       readnl
     else
-
+      endset<< "\\"+nester
+      endset<< "\\"+delimiter if nester!=delimiter
+      eat_next_if(nester[0])
     end or return nil
 
     bs_handler ||= case type
@@ -177,9 +209,12 @@ private
     str=StringToken.new type
     old_linenum=@linenum
     nestlevel=1
-    maybe_crunch= "'["[type]
-
-
+    endset<<maybe_crunch="#" unless "'["[type]
+    endset=
+      @endsets[endset] ||= /[#{endset}]/
+    loop{
+      str.append(til_charset( endset ))
+      b=getchar
       if /^[\r\n]$/===b
         back1char
         b=readnl
@@ -189,8 +224,9 @@ private
       if (nestlevel-=1)==0
         str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
         #emit eol marker later if line has changed
+        str.line=@linenum
         @linenum != old_linenum and @moretokens <<
-          FileAndLineToken.new(@filename,@linenum
+          FileAndLineToken.new(@filename,@linenum, input_position)
         return str
       end
     when nester
@@ -198,17 +234,25 @@ private
       assert(nester!=delimiter)
       nestlevel+=1
     when "\\"
+      begin
        b= send(bs_handler,'\\',nester,delimiter)
+      rescue e
+        lexerror str, e.message
+      end
     when nil then raise "nil char from each_byte?" #never happens
     when maybe_crunch
       nc=nextchar.chr
       nc[/^[{@$]$/] and b=ruby_code(nc)
+    when "" #eof
+      lexerror str, "unterminated #{delimiter}-string at eof"
+      break
     end
     str.append b
   }
 
-  assert
-
+  assert eof?
+  str.line=@linenum
+  str
 end
 
   #-----------------------------------
@@ -216,7 +260,7 @@ private
   ESCAPESEQS="\a\b\e\f\n\r\s\t\v"
   def dquote_esc_seq(ch,nester=nil,delimiter=nil)
     assert ch == '\\'
-    #see
+    #see pickaxe (1st ed), p 205 for documentation of escape sequences
     return case k=getchar
     when "\n" then @linenum+=1; ""
     when "\\" then "\\"
@@ -236,11 +280,11 @@ private
     when "v" then "\v"
 =end
     when "M"
-      eat_next_if(?-) or
+      eat_next_if(?-) or raise 'bad \\M sequence'
       (getchar_maybe_escape | 0x80).chr
 
     when "C"
-      eat_next_if(?-) or
+      eat_next_if(?-) or raise 'bad \\C sequence'
       (getchar_maybe_escape & 0x9F).chr
 
     when "c"
@@ -249,16 +293,16 @@ private
     when /^[0-7]$/
       str=k
       while str.length < 3
-        str << (eat_next_if(
+        str << (eat_next_if(/[0-7]/) or break)
       end
       (str.oct&0xFF).chr
 
     when "x"
       str=''
       while str.length < 2
-        str << (eat_next_if(
+        str << (eat_next_if(/[0-9A-F]/i) or break)
       end
-      str=='' and
+      str=='' and raise "bad \\x sequence"
       str.hex.chr
 
     else
@@ -281,7 +325,7 @@ private
 #when "M","C"
 #  eat_next_if(?-) or
 #    lexerror "illegal \\#{c}- esc sequence"
-#  ch + c + '-' + (eat_next_if(
+#  ch + c + '-' + (eat_next_if(/[^\\]/)or'')
 #  #if this \M- or \C- sequence is continued by
 #  #another backslash, we'll just leave the
 #  #backslash on the input, to be read by the next pass
@@ -332,7 +376,11 @@ private
     klass= RubyLexer===self ? self.class : RubyLexer
     rl=klass.new(@filename,@file,@linenum)
 
-
+    #pass current local vars into new parser
+    localvars.names.each{|varname|
+      rl.localvars[varname]=true
+    }
+    rl.localvars.start_block
 
     case ch
     when '@'
@@ -363,6 +411,10 @@ private
     rl.no_more? or
       raise 'uh-oh, ruby tokens were lexed past end of ruby code'
 
+    #local vars defined in inclusion get propagated to outer parser
+    newvars=rl.localvars.__locals_lists[1..-1].map{|bag| bag.keys }.flatten
+    newvars.each{|newvar| localvars[newvar]=true }
+
     result=RubyCode.new(tokens,@filename,@linenum)
     @linenum=rl.linenum
     return result
@@ -375,10 +427,14 @@ private
 
 
   #-----------------------------------
-  BINCHARS=?0..?1
-  OCTCHARS=?0..?7
-  DECCHARS=?0..?9
-  HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
+#  BINCHARS=?0..?1
+#  OCTCHARS=?0..?7
+#  DECCHARS=?0..?9
+#  HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
+  BINCHARS=/[^01_]/
+  OCTCHARS=/[^0-7_]/
+  DECCHARS=/[^0-9_]/
+  HEXCHARS=/[^0-9a-f_]/i
   #0-9
   #-----------------------------------
   def number(str)
@@ -386,83 +442,85 @@ private
     return nil unless /^[0-9+\-]$/===str
 
     interp=:to_i
-    str= (eat_next_if(
-    str<< (eat_next_if(
+    str= (eat_next_if(/[+\-]/)or'')
+    str<< (eat_next_if(?0)or'')
 
-    if str[-1] == ?0 and nextchar
-      typechar=eat_next_if(
+    if str[-1] == ?0 and !eof? and !nextchar.chr[/[.eE]/]
+      typechar=eat_next_if(/[BOXD]/i)||'o'
       str << typechar
       interp=:oct
-
-      when 'b','B'
-      when 'x','X'
-      when 'o','O'
+      unallowed=case typechar
+        when 'b','B'; BINCHARS
+        when 'x','X'; HEXCHARS
+        when 'o','O'; OCTCHARS
+        when 'd','D'; interp=:to_i; DECCHARS
       else raise :impossible
       end
     else
       interp=:to_i
-
+      unallowed =DECCHARS
     end
 
-    addl_dig_seqs= (
+    addl_dig_seqs= (typechar)? 0 : 2 #den 210
     error=nil
-
-
-
-
+
+#    @file.each_byte { |b|
+#      if unallowed === b or ?_ == b
+#        str << b
+#      else
+    str<<til_charset(unallowed)
+    b=getc
     #digits must follow and precede . and e
-    if ?.==b and addl_dig_seqs==2 and
-      addl_dig_seqs=1
+    if ?.==b and addl_dig_seqs==2 and !(unallowed===nextchar.chr)
+      #addl_dig_seqs=1
       str << b
+      str<<til_charset(unallowed)
+      b=getc
+      interp=:to_s
+    end
     #digits must follow and precede . and e
-
+    if (?e==b or ?E==b) and addl_dig_seqs>=1 and
       readahead(2)[/^[-+]?[0-9]/]
-      addl_dig_seqs=0
+      #addl_dig_seqs=0
       str << b
       str << (eat_next_if(/[+\-]/)or'')
-
-
-
-      break
+      str<<til_charset(unallowed)
+      b=getc
+      interp=:to_s
     end
-
-
-
-
-
-
+    back1char if b
+    #return(str.send(interp))
+#      break
+#      #OCTCHARS allowed here to permit constants like this: 01.2
+#      unallowed == DECCHARS or unallowed == OCTCHARS or error= "floats are always decimal (currently)"
+#      unallowed = DECCHARS
+#      interp=:to_s
+#    end
+#  }
 
     assert(str[/[0-9]/])
     lexerror NumberToken.new(str.send(interp)), error
   end
 
+if (defined? DEBUGGER__ or defined? Debugger)
   #-----------------------------------
   def comment(str=nil)
     #assert str == '#'
-
-
-
-
-    @file.each_byte {|b|
-      if b==?\n #leave \n's on input for newline to eat
-        back1char
-      else
-        str << b
-      end
-      return IgnoreToken.new(str) if b==?\n or b==?#
-    }
-    #eof...
+    Process.kill("INT",0) if readahead(11)==%/#breakpoint/
+
+    IgnoreToken.new(til_charset(/[\r\n]/))
+  end
 else
-
-
-
-    return IgnoreToken.new(str)
+  #-----------------------------------
+  def comment(str=nil)
+    IgnoreToken.new(til_charset(/[\r\n]/))
   end
+end
 
   #-----------------------------------
   def whitespace(ch)
     assert ch[/^[#{WHSP}]$/o]
-    oldpos
+    oldpos= input_position
     str=til_charset(/[^#{WHSP}]/o)
     return WsToken.new(str,oldpos)
   end
@@ -471,26 +529,29 @@ end
   INET_NL_REX=/^(\r\n?|\n\r?)/
   def readnl
     #compatible with dos/mac style newlines...
+
+    eof? and return ''
+
     nl=readahead(2)[INET_NL_REX]
     nl or return nil
     assert((1..2)===nl.length)
     @linenum+=1
-
+    read nl.length
   end
 
   #-----------------------------------
   def newline(ch)
-    offset
+    offset= input_position
     nl=readnl
-    @moretokens << FileAndLineToken.new( @filename, @linenum,
+    @moretokens << FileAndLineToken.new( @filename, @linenum, input_position )
     return NewlineToken.new( nl,offset)
   end
 
 
   #-----------------------------------
   def getchar_maybe_escape
-
-    c
+    eof? and raise "unterminated dq string"
+    c=getc
 
     c == ?\\ and
       (c = (dquote_esc_seq('\\')[-1] or ?\n))
@@ -498,26 +559,41 @@ end
     return c
   end
 
-
-
-
-
-
-
-
-
+protected
+  # delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:getc,:back1char
+  require 'forwardable'
+  extend Forwardable
+  def_delegators :@file, :readahead,:readback, :read, :eof?
+
+  def til_charset cs,len=16; @file.read_til_charset cs,len end
+  def getc; @file.read1 end
+  def getchar; @file.read 1 end
+  def back1char; @file.move( -1 )end
+  def prevchar; @file.readbehind 1 end
+  def nextchar; @file.readahead1 end
+  def eat_next_if(ch);
+    saw=getchar
+    if Integer===ch
+      ch==saw[0]
+    else
+      ch===saw
+    end or (back1char; return)
+    return saw
   end
 
-
-
+  #-----------------------------------
+  def input_position; @file.pos end
+
+  #-----------------------------------
+  def input_position_set x; @file.pos=x end
 
   #-----------------------------------
-  def
+  def self.save_offsets_in(*funcnames)
     eval funcnames.collect{|fn| <<-endeval }.to_s
       class ::#{self}
         alias #{fn}__no_offset #{fn} #rename old ver of fn
         def #{fn}(*args) #create new version
-          pos
+          pos= input_position
           result=#{fn}__no_offset(*args)
           assert Token===result
           result.offset||=pos
@@ -527,6 +603,8 @@ protected
     endeval
   end
 
-
+end
 
 end
+
+
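The last hunks replace hand-rolled IO code with delegation to a sequence object (extend Forwardable / def_delegators :@file, ...) and add save_offsets_in, which rewrites token-producing methods so every returned token records the input position it started at. A self-contained sketch of both patterns using toy names of my own (none of these classes come from rubylexer):

require 'forwardable'
require 'stringio'

Tok = Struct.new(:text, :offset)

class ToyLexer
  extend Forwardable
  def_delegators :@src, :pos, :eof?      # same idea as def_delegators :@file, ...

  def initialize(string)
    @src = StringIO.new(string)
  end

  def word                               # a token-producing method
    Tok.new(@src.gets(" ").to_s.strip, nil)
  end

  # Wrap token-producing methods so each token remembers where it began,
  # mimicking what save_offsets_in does with alias + eval in the diff.
  def self.save_offsets_in(*names)
    names.each do |name|
      plain = instance_method(name)
      define_method(name) do |*args|
        start = pos
        tok = plain.bind(self).call(*args)
        tok.offset ||= start
        tok
      end
    end
  end
  save_offsets_in :word
end

lex = ToyLexer.new("foo bar baz")
p lex.word    #=> #<struct Tok text="foo", offset=0>
p lex.word    #=> #<struct Tok text="bar", offset=4>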