rubylexer 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +55 -0
- data/Manifest.txt +67 -0
- data/README.txt +103 -0
- data/Rakefile +24 -0
- data/howtouse.txt +9 -6
- data/{assert.rb → lib/assert.rb} +11 -11
- data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
- data/lib/rubylexer/0.6.2.rb +39 -0
- data/lib/rubylexer/0.6.rb +5 -0
- data/lib/rubylexer/0.7.0.rb +2 -0
- data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
- data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
- data/{context.rb → lib/rubylexer/context.rb} +48 -18
- data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
- data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
- data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
- data/{token.rb → lib/rubylexer/token.rb} +72 -20
- data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
- data/lib/rubylexer/version.rb +3 -0
- data/{testcode → test/code}/deletewarns.rb +0 -0
- data/test/code/dl_all_gems.rb +43 -0
- data/{testcode → test/code}/dumptokens.rb +12 -9
- data/test/code/locatetest +30 -0
- data/test/code/locatetest.rb +49 -0
- data/test/code/rubylexervsruby.rb +173 -0
- data/{testcode → test/code}/tokentest.rb +62 -51
- data/{testcode → test/code}/torment +8 -8
- data/test/code/unpack_all_gems.rb +15 -0
- data/{testdata → test/data}/1.rb.broken +0 -0
- data/{testdata → test/data}/23.rb +0 -0
- data/test/data/__end__.rb +2 -0
- data/test/data/__end__2.rb +3 -0
- data/test/data/and.rb +5 -0
- data/test/data/blockassigntest.rb +23 -0
- data/test/data/chunky.plain.rb +75 -0
- data/test/data/chunky_bacon.rb +112 -0
- data/test/data/chunky_bacon2.rb +112 -0
- data/test/data/chunky_bacon3.rb +112 -0
- data/test/data/chunky_bacon4.rb +112 -0
- data/test/data/for.rb +45 -0
- data/test/data/format.rb +6 -0
- data/{testdata → test/data}/g.rb +0 -0
- data/test/data/gemlist.txt +280 -0
- data/test/data/heart.rb +7 -0
- data/test/data/if.rb +6 -0
- data/test/data/jarh.rb +369 -0
- data/test/data/lbrace.rb +4 -0
- data/test/data/lbrack.rb +4 -0
- data/{testdata → test/data}/newsyntax.rb +0 -0
- data/{testdata → test/data}/noeolatend.rb +0 -0
- data/test/data/p-op.rb +8 -0
- data/{testdata → test/data}/p.rb +671 -79
- data/{testdata → test/data}/pleac.rb.broken +0 -0
- data/{testdata → test/data}/pre.rb +0 -0
- data/{testdata → test/data}/pre.unix.rb +0 -0
- data/{testdata → test/data}/regtest.rb +0 -0
- data/test/data/rescue.rb +35 -0
- data/test/data/s.rb +186 -0
- data/test/data/strinc.rb +2 -0
- data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
- data/test/data/untermed_here.rb.broken +2 -0
- data/test/data/untermed_string.rb.broken +1 -0
- data/{testdata → test/data}/untitled1.rb +0 -0
- data/{testdata → test/data}/w.rb +0 -0
- data/{testdata → test/data}/wsdlDriver.rb +0 -0
- data/testing.txt +6 -4
- metadata +163 -59
- data/README +0 -134
- data/Rantfile +0 -37
- data/io.each_til_charset.rb +0 -247
- data/require.rb +0 -103
- data/rlold.rb +0 -12
- data/testcode/locatetest +0 -12
- data/testcode/rubylexervsruby.rb +0 -104
- data/testcode/rubylexervsruby.sh +0 -51
- data/testresults/placeholder +0 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rubylexer/0.7.0'
|
2
|
+
|
3
|
+
#make ImplicitParamList Start and End tokens descend from IgnoreToken again
|
4
|
+
class RubyLexer
|
5
|
+
remove_const :ImplicitParamListStartToken
|
6
|
+
remove_const :ImplicitParamListEndToken
|
7
|
+
|
8
|
+
class ImplicitParamListStartToken < IgnoreToken
|
9
|
+
# include StillIgnoreToken
|
10
|
+
def initialize(offset)
|
11
|
+
super("(",offset)
|
12
|
+
end
|
13
|
+
def to_s; '' end
|
14
|
+
end
|
15
|
+
|
16
|
+
class ImplicitParamListEndToken < IgnoreToken
|
17
|
+
# include StillIgnoreToken
|
18
|
+
def initialize(offset)
|
19
|
+
super(")",offset)
|
20
|
+
end
|
21
|
+
def to_s; '' end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
RubyLexer.constants.map{|k|
|
26
|
+
k.name[/[^:]+$/] if Token>=k or Context>=k
|
27
|
+
}.compact + %w[
|
28
|
+
RuLexer CharHandler CharSet SymbolTable
|
29
|
+
SimpleTokenPrinter KeepWsTokenPrinter
|
30
|
+
].each{|name|
|
31
|
+
Object.const_set name, RubyLexer.const_get name
|
32
|
+
}
|
33
|
+
|
34
|
+
|
35
|
+
class RubyLexer
|
36
|
+
def merge_assignment_op_in_setter_callsites?
|
37
|
+
true
|
38
|
+
end
|
39
|
+
end
|
@@ -17,7 +17,8 @@
|
|
17
17
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
18
|
=end
|
19
19
|
|
20
|
-
require "charset"
|
20
|
+
require "rubylexer/charset"
|
21
|
+
class RubyLexer
|
21
22
|
#------------------------------------
|
22
23
|
class CharHandler
|
23
24
|
#-----------------------------------
|
@@ -41,7 +42,7 @@ class CharHandler
|
|
41
42
|
when Fixnum
|
42
43
|
self[pattern]=action
|
43
44
|
else
|
44
|
-
raise "invalid pattern class #{pattern.class}"
|
45
|
+
raise "invalid pattern class #{pattern.class}: #{pattern}"
|
45
46
|
end
|
46
47
|
}
|
47
48
|
|
@@ -80,5 +81,6 @@ class CharHandler
|
|
80
81
|
end until go(mychar,*args)
|
81
82
|
end
|
82
83
|
end
|
84
|
+
end
|
83
85
|
|
84
86
|
|
@@ -17,6 +17,7 @@
|
|
17
17
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
18
|
=end
|
19
19
|
|
20
|
+
class RubyLexer
|
20
21
|
class CharSet
|
21
22
|
def initialize(*charss)
|
22
23
|
clear
|
@@ -31,9 +32,9 @@ class CharSet
|
|
31
32
|
|
32
33
|
def add(chars)
|
33
34
|
case chars
|
34
|
-
when String
|
35
|
+
when ::String
|
35
36
|
chars.each_byte {|c| @bitset |= (1<<c) }
|
36
|
-
when Fixnum then @bitset |= (1<<chars)
|
37
|
+
when ::Fixnum then @bitset |= (1<<chars)
|
37
38
|
else chars.each {|c| @bitset |= (1<<c) }
|
38
39
|
end
|
39
40
|
end
|
@@ -72,5 +73,5 @@ class CharSet
|
|
72
73
|
return result
|
73
74
|
end
|
74
75
|
end
|
75
|
-
|
76
|
+
end
|
76
77
|
|
@@ -1,3 +1,4 @@
|
|
1
|
+
class RubyLexer
|
1
2
|
module NestedContexts
|
2
3
|
class NestedContext
|
3
4
|
attr :starter
|
@@ -13,7 +14,8 @@ module NestedContexts
|
|
13
14
|
@ender==tok
|
14
15
|
end
|
15
16
|
|
16
|
-
def see
|
17
|
+
def see lxr,msg; end
|
18
|
+
def lhs=*x; end #do nothing
|
17
19
|
end
|
18
20
|
|
19
21
|
class ListContext < NestedContext
|
@@ -30,6 +32,7 @@ module NestedContexts
|
|
30
32
|
def initialize(linenum)
|
31
33
|
super('(', ')' ,linenum)
|
32
34
|
end
|
35
|
+
attr_accessor :lhs
|
33
36
|
end
|
34
37
|
|
35
38
|
class BlockContext < NestedContext
|
@@ -38,16 +41,31 @@ module NestedContexts
|
|
38
41
|
end
|
39
42
|
end
|
40
43
|
|
41
|
-
class BlockParamListContext < ListContext
|
42
|
-
def initialize(linenum)
|
43
|
-
super('|','|',linenum)
|
44
|
-
end
|
45
|
-
end
|
44
|
+
# class BlockParamListContext < ListContext
|
45
|
+
# def initialize(linenum)
|
46
|
+
# super('|','|',linenum)
|
47
|
+
# end
|
48
|
+
# end
|
46
49
|
|
47
50
|
class ParamListContext < ListContext
|
48
51
|
def initialize(linenum)
|
49
52
|
super('(', ')',linenum)
|
50
53
|
end
|
54
|
+
def lhs; false end
|
55
|
+
end
|
56
|
+
|
57
|
+
class ImplicitLhsContext < NestedContext
|
58
|
+
def initialize(linenum)
|
59
|
+
@linenum=linenum
|
60
|
+
end
|
61
|
+
def lhs; true end
|
62
|
+
def starter; nil end
|
63
|
+
def ender; '=' end
|
64
|
+
end
|
65
|
+
|
66
|
+
class BlockParamListLhsContext < ImplicitLhsContext
|
67
|
+
def starter; '|' end
|
68
|
+
def ender; '|' end
|
51
69
|
end
|
52
70
|
|
53
71
|
class ImplicitContext < ListContext
|
@@ -55,19 +73,26 @@ module NestedContexts
|
|
55
73
|
|
56
74
|
class ParamListContextNoParen < ImplicitContext
|
57
75
|
def initialize(linenum)
|
58
|
-
|
76
|
+
super(nil,nil,linenum)
|
59
77
|
end
|
78
|
+
def lhs; false end
|
60
79
|
end
|
61
80
|
|
62
|
-
class
|
81
|
+
class WhenParamListContext < ImplicitContext
|
63
82
|
def initialize(starter,linenum)
|
64
|
-
|
83
|
+
super(starter,nil,linenum)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
class AssignmentContext < NestedContext
|
88
|
+
def initialize(linenum)
|
89
|
+
super("assignment context", "=",linenum)
|
65
90
|
end
|
66
91
|
end
|
67
92
|
|
68
93
|
class AssignmentRhsContext < ImplicitContext
|
69
94
|
def initialize(linenum)
|
70
|
-
|
95
|
+
super(nil,nil,linenum)
|
71
96
|
end
|
72
97
|
end
|
73
98
|
|
@@ -76,8 +101,8 @@ module NestedContexts
|
|
76
101
|
super(starter,'end',linenum)
|
77
102
|
end
|
78
103
|
|
79
|
-
def see
|
80
|
-
msg==:rescue ?
|
104
|
+
def see lxr,msg
|
105
|
+
msg==:rescue ? lxr.parsestack.push_rescue_sm : super
|
81
106
|
end
|
82
107
|
end
|
83
108
|
|
@@ -115,7 +140,8 @@ module NestedContexts
|
|
115
140
|
@state=:rescue
|
116
141
|
end
|
117
142
|
|
118
|
-
def see(
|
143
|
+
def see(lxr,msg)
|
144
|
+
stack=lxr.parsestack
|
119
145
|
case msg
|
120
146
|
when :rescue:
|
121
147
|
WantsEndContext===stack.last or
|
@@ -125,7 +151,7 @@ module NestedContexts
|
|
125
151
|
when :arrow: #local var defined in this state
|
126
152
|
when :then,:semi,:colon:
|
127
153
|
msg=:then
|
128
|
-
|
154
|
+
self.equal? stack.pop or raise 'syntax error: then not expected at this time'
|
129
155
|
#pop self off owning context stack
|
130
156
|
else super
|
131
157
|
end
|
@@ -149,16 +175,19 @@ module NestedContexts
|
|
149
175
|
@state=:for
|
150
176
|
end
|
151
177
|
|
152
|
-
def see(
|
178
|
+
def see(lxr,msg)
|
179
|
+
stack=lxr.parsestack
|
180
|
+
assert msg!=:for
|
153
181
|
case msg
|
154
182
|
when :for: WantsEndContext===stack.last or raise 'syntax error: for not expected at this time'
|
155
183
|
#local var defined in this state
|
156
|
-
|
184
|
+
#never actually used?
|
185
|
+
when :in: self.equal? stack.pop or raise 'syntax error: in not expected at this time'
|
157
186
|
stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum)
|
158
187
|
#pop self off owning context stack and push ExpectDoOrNlContext
|
159
188
|
else super
|
160
189
|
end
|
161
|
-
LEGAL_SUCCESSORS[@state] == msg or raise
|
190
|
+
LEGAL_SUCCESSORS[@state] == msg or raise "for syntax error: #{msg} unexpected in #@state"
|
162
191
|
@state=msg
|
163
192
|
end
|
164
193
|
end
|
@@ -171,4 +200,5 @@ module NestedContexts
|
|
171
200
|
dflt_initialize('?',':',linenum)
|
172
201
|
end
|
173
202
|
end
|
174
|
-
end
|
203
|
+
end
|
204
|
+
end
|
@@ -19,8 +19,10 @@
|
|
19
19
|
|
20
20
|
|
21
21
|
|
22
|
-
require "token.rb"
|
23
|
-
require "tokenprinter.rb"
|
22
|
+
#require "token.rb"
|
23
|
+
#require "tokenprinter.rb"
|
24
|
+
|
25
|
+
class RubyLexer
|
24
26
|
|
25
27
|
class RubyCode < Token
|
26
28
|
def initialize(tokens,filename,linenum)
|
@@ -40,5 +42,5 @@ class RubyCode < Token
|
|
40
42
|
return result.to_s
|
41
43
|
end
|
42
44
|
end
|
43
|
-
|
45
|
+
end
|
44
46
|
|
@@ -20,14 +20,27 @@
|
|
20
20
|
|
21
21
|
|
22
22
|
require "assert"
|
23
|
-
require "charhandler"
|
23
|
+
#require "charhandler"
|
24
24
|
#require "term"
|
25
|
-
require "rubycode"
|
26
|
-
require "io.each_til_charset"
|
27
|
-
|
25
|
+
#require "rubycode"
|
26
|
+
#require "io.each_til_charset"
|
27
|
+
#begin
|
28
|
+
require 'rubygems'
|
29
|
+
#rescue Exception:
|
30
|
+
#end
|
31
|
+
#require 'sequence'
|
32
|
+
require 'sequence/indexed'
|
33
|
+
require 'sequence/file'
|
34
|
+
#-----------------------------------
|
35
|
+
assert !defined? ::RubyLexer
|
36
|
+
$RuLexer=Class.new{}
|
37
|
+
class RubyLexer < $RuLexer
|
38
|
+
RuLexer=$RuLexer
|
39
|
+
end
|
40
|
+
$RuLexer=nil
|
28
41
|
#------------------------------------
|
29
|
-
class
|
30
|
-
|
42
|
+
class RubyLexer
|
43
|
+
class RuLexer
|
31
44
|
WHSP=" \t\r\v\f"
|
32
45
|
WHSPLF=WHSP+"\n"
|
33
46
|
#maybe \r should be in WHSPLF instead
|
@@ -41,25 +54,28 @@ class RuLexer
|
|
41
54
|
#-----------------------------------
|
42
55
|
def initialize(filename, file, line)
|
43
56
|
@filename=filename
|
44
|
-
|
45
|
-
file.
|
46
|
-
|
57
|
+
|
58
|
+
# String===file && file=IOext::FakeFile.new(file)
|
59
|
+
file.binmode if File===file
|
60
|
+
@original_file=file
|
61
|
+
@file=file.to_sequence
|
47
62
|
@linenum=line
|
48
63
|
@toptable=nil #descendants must fill this out
|
49
|
-
@moretokens=[ FileAndLineToken.new(@filename, @linenum,
|
64
|
+
@moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
|
50
65
|
@last_operative_token=nil
|
66
|
+
@endsets={}
|
51
67
|
end
|
52
68
|
|
53
69
|
#-----------------------------------
|
54
70
|
def endoffile_detected s=''
|
55
|
-
EoiToken.new(s,@
|
71
|
+
EoiToken.new(s,@original_file, input_position-s.size)
|
56
72
|
end
|
57
73
|
|
58
74
|
#-----------------------------------
|
59
75
|
def get1token
|
60
76
|
@moretokens.empty? or return @moretokens.shift
|
61
77
|
|
62
|
-
if
|
78
|
+
if eof?
|
63
79
|
#@moretokens<<nil
|
64
80
|
return endoffile_detected()
|
65
81
|
end
|
@@ -73,6 +89,14 @@ class RuLexer
|
|
73
89
|
return true
|
74
90
|
end
|
75
91
|
|
92
|
+
#-----------------------------------
|
93
|
+
def each
|
94
|
+
begin yield tok = get1token
|
95
|
+
end until tok.is_a? EoiToken
|
96
|
+
end
|
97
|
+
|
98
|
+
include Enumerable
|
99
|
+
|
76
100
|
private
|
77
101
|
#-----------------------------------
|
78
102
|
def lexerror_errortoken(tok,str,file=@filename,line=@linenum)
|
@@ -99,16 +123,16 @@ private
|
|
99
123
|
def regex(ch=nil)
|
100
124
|
result=RenderExactlyStringToken.new('/').
|
101
125
|
append_token double_quote("/")
|
102
|
-
|
126
|
+
result.line=@linenum
|
103
127
|
return result
|
104
128
|
end
|
105
129
|
|
106
130
|
#-----------------------------------
|
107
|
-
def single_char_token(str)
|
131
|
+
def single_char_token(str) getchar end
|
108
132
|
|
109
133
|
#-----------------------------------
|
110
134
|
def illegal_char(ch)
|
111
|
-
pos
|
135
|
+
pos= input_position
|
112
136
|
LEGALCHARS===ch and return( lexerror WsToken.new(getchar,pos), "legal (?!) bad char (code: #{ch[0]})" )
|
113
137
|
lexerror WsToken.new(til_charset(LEGALCHARS),pos), "bad char (code: #{ch[0]})"
|
114
138
|
end
|
@@ -116,7 +140,7 @@ private
|
|
116
140
|
#-----------------------------------
|
117
141
|
def fancy_quote (ch)
|
118
142
|
assert ch=='%'
|
119
|
-
oldpos
|
143
|
+
oldpos= input_position
|
120
144
|
eat_next_if(ch) or raise "fancy_quote, no "+ch
|
121
145
|
|
122
146
|
ch=getchar
|
@@ -133,7 +157,9 @@ private
|
|
133
157
|
when /^[a-z0-9]$/oi
|
134
158
|
error= "unrecognized %string type: "+ch; '"'
|
135
159
|
when ''
|
136
|
-
|
160
|
+
result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
|
161
|
+
result.line=@linenum
|
162
|
+
return result
|
137
163
|
else back1char; '"' #no letter means string too
|
138
164
|
end
|
139
165
|
|
@@ -144,8 +170,10 @@ private
|
|
144
170
|
|
145
171
|
result=double_quote(beg, type, (PAIRS[beg] or beg))
|
146
172
|
case ch
|
147
|
-
when /^[Wwr]
|
148
|
-
|
173
|
+
when /^[Wwr]$/;
|
174
|
+
result=RenderExactlyStringToken.new(type).append_token(result)
|
175
|
+
result.line=@linenum
|
176
|
+
when 's'; result=SymbolToken.new(result.to_s)
|
149
177
|
end
|
150
178
|
result.offset=oldpos
|
151
179
|
return lexerror(result,error)
|
@@ -159,11 +187,15 @@ private
|
|
159
187
|
|
160
188
|
#-----------------------------------
|
161
189
|
def all_quote(nester, type, delimiter, bs_handler=nil)
|
190
|
+
endset="\r\n\\\\"
|
191
|
+
|
162
192
|
#string must start with nester
|
163
193
|
if nester==INET_NL_REX
|
164
194
|
readnl
|
165
195
|
else
|
166
|
-
|
196
|
+
endset<< "\\"+nester
|
197
|
+
endset<< "\\"+delimiter if nester!=delimiter
|
198
|
+
eat_next_if(nester[0])
|
167
199
|
end or return nil
|
168
200
|
|
169
201
|
bs_handler ||= case type
|
@@ -177,9 +209,12 @@ private
|
|
177
209
|
str=StringToken.new type
|
178
210
|
old_linenum=@linenum
|
179
211
|
nestlevel=1
|
180
|
-
maybe_crunch= "'["[type]
|
181
|
-
|
182
|
-
|
212
|
+
endset<<maybe_crunch="#" unless "'["[type]
|
213
|
+
endset=
|
214
|
+
@endsets[endset] ||= /[#{endset}]/
|
215
|
+
loop{
|
216
|
+
str.append(til_charset( endset ))
|
217
|
+
b=getchar
|
183
218
|
if /^[\r\n]$/===b
|
184
219
|
back1char
|
185
220
|
b=readnl
|
@@ -189,8 +224,9 @@ private
|
|
189
224
|
if (nestlevel-=1)==0
|
190
225
|
str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
|
191
226
|
#emit eol marker later if line has changed
|
227
|
+
str.line=@linenum
|
192
228
|
@linenum != old_linenum and @moretokens <<
|
193
|
-
FileAndLineToken.new(@filename,@linenum
|
229
|
+
FileAndLineToken.new(@filename,@linenum, input_position)
|
194
230
|
return str
|
195
231
|
end
|
196
232
|
when nester
|
@@ -198,17 +234,25 @@ private
|
|
198
234
|
assert(nester!=delimiter)
|
199
235
|
nestlevel+=1
|
200
236
|
when "\\"
|
237
|
+
begin
|
201
238
|
b= send(bs_handler,'\\',nester,delimiter)
|
239
|
+
rescue e
|
240
|
+
lexerror str, e.message
|
241
|
+
end
|
202
242
|
when nil then raise "nil char from each_byte?" #never happens
|
203
243
|
when maybe_crunch
|
204
244
|
nc=nextchar.chr
|
205
245
|
nc[/^[{@$]$/] and b=ruby_code(nc)
|
246
|
+
when "" #eof
|
247
|
+
lexerror str, "unterminated #{delimiter}-string at eof"
|
248
|
+
break
|
206
249
|
end
|
207
250
|
str.append b
|
208
251
|
}
|
209
252
|
|
210
|
-
assert
|
211
|
-
|
253
|
+
assert eof?
|
254
|
+
str.line=@linenum
|
255
|
+
str
|
212
256
|
end
|
213
257
|
|
214
258
|
#-----------------------------------
|
@@ -216,7 +260,7 @@ private
|
|
216
260
|
ESCAPESEQS="\a\b\e\f\n\r\s\t\v"
|
217
261
|
def dquote_esc_seq(ch,nester=nil,delimiter=nil)
|
218
262
|
assert ch == '\\'
|
219
|
-
#see
|
263
|
+
#see pickaxe (1st ed), p 205 for documentation of escape sequences
|
220
264
|
return case k=getchar
|
221
265
|
when "\n" then @linenum+=1; ""
|
222
266
|
when "\\" then "\\"
|
@@ -236,11 +280,11 @@ private
|
|
236
280
|
when "v" then "\v"
|
237
281
|
=end
|
238
282
|
when "M"
|
239
|
-
eat_next_if(?-) or
|
283
|
+
eat_next_if(?-) or raise 'bad \\M sequence'
|
240
284
|
(getchar_maybe_escape | 0x80).chr
|
241
285
|
|
242
286
|
when "C"
|
243
|
-
eat_next_if(?-) or
|
287
|
+
eat_next_if(?-) or raise 'bad \\C sequence'
|
244
288
|
(getchar_maybe_escape & 0x9F).chr
|
245
289
|
|
246
290
|
when "c"
|
@@ -249,16 +293,16 @@ private
|
|
249
293
|
when /^[0-7]$/
|
250
294
|
str=k
|
251
295
|
while str.length < 3
|
252
|
-
str << (eat_next_if(
|
296
|
+
str << (eat_next_if(/[0-7]/) or break)
|
253
297
|
end
|
254
298
|
(str.oct&0xFF).chr
|
255
299
|
|
256
300
|
when "x"
|
257
301
|
str=''
|
258
302
|
while str.length < 2
|
259
|
-
str << (eat_next_if(
|
303
|
+
str << (eat_next_if(/[0-9A-F]/i) or break)
|
260
304
|
end
|
261
|
-
str=='' and
|
305
|
+
str=='' and raise "bad \\x sequence"
|
262
306
|
str.hex.chr
|
263
307
|
|
264
308
|
else
|
@@ -281,7 +325,7 @@ private
|
|
281
325
|
#when "M","C"
|
282
326
|
# eat_next_if(?-) or
|
283
327
|
# lexerror "illegal \\#{c}- esc sequence"
|
284
|
-
# ch + c + '-' + (eat_next_if(
|
328
|
+
# ch + c + '-' + (eat_next_if(/[^\\]/)or'')
|
285
329
|
# #if this \M- or \C- sequence is continued by
|
286
330
|
# #another backslash, we'll just leave the
|
287
331
|
# #backslash on the input, to be read by the next pass
|
@@ -332,7 +376,11 @@ private
|
|
332
376
|
klass= RubyLexer===self ? self.class : RubyLexer
|
333
377
|
rl=klass.new(@filename,@file,@linenum)
|
334
378
|
|
335
|
-
|
379
|
+
#pass current local vars into new parser
|
380
|
+
localvars.names.each{|varname|
|
381
|
+
rl.localvars[varname]=true
|
382
|
+
}
|
383
|
+
rl.localvars.start_block
|
336
384
|
|
337
385
|
case ch
|
338
386
|
when '@'
|
@@ -363,6 +411,10 @@ private
|
|
363
411
|
rl.no_more? or
|
364
412
|
raise 'uh-oh, ruby tokens were lexed past end of ruby code'
|
365
413
|
|
414
|
+
#local vars defined in inclusion get propagated to outer parser
|
415
|
+
newvars=rl.localvars.__locals_lists[1..-1].map{|bag| bag.keys }.flatten
|
416
|
+
newvars.each{|newvar| localvars[newvar]=true }
|
417
|
+
|
366
418
|
result=RubyCode.new(tokens,@filename,@linenum)
|
367
419
|
@linenum=rl.linenum
|
368
420
|
return result
|
@@ -375,10 +427,14 @@ private
|
|
375
427
|
|
376
428
|
|
377
429
|
#-----------------------------------
|
378
|
-
BINCHARS=?0..?1
|
379
|
-
OCTCHARS=?0..?7
|
380
|
-
DECCHARS=?0..?9
|
381
|
-
HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
|
430
|
+
# BINCHARS=?0..?1
|
431
|
+
# OCTCHARS=?0..?7
|
432
|
+
# DECCHARS=?0..?9
|
433
|
+
# HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
|
434
|
+
BINCHARS=/[^01_]/
|
435
|
+
OCTCHARS=/[^0-7_]/
|
436
|
+
DECCHARS=/[^0-9_]/
|
437
|
+
HEXCHARS=/[^0-9a-f_]/i
|
382
438
|
#0-9
|
383
439
|
#-----------------------------------
|
384
440
|
def number(str)
|
@@ -386,83 +442,85 @@ private
|
|
386
442
|
return nil unless /^[0-9+\-]$/===str
|
387
443
|
|
388
444
|
interp=:to_i
|
389
|
-
str= (eat_next_if(
|
390
|
-
str<< (eat_next_if(
|
445
|
+
str= (eat_next_if(/[+\-]/)or'')
|
446
|
+
str<< (eat_next_if(?0)or'')
|
391
447
|
|
392
|
-
if str[-1] == ?0 and nextchar
|
393
|
-
typechar=eat_next_if(
|
448
|
+
if str[-1] == ?0 and !eof? and !nextchar.chr[/[.eE]/]
|
449
|
+
typechar=eat_next_if(/[BOXD]/i)||'o'
|
394
450
|
str << typechar
|
395
451
|
interp=:oct
|
396
|
-
|
397
|
-
when 'b','B'
|
398
|
-
when 'x','X'
|
399
|
-
when 'o','O'
|
452
|
+
unallowed=case typechar
|
453
|
+
when 'b','B'; BINCHARS
|
454
|
+
when 'x','X'; HEXCHARS
|
455
|
+
when 'o','O'; OCTCHARS
|
456
|
+
when 'd','D'; interp=:to_i; DECCHARS
|
400
457
|
else raise :impossible
|
401
458
|
end
|
402
459
|
else
|
403
460
|
interp=:to_i
|
404
|
-
|
461
|
+
unallowed =DECCHARS
|
405
462
|
end
|
406
463
|
|
407
|
-
addl_dig_seqs= (
|
464
|
+
addl_dig_seqs= (typechar)? 0 : 2 #den 210
|
408
465
|
error=nil
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
466
|
+
|
467
|
+
# @file.each_byte { |b|
|
468
|
+
# if unallowed === b or ?_ == b
|
469
|
+
# str << b
|
470
|
+
# else
|
471
|
+
str<<til_charset(unallowed)
|
472
|
+
b=getc
|
413
473
|
#digits must follow and precede . and e
|
414
|
-
if ?.==b and addl_dig_seqs==2 and
|
415
|
-
addl_dig_seqs=1
|
474
|
+
if ?.==b and addl_dig_seqs==2 and !(unallowed===nextchar.chr)
|
475
|
+
#addl_dig_seqs=1
|
416
476
|
str << b
|
477
|
+
str<<til_charset(unallowed)
|
478
|
+
b=getc
|
479
|
+
interp=:to_s
|
480
|
+
end
|
417
481
|
#digits must follow and precede . and e
|
418
|
-
|
482
|
+
if (?e==b or ?E==b) and addl_dig_seqs>=1 and
|
419
483
|
readahead(2)[/^[-+]?[0-9]/]
|
420
|
-
addl_dig_seqs=0
|
484
|
+
#addl_dig_seqs=0
|
421
485
|
str << b
|
422
486
|
str << (eat_next_if(/[+\-]/)or'')
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
break
|
487
|
+
str<<til_charset(unallowed)
|
488
|
+
b=getc
|
489
|
+
interp=:to_s
|
427
490
|
end
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
491
|
+
back1char if b
|
492
|
+
#return(str.send(interp))
|
493
|
+
# break
|
494
|
+
# #OCTCHARS allowed here to permit constants like this: 01.2
|
495
|
+
# unallowed == DECCHARS or unallowed == OCTCHARS or error= "floats are always decimal (currently)"
|
496
|
+
# unallowed = DECCHARS
|
497
|
+
# interp=:to_s
|
498
|
+
# end
|
499
|
+
# }
|
434
500
|
|
435
501
|
assert(str[/[0-9]/])
|
436
502
|
lexerror NumberToken.new(str.send(interp)), error
|
437
503
|
end
|
438
504
|
|
505
|
+
if (defined? DEBUGGER__ or defined? Debugger)
|
439
506
|
#-----------------------------------
|
440
507
|
def comment(str=nil)
|
441
508
|
#assert str == '#'
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
@file.each_byte {|b|
|
447
|
-
if b==?\n #leave \n's on input for newline to eat
|
448
|
-
back1char
|
449
|
-
else
|
450
|
-
str << b
|
451
|
-
end
|
452
|
-
return IgnoreToken.new(str) if b==?\n or b==?#
|
453
|
-
}
|
454
|
-
#eof...
|
509
|
+
Process.kill("INT",0) if readahead(11)==%/#breakpoint/
|
510
|
+
|
511
|
+
IgnoreToken.new(til_charset(/[\r\n]/))
|
512
|
+
end
|
455
513
|
else
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
return IgnoreToken.new(str)
|
514
|
+
#-----------------------------------
|
515
|
+
def comment(str=nil)
|
516
|
+
IgnoreToken.new(til_charset(/[\r\n]/))
|
460
517
|
end
|
518
|
+
end
|
461
519
|
|
462
520
|
#-----------------------------------
|
463
521
|
def whitespace(ch)
|
464
522
|
assert ch[/^[#{WHSP}]$/o]
|
465
|
-
oldpos
|
523
|
+
oldpos= input_position
|
466
524
|
str=til_charset(/[^#{WHSP}]/o)
|
467
525
|
return WsToken.new(str,oldpos)
|
468
526
|
end
|
@@ -471,26 +529,29 @@ end
|
|
471
529
|
INET_NL_REX=/^(\r\n?|\n\r?)/
|
472
530
|
def readnl
|
473
531
|
#compatible with dos/mac style newlines...
|
532
|
+
|
533
|
+
eof? and return ''
|
534
|
+
|
474
535
|
nl=readahead(2)[INET_NL_REX]
|
475
536
|
nl or return nil
|
476
537
|
assert((1..2)===nl.length)
|
477
538
|
@linenum+=1
|
478
|
-
|
539
|
+
read nl.length
|
479
540
|
end
|
480
541
|
|
481
542
|
#-----------------------------------
|
482
543
|
def newline(ch)
|
483
|
-
offset
|
544
|
+
offset= input_position
|
484
545
|
nl=readnl
|
485
|
-
@moretokens << FileAndLineToken.new( @filename, @linenum,
|
546
|
+
@moretokens << FileAndLineToken.new( @filename, @linenum, input_position )
|
486
547
|
return NewlineToken.new( nl,offset)
|
487
548
|
end
|
488
549
|
|
489
550
|
|
490
551
|
#-----------------------------------
|
491
552
|
def getchar_maybe_escape
|
492
|
-
|
493
|
-
c
|
553
|
+
eof? and raise "unterminated dq string"
|
554
|
+
c=getc
|
494
555
|
|
495
556
|
c == ?\\ and
|
496
557
|
(c = (dquote_esc_seq('\\')[-1] or ?\n))
|
@@ -498,26 +559,41 @@ end
|
|
498
559
|
return c
|
499
560
|
end
|
500
561
|
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
562
|
+
protected
|
563
|
+
# delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:getc,:back1char
|
564
|
+
require 'forwardable'
|
565
|
+
extend Forwardable
|
566
|
+
def_delegators :@file, :readahead,:readback, :read, :eof?
|
567
|
+
|
568
|
+
def til_charset cs,len=16; @file.read_til_charset cs,len end
|
569
|
+
def getc; @file.read1 end
|
570
|
+
def getchar; @file.read 1 end
|
571
|
+
def back1char; @file.move( -1 )end
|
572
|
+
def prevchar; @file.readbehind 1 end
|
573
|
+
def nextchar; @file.readahead1 end
|
574
|
+
def eat_next_if(ch);
|
575
|
+
saw=getchar
|
576
|
+
if Integer===ch
|
577
|
+
ch==saw[0]
|
578
|
+
else
|
579
|
+
ch===saw
|
580
|
+
end or (back1char; return)
|
581
|
+
return saw
|
509
582
|
end
|
510
583
|
|
511
|
-
|
512
|
-
|
584
|
+
#-----------------------------------
|
585
|
+
def input_position; @file.pos end
|
586
|
+
|
587
|
+
#-----------------------------------
|
588
|
+
def input_position_set x; @file.pos=x end
|
513
589
|
|
514
590
|
#-----------------------------------
|
515
|
-
def
|
591
|
+
def self.save_offsets_in(*funcnames)
|
516
592
|
eval funcnames.collect{|fn| <<-endeval }.to_s
|
517
593
|
class ::#{self}
|
518
594
|
alias #{fn}__no_offset #{fn} #rename old ver of fn
|
519
595
|
def #{fn}(*args) #create new version
|
520
|
-
pos
|
596
|
+
pos= input_position
|
521
597
|
result=#{fn}__no_offset(*args)
|
522
598
|
assert Token===result
|
523
599
|
result.offset||=pos
|
@@ -527,6 +603,8 @@ protected
|
|
527
603
|
endeval
|
528
604
|
end
|
529
605
|
|
530
|
-
|
606
|
+
end
|
531
607
|
|
532
608
|
end
|
609
|
+
|
610
|
+
|