rubylexer 0.6.2 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +55 -0
- data/Manifest.txt +67 -0
- data/README.txt +103 -0
- data/Rakefile +24 -0
- data/howtouse.txt +9 -6
- data/{assert.rb → lib/assert.rb} +11 -11
- data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
- data/lib/rubylexer/0.6.2.rb +39 -0
- data/lib/rubylexer/0.6.rb +5 -0
- data/lib/rubylexer/0.7.0.rb +2 -0
- data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
- data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
- data/{context.rb → lib/rubylexer/context.rb} +48 -18
- data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
- data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
- data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
- data/{token.rb → lib/rubylexer/token.rb} +72 -20
- data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
- data/lib/rubylexer/version.rb +3 -0
- data/{testcode → test/code}/deletewarns.rb +0 -0
- data/test/code/dl_all_gems.rb +43 -0
- data/{testcode → test/code}/dumptokens.rb +12 -9
- data/test/code/locatetest +30 -0
- data/test/code/locatetest.rb +49 -0
- data/test/code/rubylexervsruby.rb +173 -0
- data/{testcode → test/code}/tokentest.rb +62 -51
- data/{testcode → test/code}/torment +8 -8
- data/test/code/unpack_all_gems.rb +15 -0
- data/{testdata → test/data}/1.rb.broken +0 -0
- data/{testdata → test/data}/23.rb +0 -0
- data/test/data/__end__.rb +2 -0
- data/test/data/__end__2.rb +3 -0
- data/test/data/and.rb +5 -0
- data/test/data/blockassigntest.rb +23 -0
- data/test/data/chunky.plain.rb +75 -0
- data/test/data/chunky_bacon.rb +112 -0
- data/test/data/chunky_bacon2.rb +112 -0
- data/test/data/chunky_bacon3.rb +112 -0
- data/test/data/chunky_bacon4.rb +112 -0
- data/test/data/for.rb +45 -0
- data/test/data/format.rb +6 -0
- data/{testdata → test/data}/g.rb +0 -0
- data/test/data/gemlist.txt +280 -0
- data/test/data/heart.rb +7 -0
- data/test/data/if.rb +6 -0
- data/test/data/jarh.rb +369 -0
- data/test/data/lbrace.rb +4 -0
- data/test/data/lbrack.rb +4 -0
- data/{testdata → test/data}/newsyntax.rb +0 -0
- data/{testdata → test/data}/noeolatend.rb +0 -0
- data/test/data/p-op.rb +8 -0
- data/{testdata → test/data}/p.rb +671 -79
- data/{testdata → test/data}/pleac.rb.broken +0 -0
- data/{testdata → test/data}/pre.rb +0 -0
- data/{testdata → test/data}/pre.unix.rb +0 -0
- data/{testdata → test/data}/regtest.rb +0 -0
- data/test/data/rescue.rb +35 -0
- data/test/data/s.rb +186 -0
- data/test/data/strinc.rb +2 -0
- data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
- data/test/data/untermed_here.rb.broken +2 -0
- data/test/data/untermed_string.rb.broken +1 -0
- data/{testdata → test/data}/untitled1.rb +0 -0
- data/{testdata → test/data}/w.rb +0 -0
- data/{testdata → test/data}/wsdlDriver.rb +0 -0
- data/testing.txt +6 -4
- metadata +163 -59
- data/README +0 -134
- data/Rantfile +0 -37
- data/io.each_til_charset.rb +0 -247
- data/require.rb +0 -103
- data/rlold.rb +0 -12
- data/testcode/locatetest +0 -12
- data/testcode/rubylexervsruby.rb +0 -104
- data/testcode/rubylexervsruby.sh +0 -51
- data/testresults/placeholder +0 -0
@@ -17,7 +17,7 @@
|
|
17
17
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
18
|
=end
|
19
19
|
|
20
|
-
|
20
|
+
class RubyLexer
|
21
21
|
class SymbolTable
|
22
22
|
def initialize
|
23
23
|
#note: below Stack means Array (used as a stack)
|
@@ -42,6 +42,14 @@ class SymbolTable
|
|
42
42
|
assert @locals_lists.last
|
43
43
|
end
|
44
44
|
|
45
|
+
def names
|
46
|
+
@symbols.keys
|
47
|
+
end
|
48
|
+
|
49
|
+
def __locals_lists
|
50
|
+
@locals_lists
|
51
|
+
end
|
52
|
+
|
45
53
|
def [](name)
|
46
54
|
assert @locals_lists.last
|
47
55
|
(stack=@symbols[name]) and stack.last
|
@@ -63,3 +71,4 @@ class SymbolTable
|
|
63
71
|
return val
|
64
72
|
end
|
65
73
|
end
|
74
|
+
end
|
@@ -17,8 +17,9 @@
|
|
17
17
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
18
|
=end
|
19
19
|
|
20
|
-
require "rubycode"
|
21
20
|
|
21
|
+
|
22
|
+
class RubyLexer
|
22
23
|
#-------------------------
|
23
24
|
class Token
|
24
25
|
attr_accessor :ident
|
@@ -31,6 +32,8 @@ class Token
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def error; end
|
35
|
+
|
36
|
+
def has_no_block?; false end
|
34
37
|
end
|
35
38
|
|
36
39
|
#-------------------------
|
@@ -45,22 +48,33 @@ end
|
|
45
48
|
class KeywordToken < WToken #also some operators
|
46
49
|
|
47
50
|
#-----------------------------------
|
48
|
-
def set_callsite!
|
51
|
+
def set_callsite! #not needed
|
49
52
|
@callsite=true
|
50
53
|
end
|
51
54
|
|
52
55
|
#-----------------------------------
|
53
|
-
def callsite?
|
56
|
+
def callsite? #not used
|
54
57
|
@callsite ||= nil
|
55
58
|
end
|
56
59
|
|
60
|
+
|
61
|
+
#-----------------------------------
|
62
|
+
def set_infix!
|
63
|
+
@infix=true
|
64
|
+
end
|
65
|
+
|
66
|
+
#-----------------------------------
|
67
|
+
def infix?
|
68
|
+
@infix ||= nil
|
69
|
+
end
|
70
|
+
def prefix?; !infix? end
|
71
|
+
|
57
72
|
#-----------------------------------
|
58
73
|
def has_end!
|
59
74
|
assert self===RubyLexer::BEGINWORDS
|
60
75
|
@has_end=true
|
61
76
|
end
|
62
77
|
|
63
|
-
|
64
78
|
#-----------------------------------
|
65
79
|
def has_end?
|
66
80
|
self===RubyLexer::BEGINWORDS and @has_end||=nil
|
@@ -69,6 +83,9 @@ end
|
|
69
83
|
|
70
84
|
#-------------------------
|
71
85
|
class OperatorToken < WToken
|
86
|
+
attr_accessor :unary
|
87
|
+
alias prefix? unary
|
88
|
+
def infix?; !prefix? end
|
72
89
|
end
|
73
90
|
|
74
91
|
|
@@ -87,8 +104,8 @@ module TokenPat
|
|
87
104
|
end
|
88
105
|
end
|
89
106
|
|
90
|
-
class String; include TokenPat; end
|
91
|
-
class Regexp; include TokenPat; end
|
107
|
+
class ::String; include TokenPat; end
|
108
|
+
class ::Regexp; include TokenPat; end
|
92
109
|
|
93
110
|
#-------------------------
|
94
111
|
class VarNameToken < WToken
|
@@ -112,6 +129,7 @@ class MethNameToken < Token # < SymbolToken
|
|
112
129
|
def initialize(ident,offset=nil)
|
113
130
|
@ident= (VarNameToken===ident)? ident.ident : ident
|
114
131
|
@offset=offset
|
132
|
+
@has_no_block=false
|
115
133
|
# @char=''
|
116
134
|
end
|
117
135
|
|
@@ -121,6 +139,14 @@ class MethNameToken < Token # < SymbolToken
|
|
121
139
|
def ===(pattern)
|
122
140
|
pattern===@ident
|
123
141
|
end
|
142
|
+
|
143
|
+
def has_no_block!
|
144
|
+
@has_no_block=true
|
145
|
+
end
|
146
|
+
|
147
|
+
def has_no_block?
|
148
|
+
@has_no_block
|
149
|
+
end
|
124
150
|
end
|
125
151
|
|
126
152
|
#-------------------------
|
@@ -137,14 +163,21 @@ class StringToken < Token
|
|
137
163
|
|
138
164
|
attr_accessor :modifiers #for regex only
|
139
165
|
attr_accessor :elems
|
166
|
+
attr_accessor :line #line on which the string ENDS
|
167
|
+
|
168
|
+
def with_line(line)
|
169
|
+
@line=line
|
170
|
+
self
|
171
|
+
end
|
140
172
|
|
141
173
|
def initialize(type='"',ident='')
|
142
174
|
super(ident)
|
143
175
|
type=="'" and type='"'
|
144
176
|
@char=type
|
145
|
-
assert
|
177
|
+
assert @char[/^[\[{"`\/]$/] #"
|
146
178
|
@elems=[ident.dup] #why .dup?
|
147
179
|
@modifiers=nil
|
180
|
+
@line=nil
|
148
181
|
end
|
149
182
|
|
150
183
|
DQUOTE_ESCAPE_TABLE = [
|
@@ -161,7 +194,7 @@ class StringToken < Token
|
|
161
194
|
SUFFIXERS={ '['=>"]", '{'=>'}' }
|
162
195
|
|
163
196
|
def to_s(transname=:transform)
|
164
|
-
assert
|
197
|
+
assert @char[/[\[{"`\/]/] #"
|
165
198
|
#on output, all single-quoted strings become double-quoted
|
166
199
|
assert(@elems.length==1) if @char=='['
|
167
200
|
|
@@ -274,7 +307,7 @@ end
|
|
274
307
|
|
275
308
|
#-------------------------
|
276
309
|
class HerePlaceholderToken < WToken
|
277
|
-
attr_reader :termex, :quote, :ender
|
310
|
+
attr_reader :termex, :quote, :ender, :dash
|
278
311
|
attr_accessor :unsafe_to_use, :string
|
279
312
|
attr_accessor :bodyclass
|
280
313
|
|
@@ -292,14 +325,15 @@ class HerePlaceholderToken < WToken
|
|
292
325
|
def ===(bogus); false end
|
293
326
|
|
294
327
|
def to_s
|
295
|
-
if
|
296
|
-
result=
|
297
|
-
result << if/[^a-z_0-9]/i===@ender
|
328
|
+
if @bodyclass==OutlinedHereBodyToken
|
329
|
+
result=if/[^a-z_0-9]/i===@ender
|
298
330
|
%["#{@ender.gsub(/[\\"]/, '\\\\'+'\\&')}"]
|
299
331
|
else
|
300
332
|
@ender
|
301
333
|
end
|
334
|
+
["<<",@quote,@ender,@quote].to_s
|
302
335
|
else
|
336
|
+
assert !unsafe_to_use
|
303
337
|
@string.to_s
|
304
338
|
end
|
305
339
|
end
|
@@ -307,11 +341,22 @@ class HerePlaceholderToken < WToken
|
|
307
341
|
def append s; @string.append s end
|
308
342
|
|
309
343
|
def append_token tok; @string.append_token tok end
|
344
|
+
|
345
|
+
#def with_line(line) @string.line=line; self end
|
346
|
+
|
347
|
+
def line; @string.line end
|
348
|
+
def line=line; @string.line=line end
|
349
|
+
|
350
|
+
end
|
351
|
+
|
352
|
+
#-------------------------
|
353
|
+
module StillIgnoreToken
|
310
354
|
|
311
355
|
end
|
312
356
|
|
313
357
|
#-------------------------
|
314
358
|
class IgnoreToken < Token
|
359
|
+
include StillIgnoreToken
|
315
360
|
end
|
316
361
|
|
317
362
|
#-------------------------
|
@@ -338,15 +383,20 @@ class NoWsToken < ZwToken
|
|
338
383
|
end
|
339
384
|
end
|
340
385
|
|
341
|
-
class ImplicitParamListStartToken <
|
342
|
-
|
343
|
-
|
386
|
+
class ImplicitParamListStartToken < KeywordToken
|
387
|
+
include StillIgnoreToken
|
388
|
+
def initialize(offset)
|
389
|
+
super("(",offset)
|
344
390
|
end
|
391
|
+
def to_s; '' end
|
345
392
|
end
|
346
|
-
|
347
|
-
|
348
|
-
|
393
|
+
|
394
|
+
class ImplicitParamListEndToken < KeywordToken
|
395
|
+
include StillIgnoreToken
|
396
|
+
def initialize(offset)
|
397
|
+
super(")",offset)
|
349
398
|
end
|
399
|
+
def to_s; '' end
|
350
400
|
end
|
351
401
|
|
352
402
|
class AssignmentRhsListStartToken < ZwToken
|
@@ -409,6 +459,7 @@ class HereBodyToken < IgnoreToken
|
|
409
459
|
@headtok=headtok
|
410
460
|
end
|
411
461
|
|
462
|
+
attr :headtok
|
412
463
|
end
|
413
464
|
|
414
465
|
#-------------------------
|
@@ -438,8 +489,7 @@ class OutlinedHereBodyToken < HereBodyToken
|
|
438
489
|
assert HerePlaceholderToken===@headtok
|
439
490
|
result=@headtok.string
|
440
491
|
result=result.to_s(:simple_transform).match(/^"(.*)"$/m)[1]
|
441
|
-
return
|
442
|
-
result +
|
492
|
+
return result +
|
443
493
|
@headtok.ender +
|
444
494
|
"\n"
|
445
495
|
end
|
@@ -482,5 +532,7 @@ class DecoratorToken < SubitemToken
|
|
482
532
|
def value() @subitem end
|
483
533
|
end
|
484
534
|
|
535
|
+
end
|
485
536
|
|
537
|
+
require "rubylexer/rubycode"
|
486
538
|
|
@@ -22,7 +22,7 @@
|
|
22
22
|
require "assert"
|
23
23
|
|
24
24
|
|
25
|
-
|
25
|
+
class RubyLexer
|
26
26
|
|
27
27
|
#-------------------------------
|
28
28
|
class SimpleTokenPrinter
|
@@ -34,7 +34,7 @@ class SimpleTokenPrinter
|
|
34
34
|
TOKENSPERLINE=8
|
35
35
|
TOKENSMAGICMAP="\n"+' '*(TOKENSPERLINE-1)
|
36
36
|
|
37
|
-
def pprint(tok) print(sprint(tok)) end
|
37
|
+
def pprint(tok,output=$stdout) output.print(sprint(tok)) end
|
38
38
|
|
39
39
|
def sprint(tok)
|
40
40
|
case tok
|
@@ -46,14 +46,19 @@ class SimpleTokenPrinter
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
class
|
49
|
+
class EscNlToken; def ws_munge(tp)
|
50
50
|
tp.lasttok=self
|
51
|
-
return
|
51
|
+
return " \\\n"
|
52
52
|
end end
|
53
53
|
class FileAndLineToken; def ws_munge(tp)
|
54
|
+
result=''
|
55
|
+
|
56
|
+
#faugh, doesn't fix it
|
57
|
+
#result= "\\\n"*(line-tp.lastfal.line) if StringToken===tp.lasttok
|
58
|
+
|
54
59
|
tp.lasttok=self
|
55
60
|
tp.lastfal=self
|
56
|
-
return
|
61
|
+
return result
|
57
62
|
end end
|
58
63
|
class Newline; def ws_munge(tp)
|
59
64
|
tp.lasttok=self
|
@@ -62,14 +67,20 @@ end
|
|
62
67
|
class IgnoreToken; def ws_munge(tp)
|
63
68
|
#tp.latestline+= to_s.scan("\n").size
|
64
69
|
tp.lasttok=self
|
65
|
-
unless tp.inws
|
70
|
+
result=unless tp.inws
|
66
71
|
tp.inws=true
|
67
|
-
|
72
|
+
' '
|
73
|
+
else
|
74
|
+
''
|
75
|
+
end
|
76
|
+
if ?= == @ident.to_s[0]
|
77
|
+
result+="\\\n"*@ident.to_s.scan(/\r\n?|\n\r?/).size
|
68
78
|
end
|
69
|
-
|
79
|
+
|
80
|
+
return result
|
70
81
|
end end
|
71
82
|
class OutlinedHereBodyToken; def ws_munge(tp)
|
72
|
-
|
83
|
+
nil
|
73
84
|
end end
|
74
85
|
class ZwToken; def ws_munge(tp)
|
75
86
|
case tp.showzw
|
@@ -100,24 +111,35 @@ class KeepWsTokenPrinter
|
|
100
111
|
@showzw=showzw
|
101
112
|
end
|
102
113
|
|
103
|
-
def pprint(tok)
|
104
|
-
|
114
|
+
def pprint(tok,output=$stdout)
|
105
115
|
@accum<<aprint(tok).to_s
|
106
|
-
if @accum.size>ACCUMSIZE or EoiToken===tok
|
107
|
-
print(@accum)
|
116
|
+
if (@accum.size>ACCUMSIZE and NewlineToken===tok) or EoiToken===tok
|
117
|
+
output.print(@accum)
|
108
118
|
@accum=[]
|
109
119
|
end
|
110
120
|
end
|
111
121
|
|
112
122
|
def aprint(tok)
|
123
|
+
if StringToken===tok or
|
124
|
+
(HerePlaceholderToken===tok and
|
125
|
+
tok.bodyclass!=OutlinedHereBodyToken
|
126
|
+
)
|
127
|
+
str_needs_escnls=(tok.line-@lastfal.line).nonzero?
|
128
|
+
end
|
113
129
|
result=tok.ws_munge(self) and return result
|
114
130
|
|
131
|
+
|
115
132
|
#insert extra ws unless an ambiguous op immediately follows
|
116
133
|
#id or num, in which case ws would change the meaning
|
117
|
-
result=if (ZwToken===tok or NoWsToken===@lasttok)
|
118
|
-
tok
|
134
|
+
result=if (ZwToken===tok or NoWsToken===@lasttok or ImplicitParamListStartToken===tok or ImplicitParamListEndToken===tok)
|
135
|
+
tok
|
119
136
|
else
|
120
|
-
[@sep.dup,tok
|
137
|
+
[@sep.dup,tok]
|
138
|
+
end
|
139
|
+
|
140
|
+
if str_needs_escnls
|
141
|
+
result=result.to_s
|
142
|
+
result.gsub!(/(["`\/])$/){ "\\\n"*str_needs_escnls+$1 }
|
121
143
|
end
|
122
144
|
|
123
145
|
@lasttok=tok
|
@@ -147,6 +169,7 @@ class KeepWsTokenPrinter
|
|
147
169
|
/^[$@a-zA-Z_]/===@lasttok)) #lasttok is id or num?
|
148
170
|
end
|
149
171
|
end
|
172
|
+
end
|
150
173
|
|
151
174
|
#-------------------------------
|
152
175
|
|
File without changes
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
Gem.manage_gems
|
3
|
+
|
4
|
+
class Gem::SourceInfoCache
|
5
|
+
public :read_cache
|
6
|
+
end
|
7
|
+
|
8
|
+
if ARGV.empty?
|
9
|
+
limit=1.0/0
|
10
|
+
else
|
11
|
+
limit=ARGV.first.to_i
|
12
|
+
end
|
13
|
+
|
14
|
+
gemdir="gems/"
|
15
|
+
|
16
|
+
Dir.mkdir gemdir rescue nil
|
17
|
+
total=0
|
18
|
+
db4=[]
|
19
|
+
db=Gem::SourceInfoCache.new.read_cache
|
20
|
+
db.each_pair{|site,db2|
|
21
|
+
newest={}
|
22
|
+
db3=db2.source_index.instance_variable_get(:@gems)
|
23
|
+
db3.each_pair{|filename,gemdata|
|
24
|
+
version=gemdata.version
|
25
|
+
newest[gemdata.name]=[version,filename,site] unless
|
26
|
+
newest[gemdata.name] and newest[gemdata.name].first>=version
|
27
|
+
}
|
28
|
+
newest.each_pair{|name,triad| triad.shift }
|
29
|
+
db4.push newest
|
30
|
+
}
|
31
|
+
|
32
|
+
|
33
|
+
db4.each{|hash| hash.each_pair{|filename,(fn,site)|
|
34
|
+
# fn=filename+".gem"
|
35
|
+
next if File.exist? gemdir+fn
|
36
|
+
fn+=".gem"
|
37
|
+
next if File.exist? gemdir+fn
|
38
|
+
url=site+"/gems/"+fn
|
39
|
+
puts url
|
40
|
+
system "wget #{url} -O "+gemdir+fn
|
41
|
+
total+=(4096.0+1.01*File.size(gemdir+fn)) rescue 0
|
42
|
+
exit if total>limit
|
43
|
+
}}
|
@@ -1,17 +1,20 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$Debug=true
|
2
3
|
require 'rubylexer'
|
3
4
|
require 'getoptlong'
|
4
5
|
|
5
|
-
|
6
|
+
#def puts(x) end
|
6
7
|
|
7
8
|
#a Token#inspect that omits the object id
|
9
|
+
class RubyLexer
|
8
10
|
class Token
|
9
|
-
def
|
10
|
-
[
|
11
|
+
def strify
|
12
|
+
[self.class.name[/[^:]+$/],": ",instance_variables.sort.collect{|v|
|
11
13
|
[v,"=",instance_variable_get(v).inspect," "]
|
12
|
-
}].to_s
|
14
|
+
}].to_s
|
13
15
|
end
|
14
16
|
end
|
17
|
+
end
|
15
18
|
|
16
19
|
file=nil
|
17
20
|
|
@@ -28,11 +31,11 @@ file||=if name=ARGV.first
|
|
28
31
|
File.open(name)
|
29
32
|
else
|
30
33
|
name='-'
|
31
|
-
$
|
34
|
+
$stdin
|
32
35
|
end
|
33
36
|
|
34
37
|
lexer=RubyLexer.new(name, file)
|
35
|
-
until EoiToken===(tok=lexer.get1token)
|
36
|
-
|
38
|
+
until RubyLexer::EoiToken===(tok=lexer.get1token)
|
39
|
+
puts tok.strify
|
37
40
|
end
|
38
|
-
|
41
|
+
puts tok.strify #print eoi token
|