rubylexer 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +55 -0
- data/Manifest.txt +67 -0
- data/README.txt +103 -0
- data/Rakefile +24 -0
- data/howtouse.txt +9 -6
- data/{assert.rb → lib/assert.rb} +11 -11
- data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
- data/lib/rubylexer/0.6.2.rb +39 -0
- data/lib/rubylexer/0.6.rb +5 -0
- data/lib/rubylexer/0.7.0.rb +2 -0
- data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
- data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
- data/{context.rb → lib/rubylexer/context.rb} +48 -18
- data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
- data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
- data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
- data/{token.rb → lib/rubylexer/token.rb} +72 -20
- data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
- data/lib/rubylexer/version.rb +3 -0
- data/{testcode → test/code}/deletewarns.rb +0 -0
- data/test/code/dl_all_gems.rb +43 -0
- data/{testcode → test/code}/dumptokens.rb +12 -9
- data/test/code/locatetest +30 -0
- data/test/code/locatetest.rb +49 -0
- data/test/code/rubylexervsruby.rb +173 -0
- data/{testcode → test/code}/tokentest.rb +62 -51
- data/{testcode → test/code}/torment +8 -8
- data/test/code/unpack_all_gems.rb +15 -0
- data/{testdata → test/data}/1.rb.broken +0 -0
- data/{testdata → test/data}/23.rb +0 -0
- data/test/data/__end__.rb +2 -0
- data/test/data/__end__2.rb +3 -0
- data/test/data/and.rb +5 -0
- data/test/data/blockassigntest.rb +23 -0
- data/test/data/chunky.plain.rb +75 -0
- data/test/data/chunky_bacon.rb +112 -0
- data/test/data/chunky_bacon2.rb +112 -0
- data/test/data/chunky_bacon3.rb +112 -0
- data/test/data/chunky_bacon4.rb +112 -0
- data/test/data/for.rb +45 -0
- data/test/data/format.rb +6 -0
- data/{testdata → test/data}/g.rb +0 -0
- data/test/data/gemlist.txt +280 -0
- data/test/data/heart.rb +7 -0
- data/test/data/if.rb +6 -0
- data/test/data/jarh.rb +369 -0
- data/test/data/lbrace.rb +4 -0
- data/test/data/lbrack.rb +4 -0
- data/{testdata → test/data}/newsyntax.rb +0 -0
- data/{testdata → test/data}/noeolatend.rb +0 -0
- data/test/data/p-op.rb +8 -0
- data/{testdata → test/data}/p.rb +671 -79
- data/{testdata → test/data}/pleac.rb.broken +0 -0
- data/{testdata → test/data}/pre.rb +0 -0
- data/{testdata → test/data}/pre.unix.rb +0 -0
- data/{testdata → test/data}/regtest.rb +0 -0
- data/test/data/rescue.rb +35 -0
- data/test/data/s.rb +186 -0
- data/test/data/strinc.rb +2 -0
- data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
- data/test/data/untermed_here.rb.broken +2 -0
- data/test/data/untermed_string.rb.broken +1 -0
- data/{testdata → test/data}/untitled1.rb +0 -0
- data/{testdata → test/data}/w.rb +0 -0
- data/{testdata → test/data}/wsdlDriver.rb +0 -0
- data/testing.txt +6 -4
- metadata +163 -59
- data/README +0 -134
- data/Rantfile +0 -37
- data/io.each_til_charset.rb +0 -247
- data/require.rb +0 -103
- data/rlold.rb +0 -12
- data/testcode/locatetest +0 -12
- data/testcode/rubylexervsruby.rb +0 -104
- data/testcode/rubylexervsruby.sh +0 -51
- data/testresults/placeholder +0 -0
@@ -17,7 +17,7 @@
|
|
17
17
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
18
|
=end
|
19
19
|
|
20
|
-
|
20
|
+
class RubyLexer
|
21
21
|
class SymbolTable
|
22
22
|
def initialize
|
23
23
|
#note: below Stack means Array (used as a stack)
|
@@ -42,6 +42,14 @@ class SymbolTable
|
|
42
42
|
assert @locals_lists.last
|
43
43
|
end
|
44
44
|
|
45
|
+
def names
|
46
|
+
@symbols.keys
|
47
|
+
end
|
48
|
+
|
49
|
+
def __locals_lists
|
50
|
+
@locals_lists
|
51
|
+
end
|
52
|
+
|
45
53
|
def [](name)
|
46
54
|
assert @locals_lists.last
|
47
55
|
(stack=@symbols[name]) and stack.last
|
@@ -63,3 +71,4 @@ class SymbolTable
|
|
63
71
|
return val
|
64
72
|
end
|
65
73
|
end
|
74
|
+
end
|
@@ -17,8 +17,9 @@
|
|
17
17
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
18
|
=end
|
19
19
|
|
20
|
-
require "rubycode"
|
21
20
|
|
21
|
+
|
22
|
+
class RubyLexer
|
22
23
|
#-------------------------
|
23
24
|
class Token
|
24
25
|
attr_accessor :ident
|
@@ -31,6 +32,8 @@ class Token
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def error; end
|
35
|
+
|
36
|
+
def has_no_block?; false end
|
34
37
|
end
|
35
38
|
|
36
39
|
#-------------------------
|
@@ -45,22 +48,33 @@ end
|
|
45
48
|
class KeywordToken < WToken #also some operators
|
46
49
|
|
47
50
|
#-----------------------------------
|
48
|
-
def set_callsite!
|
51
|
+
def set_callsite! #not needed
|
49
52
|
@callsite=true
|
50
53
|
end
|
51
54
|
|
52
55
|
#-----------------------------------
|
53
|
-
def callsite?
|
56
|
+
def callsite? #not used
|
54
57
|
@callsite ||= nil
|
55
58
|
end
|
56
59
|
|
60
|
+
|
61
|
+
#-----------------------------------
|
62
|
+
def set_infix!
|
63
|
+
@infix=true
|
64
|
+
end
|
65
|
+
|
66
|
+
#-----------------------------------
|
67
|
+
def infix?
|
68
|
+
@infix ||= nil
|
69
|
+
end
|
70
|
+
def prefix?; !infix? end
|
71
|
+
|
57
72
|
#-----------------------------------
|
58
73
|
def has_end!
|
59
74
|
assert self===RubyLexer::BEGINWORDS
|
60
75
|
@has_end=true
|
61
76
|
end
|
62
77
|
|
63
|
-
|
64
78
|
#-----------------------------------
|
65
79
|
def has_end?
|
66
80
|
self===RubyLexer::BEGINWORDS and @has_end||=nil
|
@@ -69,6 +83,9 @@ end
|
|
69
83
|
|
70
84
|
#-------------------------
|
71
85
|
class OperatorToken < WToken
|
86
|
+
attr_accessor :unary
|
87
|
+
alias prefix? unary
|
88
|
+
def infix?; !prefix? end
|
72
89
|
end
|
73
90
|
|
74
91
|
|
@@ -87,8 +104,8 @@ module TokenPat
|
|
87
104
|
end
|
88
105
|
end
|
89
106
|
|
90
|
-
class String; include TokenPat; end
|
91
|
-
class Regexp; include TokenPat; end
|
107
|
+
class ::String; include TokenPat; end
|
108
|
+
class ::Regexp; include TokenPat; end
|
92
109
|
|
93
110
|
#-------------------------
|
94
111
|
class VarNameToken < WToken
|
@@ -112,6 +129,7 @@ class MethNameToken < Token # < SymbolToken
|
|
112
129
|
def initialize(ident,offset=nil)
|
113
130
|
@ident= (VarNameToken===ident)? ident.ident : ident
|
114
131
|
@offset=offset
|
132
|
+
@has_no_block=false
|
115
133
|
# @char=''
|
116
134
|
end
|
117
135
|
|
@@ -121,6 +139,14 @@ class MethNameToken < Token # < SymbolToken
|
|
121
139
|
def ===(pattern)
|
122
140
|
pattern===@ident
|
123
141
|
end
|
142
|
+
|
143
|
+
def has_no_block!
|
144
|
+
@has_no_block=true
|
145
|
+
end
|
146
|
+
|
147
|
+
def has_no_block?
|
148
|
+
@has_no_block
|
149
|
+
end
|
124
150
|
end
|
125
151
|
|
126
152
|
#-------------------------
|
@@ -137,14 +163,21 @@ class StringToken < Token
|
|
137
163
|
|
138
164
|
attr_accessor :modifiers #for regex only
|
139
165
|
attr_accessor :elems
|
166
|
+
attr_accessor :line #line on which the string ENDS
|
167
|
+
|
168
|
+
def with_line(line)
|
169
|
+
@line=line
|
170
|
+
self
|
171
|
+
end
|
140
172
|
|
141
173
|
def initialize(type='"',ident='')
|
142
174
|
super(ident)
|
143
175
|
type=="'" and type='"'
|
144
176
|
@char=type
|
145
|
-
assert
|
177
|
+
assert @char[/^[\[{"`\/]$/] #"
|
146
178
|
@elems=[ident.dup] #why .dup?
|
147
179
|
@modifiers=nil
|
180
|
+
@line=nil
|
148
181
|
end
|
149
182
|
|
150
183
|
DQUOTE_ESCAPE_TABLE = [
|
@@ -161,7 +194,7 @@ class StringToken < Token
|
|
161
194
|
SUFFIXERS={ '['=>"]", '{'=>'}' }
|
162
195
|
|
163
196
|
def to_s(transname=:transform)
|
164
|
-
assert
|
197
|
+
assert @char[/[\[{"`\/]/] #"
|
165
198
|
#on output, all single-quoted strings become double-quoted
|
166
199
|
assert(@elems.length==1) if @char=='['
|
167
200
|
|
@@ -274,7 +307,7 @@ end
|
|
274
307
|
|
275
308
|
#-------------------------
|
276
309
|
class HerePlaceholderToken < WToken
|
277
|
-
attr_reader :termex, :quote, :ender
|
310
|
+
attr_reader :termex, :quote, :ender, :dash
|
278
311
|
attr_accessor :unsafe_to_use, :string
|
279
312
|
attr_accessor :bodyclass
|
280
313
|
|
@@ -292,14 +325,15 @@ class HerePlaceholderToken < WToken
|
|
292
325
|
def ===(bogus); false end
|
293
326
|
|
294
327
|
def to_s
|
295
|
-
if
|
296
|
-
result=
|
297
|
-
result << if/[^a-z_0-9]/i===@ender
|
328
|
+
if @bodyclass==OutlinedHereBodyToken
|
329
|
+
result=if/[^a-z_0-9]/i===@ender
|
298
330
|
%["#{@ender.gsub(/[\\"]/, '\\\\'+'\\&')}"]
|
299
331
|
else
|
300
332
|
@ender
|
301
333
|
end
|
334
|
+
["<<",@quote,@ender,@quote].to_s
|
302
335
|
else
|
336
|
+
assert !unsafe_to_use
|
303
337
|
@string.to_s
|
304
338
|
end
|
305
339
|
end
|
@@ -307,11 +341,22 @@ class HerePlaceholderToken < WToken
|
|
307
341
|
def append s; @string.append s end
|
308
342
|
|
309
343
|
def append_token tok; @string.append_token tok end
|
344
|
+
|
345
|
+
#def with_line(line) @string.line=line; self end
|
346
|
+
|
347
|
+
def line; @string.line end
|
348
|
+
def line=line; @string.line=line end
|
349
|
+
|
350
|
+
end
|
351
|
+
|
352
|
+
#-------------------------
|
353
|
+
module StillIgnoreToken
|
310
354
|
|
311
355
|
end
|
312
356
|
|
313
357
|
#-------------------------
|
314
358
|
class IgnoreToken < Token
|
359
|
+
include StillIgnoreToken
|
315
360
|
end
|
316
361
|
|
317
362
|
#-------------------------
|
@@ -338,15 +383,20 @@ class NoWsToken < ZwToken
|
|
338
383
|
end
|
339
384
|
end
|
340
385
|
|
341
|
-
class ImplicitParamListStartToken <
|
342
|
-
|
343
|
-
|
386
|
+
class ImplicitParamListStartToken < KeywordToken
|
387
|
+
include StillIgnoreToken
|
388
|
+
def initialize(offset)
|
389
|
+
super("(",offset)
|
344
390
|
end
|
391
|
+
def to_s; '' end
|
345
392
|
end
|
346
|
-
|
347
|
-
|
348
|
-
|
393
|
+
|
394
|
+
class ImplicitParamListEndToken < KeywordToken
|
395
|
+
include StillIgnoreToken
|
396
|
+
def initialize(offset)
|
397
|
+
super(")",offset)
|
349
398
|
end
|
399
|
+
def to_s; '' end
|
350
400
|
end
|
351
401
|
|
352
402
|
class AssignmentRhsListStartToken < ZwToken
|
@@ -409,6 +459,7 @@ class HereBodyToken < IgnoreToken
|
|
409
459
|
@headtok=headtok
|
410
460
|
end
|
411
461
|
|
462
|
+
attr :headtok
|
412
463
|
end
|
413
464
|
|
414
465
|
#-------------------------
|
@@ -438,8 +489,7 @@ class OutlinedHereBodyToken < HereBodyToken
|
|
438
489
|
assert HerePlaceholderToken===@headtok
|
439
490
|
result=@headtok.string
|
440
491
|
result=result.to_s(:simple_transform).match(/^"(.*)"$/m)[1]
|
441
|
-
return
|
442
|
-
result +
|
492
|
+
return result +
|
443
493
|
@headtok.ender +
|
444
494
|
"\n"
|
445
495
|
end
|
@@ -482,5 +532,7 @@ class DecoratorToken < SubitemToken
|
|
482
532
|
def value() @subitem end
|
483
533
|
end
|
484
534
|
|
535
|
+
end
|
485
536
|
|
537
|
+
require "rubylexer/rubycode"
|
486
538
|
|
@@ -22,7 +22,7 @@
|
|
22
22
|
require "assert"
|
23
23
|
|
24
24
|
|
25
|
-
|
25
|
+
class RubyLexer
|
26
26
|
|
27
27
|
#-------------------------------
|
28
28
|
class SimpleTokenPrinter
|
@@ -34,7 +34,7 @@ class SimpleTokenPrinter
|
|
34
34
|
TOKENSPERLINE=8
|
35
35
|
TOKENSMAGICMAP="\n"+' '*(TOKENSPERLINE-1)
|
36
36
|
|
37
|
-
def pprint(tok) print(sprint(tok)) end
|
37
|
+
def pprint(tok,output=$stdout) output.print(sprint(tok)) end
|
38
38
|
|
39
39
|
def sprint(tok)
|
40
40
|
case tok
|
@@ -46,14 +46,19 @@ class SimpleTokenPrinter
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
class
|
49
|
+
class EscNlToken; def ws_munge(tp)
|
50
50
|
tp.lasttok=self
|
51
|
-
return
|
51
|
+
return " \\\n"
|
52
52
|
end end
|
53
53
|
class FileAndLineToken; def ws_munge(tp)
|
54
|
+
result=''
|
55
|
+
|
56
|
+
#faugh, doesn't fix it
|
57
|
+
#result= "\\\n"*(line-tp.lastfal.line) if StringToken===tp.lasttok
|
58
|
+
|
54
59
|
tp.lasttok=self
|
55
60
|
tp.lastfal=self
|
56
|
-
return
|
61
|
+
return result
|
57
62
|
end end
|
58
63
|
class Newline; def ws_munge(tp)
|
59
64
|
tp.lasttok=self
|
@@ -62,14 +67,20 @@ end
|
|
62
67
|
class IgnoreToken; def ws_munge(tp)
|
63
68
|
#tp.latestline+= to_s.scan("\n").size
|
64
69
|
tp.lasttok=self
|
65
|
-
unless tp.inws
|
70
|
+
result=unless tp.inws
|
66
71
|
tp.inws=true
|
67
|
-
|
72
|
+
' '
|
73
|
+
else
|
74
|
+
''
|
75
|
+
end
|
76
|
+
if ?= == @ident.to_s[0]
|
77
|
+
result+="\\\n"*@ident.to_s.scan(/\r\n?|\n\r?/).size
|
68
78
|
end
|
69
|
-
|
79
|
+
|
80
|
+
return result
|
70
81
|
end end
|
71
82
|
class OutlinedHereBodyToken; def ws_munge(tp)
|
72
|
-
|
83
|
+
nil
|
73
84
|
end end
|
74
85
|
class ZwToken; def ws_munge(tp)
|
75
86
|
case tp.showzw
|
@@ -100,24 +111,35 @@ class KeepWsTokenPrinter
|
|
100
111
|
@showzw=showzw
|
101
112
|
end
|
102
113
|
|
103
|
-
def pprint(tok)
|
104
|
-
|
114
|
+
def pprint(tok,output=$stdout)
|
105
115
|
@accum<<aprint(tok).to_s
|
106
|
-
if @accum.size>ACCUMSIZE or EoiToken===tok
|
107
|
-
print(@accum)
|
116
|
+
if (@accum.size>ACCUMSIZE and NewlineToken===tok) or EoiToken===tok
|
117
|
+
output.print(@accum)
|
108
118
|
@accum=[]
|
109
119
|
end
|
110
120
|
end
|
111
121
|
|
112
122
|
def aprint(tok)
|
123
|
+
if StringToken===tok or
|
124
|
+
(HerePlaceholderToken===tok and
|
125
|
+
tok.bodyclass!=OutlinedHereBodyToken
|
126
|
+
)
|
127
|
+
str_needs_escnls=(tok.line-@lastfal.line).nonzero?
|
128
|
+
end
|
113
129
|
result=tok.ws_munge(self) and return result
|
114
130
|
|
131
|
+
|
115
132
|
#insert extra ws unless an ambiguous op immediately follows
|
116
133
|
#id or num, in which case ws would change the meaning
|
117
|
-
result=if (ZwToken===tok or NoWsToken===@lasttok)
|
118
|
-
tok
|
134
|
+
result=if (ZwToken===tok or NoWsToken===@lasttok or ImplicitParamListStartToken===tok or ImplicitParamListEndToken===tok)
|
135
|
+
tok
|
119
136
|
else
|
120
|
-
[@sep.dup,tok
|
137
|
+
[@sep.dup,tok]
|
138
|
+
end
|
139
|
+
|
140
|
+
if str_needs_escnls
|
141
|
+
result=result.to_s
|
142
|
+
result.gsub!(/(["`\/])$/){ "\\\n"*str_needs_escnls+$1 }
|
121
143
|
end
|
122
144
|
|
123
145
|
@lasttok=tok
|
@@ -147,6 +169,7 @@ class KeepWsTokenPrinter
|
|
147
169
|
/^[$@a-zA-Z_]/===@lasttok)) #lasttok is id or num?
|
148
170
|
end
|
149
171
|
end
|
172
|
+
end
|
150
173
|
|
151
174
|
#-------------------------------
|
152
175
|
|
File without changes
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
Gem.manage_gems
|
3
|
+
|
4
|
+
class Gem::SourceInfoCache
|
5
|
+
public :read_cache
|
6
|
+
end
|
7
|
+
|
8
|
+
if ARGV.empty?
|
9
|
+
limit=1.0/0
|
10
|
+
else
|
11
|
+
limit=ARGV.first.to_i
|
12
|
+
end
|
13
|
+
|
14
|
+
gemdir="gems/"
|
15
|
+
|
16
|
+
Dir.mkdir gemdir rescue nil
|
17
|
+
total=0
|
18
|
+
db4=[]
|
19
|
+
db=Gem::SourceInfoCache.new.read_cache
|
20
|
+
db.each_pair{|site,db2|
|
21
|
+
newest={}
|
22
|
+
db3=db2.source_index.instance_variable_get(:@gems)
|
23
|
+
db3.each_pair{|filename,gemdata|
|
24
|
+
version=gemdata.version
|
25
|
+
newest[gemdata.name]=[version,filename,site] unless
|
26
|
+
newest[gemdata.name] and newest[gemdata.name].first>=version
|
27
|
+
}
|
28
|
+
newest.each_pair{|name,triad| triad.shift }
|
29
|
+
db4.push newest
|
30
|
+
}
|
31
|
+
|
32
|
+
|
33
|
+
db4.each{|hash| hash.each_pair{|filename,(fn,site)|
|
34
|
+
# fn=filename+".gem"
|
35
|
+
next if File.exist? gemdir+fn
|
36
|
+
fn+=".gem"
|
37
|
+
next if File.exist? gemdir+fn
|
38
|
+
url=site+"/gems/"+fn
|
39
|
+
puts url
|
40
|
+
system "wget #{url} -O "+gemdir+fn
|
41
|
+
total+=(4096.0+1.01*File.size(gemdir+fn)) rescue 0
|
42
|
+
exit if total>limit
|
43
|
+
}}
|
@@ -1,17 +1,20 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$Debug=true
|
2
3
|
require 'rubylexer'
|
3
4
|
require 'getoptlong'
|
4
5
|
|
5
|
-
|
6
|
+
#def puts(x) end
|
6
7
|
|
7
8
|
#a Token#inspect that omits the object id
|
9
|
+
class RubyLexer
|
8
10
|
class Token
|
9
|
-
def
|
10
|
-
[
|
11
|
+
def strify
|
12
|
+
[self.class.name[/[^:]+$/],": ",instance_variables.sort.collect{|v|
|
11
13
|
[v,"=",instance_variable_get(v).inspect," "]
|
12
|
-
}].to_s
|
14
|
+
}].to_s
|
13
15
|
end
|
14
16
|
end
|
17
|
+
end
|
15
18
|
|
16
19
|
file=nil
|
17
20
|
|
@@ -28,11 +31,11 @@ file||=if name=ARGV.first
|
|
28
31
|
File.open(name)
|
29
32
|
else
|
30
33
|
name='-'
|
31
|
-
$
|
34
|
+
$stdin
|
32
35
|
end
|
33
36
|
|
34
37
|
lexer=RubyLexer.new(name, file)
|
35
|
-
until EoiToken===(tok=lexer.get1token)
|
36
|
-
|
38
|
+
until RubyLexer::EoiToken===(tok=lexer.get1token)
|
39
|
+
puts tok.strify
|
37
40
|
end
|
38
|
-
|
41
|
+
puts tok.strify #print eoi token
|