rubylexer 0.6.2 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
@@ -17,7 +17,7 @@
17
17
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
18
  =end
19
19
 
20
-
20
+ class RubyLexer
21
21
  class SymbolTable
22
22
  def initialize
23
23
  #note: below Stack means Array (used as a stack)
@@ -42,6 +42,14 @@ class SymbolTable
42
42
  assert @locals_lists.last
43
43
  end
44
44
 
45
+ def names
46
+ @symbols.keys
47
+ end
48
+
49
+ def __locals_lists
50
+ @locals_lists
51
+ end
52
+
45
53
  def [](name)
46
54
  assert @locals_lists.last
47
55
  (stack=@symbols[name]) and stack.last
@@ -63,3 +71,4 @@ class SymbolTable
63
71
  return val
64
72
  end
65
73
  end
74
+ end
@@ -17,8 +17,9 @@
17
17
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
18
  =end
19
19
 
20
- require "rubycode"
21
20
 
21
+
22
+ class RubyLexer
22
23
  #-------------------------
23
24
  class Token
24
25
  attr_accessor :ident
@@ -31,6 +32,8 @@ class Token
31
32
  end
32
33
 
33
34
  def error; end
35
+
36
+ def has_no_block?; false end
34
37
  end
35
38
 
36
39
  #-------------------------
@@ -45,22 +48,33 @@ end
45
48
  class KeywordToken < WToken #also some operators
46
49
 
47
50
  #-----------------------------------
48
- def set_callsite!
51
+ def set_callsite! #not needed
49
52
  @callsite=true
50
53
  end
51
54
 
52
55
  #-----------------------------------
53
- def callsite?
56
+ def callsite? #not used
54
57
  @callsite ||= nil
55
58
  end
56
59
 
60
+
61
+ #-----------------------------------
62
+ def set_infix!
63
+ @infix=true
64
+ end
65
+
66
+ #-----------------------------------
67
+ def infix?
68
+ @infix ||= nil
69
+ end
70
+ def prefix?; !infix? end
71
+
57
72
  #-----------------------------------
58
73
  def has_end!
59
74
  assert self===RubyLexer::BEGINWORDS
60
75
  @has_end=true
61
76
  end
62
77
 
63
-
64
78
  #-----------------------------------
65
79
  def has_end?
66
80
  self===RubyLexer::BEGINWORDS and @has_end||=nil
@@ -69,6 +83,9 @@ end
69
83
 
70
84
  #-------------------------
71
85
  class OperatorToken < WToken
86
+ attr_accessor :unary
87
+ alias prefix? unary
88
+ def infix?; !prefix? end
72
89
  end
73
90
 
74
91
 
@@ -87,8 +104,8 @@ module TokenPat
87
104
  end
88
105
  end
89
106
 
90
- class String; include TokenPat; end
91
- class Regexp; include TokenPat; end
107
+ class ::String; include TokenPat; end
108
+ class ::Regexp; include TokenPat; end
92
109
 
93
110
  #-------------------------
94
111
  class VarNameToken < WToken
@@ -112,6 +129,7 @@ class MethNameToken < Token # < SymbolToken
112
129
  def initialize(ident,offset=nil)
113
130
  @ident= (VarNameToken===ident)? ident.ident : ident
114
131
  @offset=offset
132
+ @has_no_block=false
115
133
  # @char=''
116
134
  end
117
135
 
@@ -121,6 +139,14 @@ class MethNameToken < Token # < SymbolToken
121
139
  def ===(pattern)
122
140
  pattern===@ident
123
141
  end
142
+
143
+ def has_no_block!
144
+ @has_no_block=true
145
+ end
146
+
147
+ def has_no_block?
148
+ @has_no_block
149
+ end
124
150
  end
125
151
 
126
152
  #-------------------------
@@ -137,14 +163,21 @@ class StringToken < Token
137
163
 
138
164
  attr_accessor :modifiers #for regex only
139
165
  attr_accessor :elems
166
+ attr_accessor :line #line on which the string ENDS
167
+
168
+ def with_line(line)
169
+ @line=line
170
+ self
171
+ end
140
172
 
141
173
  def initialize(type='"',ident='')
142
174
  super(ident)
143
175
  type=="'" and type='"'
144
176
  @char=type
145
- assert(@char[/^[\[{"`\/]$/])
177
+ assert @char[/^[\[{"`\/]$/] #"
146
178
  @elems=[ident.dup] #why .dup?
147
179
  @modifiers=nil
180
+ @line=nil
148
181
  end
149
182
 
150
183
  DQUOTE_ESCAPE_TABLE = [
@@ -161,7 +194,7 @@ class StringToken < Token
161
194
  SUFFIXERS={ '['=>"]", '{'=>'}' }
162
195
 
163
196
  def to_s(transname=:transform)
164
- assert(@char[/[\[{"`\/]/])
197
+ assert @char[/[\[{"`\/]/] #"
165
198
  #on output, all single-quoted strings become double-quoted
166
199
  assert(@elems.length==1) if @char=='['
167
200
 
@@ -274,7 +307,7 @@ end
274
307
 
275
308
  #-------------------------
276
309
  class HerePlaceholderToken < WToken
277
- attr_reader :termex, :quote, :ender
310
+ attr_reader :termex, :quote, :ender, :dash
278
311
  attr_accessor :unsafe_to_use, :string
279
312
  attr_accessor :bodyclass
280
313
 
@@ -292,14 +325,15 @@ class HerePlaceholderToken < WToken
292
325
  def ===(bogus); false end
293
326
 
294
327
  def to_s
295
- if unsafe_to_use
296
- result="<<"
297
- result << if/[^a-z_0-9]/i===@ender
328
+ if @bodyclass==OutlinedHereBodyToken
329
+ result=if/[^a-z_0-9]/i===@ender
298
330
  %["#{@ender.gsub(/[\\"]/, '\\\\'+'\\&')}"]
299
331
  else
300
332
  @ender
301
333
  end
334
+ ["<<",@quote,@ender,@quote].to_s
302
335
  else
336
+ assert !unsafe_to_use
303
337
  @string.to_s
304
338
  end
305
339
  end
@@ -307,11 +341,22 @@ class HerePlaceholderToken < WToken
307
341
  def append s; @string.append s end
308
342
 
309
343
  def append_token tok; @string.append_token tok end
344
+
345
+ #def with_line(line) @string.line=line; self end
346
+
347
+ def line; @string.line end
348
+ def line=line; @string.line=line end
349
+
350
+ end
351
+
352
+ #-------------------------
353
+ module StillIgnoreToken
310
354
 
311
355
  end
312
356
 
313
357
  #-------------------------
314
358
  class IgnoreToken < Token
359
+ include StillIgnoreToken
315
360
  end
316
361
 
317
362
  #-------------------------
@@ -338,15 +383,20 @@ class NoWsToken < ZwToken
338
383
  end
339
384
  end
340
385
 
341
- class ImplicitParamListStartToken < ZwToken
342
- def explicit_form
343
- '('
386
+ class ImplicitParamListStartToken < KeywordToken
387
+ include StillIgnoreToken
388
+ def initialize(offset)
389
+ super("(",offset)
344
390
  end
391
+ def to_s; '' end
345
392
  end
346
- class ImplicitParamListEndToken < ZwToken
347
- def explicit_form
348
- ')'
393
+
394
+ class ImplicitParamListEndToken < KeywordToken
395
+ include StillIgnoreToken
396
+ def initialize(offset)
397
+ super(")",offset)
349
398
  end
399
+ def to_s; '' end
350
400
  end
351
401
 
352
402
  class AssignmentRhsListStartToken < ZwToken
@@ -409,6 +459,7 @@ class HereBodyToken < IgnoreToken
409
459
  @headtok=headtok
410
460
  end
411
461
 
462
+ attr :headtok
412
463
  end
413
464
 
414
465
  #-------------------------
@@ -438,8 +489,7 @@ class OutlinedHereBodyToken < HereBodyToken
438
489
  assert HerePlaceholderToken===@headtok
439
490
  result=@headtok.string
440
491
  result=result.to_s(:simple_transform).match(/^"(.*)"$/m)[1]
441
- return "\n" +
442
- result +
492
+ return result +
443
493
  @headtok.ender +
444
494
  "\n"
445
495
  end
@@ -482,5 +532,7 @@ class DecoratorToken < SubitemToken
482
532
  def value() @subitem end
483
533
  end
484
534
 
535
+ end
485
536
 
537
+ require "rubylexer/rubycode"
486
538
 
@@ -22,7 +22,7 @@
22
22
  require "assert"
23
23
 
24
24
 
25
-
25
+ class RubyLexer
26
26
 
27
27
  #-------------------------------
28
28
  class SimpleTokenPrinter
@@ -34,7 +34,7 @@ class SimpleTokenPrinter
34
34
  TOKENSPERLINE=8
35
35
  TOKENSMAGICMAP="\n"+' '*(TOKENSPERLINE-1)
36
36
 
37
- def pprint(tok) print(sprint(tok)) end
37
+ def pprint(tok,output=$stdout) output.print(sprint(tok)) end
38
38
 
39
39
  def sprint(tok)
40
40
  case tok
@@ -46,14 +46,19 @@ class SimpleTokenPrinter
46
46
  end
47
47
  end
48
48
 
49
- class EscNl; def ws_munge(tp)
49
+ class EscNlToken; def ws_munge(tp)
50
50
  tp.lasttok=self
51
- return to_s
51
+ return " \\\n"
52
52
  end end
53
53
  class FileAndLineToken; def ws_munge(tp)
54
+ result=''
55
+
56
+ #faugh, doesn't fix it
57
+ #result= "\\\n"*(line-tp.lastfal.line) if StringToken===tp.lasttok
58
+
54
59
  tp.lasttok=self
55
60
  tp.lastfal=self
56
- return ''
61
+ return result
57
62
  end end
58
63
  class Newline; def ws_munge(tp)
59
64
  tp.lasttok=self
@@ -62,14 +67,20 @@ end
62
67
  class IgnoreToken; def ws_munge(tp)
63
68
  #tp.latestline+= to_s.scan("\n").size
64
69
  tp.lasttok=self
65
- unless tp.inws
70
+ result=unless tp.inws
66
71
  tp.inws=true
67
- return ' '
72
+ ' '
73
+ else
74
+ ''
75
+ end
76
+ if ?= == @ident.to_s[0]
77
+ result+="\\\n"*@ident.to_s.scan(/\r\n?|\n\r?/).size
68
78
  end
69
- return ''
79
+
80
+ return result
70
81
  end end
71
82
  class OutlinedHereBodyToken; def ws_munge(tp)
72
- nil
83
+ nil
73
84
  end end
74
85
  class ZwToken; def ws_munge(tp)
75
86
  case tp.showzw
@@ -100,24 +111,35 @@ class KeepWsTokenPrinter
100
111
  @showzw=showzw
101
112
  end
102
113
 
103
- def pprint(tok)
104
-
114
+ def pprint(tok,output=$stdout)
105
115
  @accum<<aprint(tok).to_s
106
- if @accum.size>ACCUMSIZE or EoiToken===tok
107
- print(@accum)
116
+ if (@accum.size>ACCUMSIZE and NewlineToken===tok) or EoiToken===tok
117
+ output.print(@accum)
108
118
  @accum=[]
109
119
  end
110
120
  end
111
121
 
112
122
  def aprint(tok)
123
+ if StringToken===tok or
124
+ (HerePlaceholderToken===tok and
125
+ tok.bodyclass!=OutlinedHereBodyToken
126
+ )
127
+ str_needs_escnls=(tok.line-@lastfal.line).nonzero?
128
+ end
113
129
  result=tok.ws_munge(self) and return result
114
130
 
131
+
115
132
  #insert extra ws unless an ambiguous op immediately follows
116
133
  #id or num, in which case ws would change the meaning
117
- result=if (ZwToken===tok or NoWsToken===@lasttok)
118
- tok.to_s
134
+ result=if (ZwToken===tok or NoWsToken===@lasttok or ImplicitParamListStartToken===tok or ImplicitParamListEndToken===tok)
135
+ tok
119
136
  else
120
- [@sep.dup,tok.to_s]
137
+ [@sep.dup,tok]
138
+ end
139
+
140
+ if str_needs_escnls
141
+ result=result.to_s
142
+ result.gsub!(/(["`\/])$/){ "\\\n"*str_needs_escnls+$1 }
121
143
  end
122
144
 
123
145
  @lasttok=tok
@@ -147,6 +169,7 @@ class KeepWsTokenPrinter
147
169
  /^[$@a-zA-Z_]/===@lasttok)) #lasttok is id or num?
148
170
  end
149
171
  end
172
+ end
150
173
 
151
174
  #-------------------------------
152
175
 
@@ -0,0 +1,3 @@
1
+ class RubyLexer
2
+ VERSION='0.7.0'
3
+ end
File without changes
@@ -0,0 +1,43 @@
1
+ require 'rubygems'
2
+ Gem.manage_gems
3
+
4
+ class Gem::SourceInfoCache
5
+ public :read_cache
6
+ end
7
+
8
+ if ARGV.empty?
9
+ limit=1.0/0
10
+ else
11
+ limit=ARGV.first.to_i
12
+ end
13
+
14
+ gemdir="gems/"
15
+
16
+ Dir.mkdir gemdir rescue nil
17
+ total=0
18
+ db4=[]
19
+ db=Gem::SourceInfoCache.new.read_cache
20
+ db.each_pair{|site,db2|
21
+ newest={}
22
+ db3=db2.source_index.instance_variable_get(:@gems)
23
+ db3.each_pair{|filename,gemdata|
24
+ version=gemdata.version
25
+ newest[gemdata.name]=[version,filename,site] unless
26
+ newest[gemdata.name] and newest[gemdata.name].first>=version
27
+ }
28
+ newest.each_pair{|name,triad| triad.shift }
29
+ db4.push newest
30
+ }
31
+
32
+
33
+ db4.each{|hash| hash.each_pair{|filename,(fn,site)|
34
+ # fn=filename+".gem"
35
+ next if File.exist? gemdir+fn
36
+ fn+=".gem"
37
+ next if File.exist? gemdir+fn
38
+ url=site+"/gems/"+fn
39
+ puts url
40
+ system "wget #{url} -O "+gemdir+fn
41
+ total+=(4096.0+1.01*File.size(gemdir+fn)) rescue 0
42
+ exit if total>limit
43
+ }}
@@ -1,17 +1,20 @@
1
- #!/usr/bin/env ruby -dw
1
+ #!/usr/bin/env ruby
2
+ $Debug=true
2
3
  require 'rubylexer'
3
4
  require 'getoptlong'
4
5
 
5
-
6
+ #def puts(x) end
6
7
 
7
8
  #a Token#inspect that omits the object id
9
+ class RubyLexer
8
10
  class Token
9
- def inspect
10
- ["#<",self.class,": ",instance_variables.sort.collect{|v|
11
+ def strify
12
+ [self.class.name[/[^:]+$/],": ",instance_variables.sort.collect{|v|
11
13
  [v,"=",instance_variable_get(v).inspect," "]
12
- }].to_s.sub(/ $/,'>')
14
+ }].to_s
13
15
  end
14
16
  end
17
+ end
15
18
 
16
19
  file=nil
17
20
 
@@ -28,11 +31,11 @@ file||=if name=ARGV.first
28
31
  File.open(name)
29
32
  else
30
33
  name='-'
31
- $stdout
34
+ $stdin
32
35
  end
33
36
 
34
37
  lexer=RubyLexer.new(name, file)
35
- until EoiToken===(tok=lexer.get1token)
36
- p tok
38
+ until RubyLexer::EoiToken===(tok=lexer.get1token)
39
+ puts tok.strify
37
40
  end
38
- p tok #print eoi token
41
+ puts tok.strify #print eoi token