rubylexer 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
@@ -17,7 +17,7 @@
17
17
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
18
  =end
19
19
 
20
-
20
+ class RubyLexer
21
21
  class SymbolTable
22
22
  def initialize
23
23
  #note: below Stack means Array (used as a stack)
@@ -42,6 +42,14 @@ class SymbolTable
42
42
  assert @locals_lists.last
43
43
  end
44
44
 
45
+ def names
46
+ @symbols.keys
47
+ end
48
+
49
+ def __locals_lists
50
+ @locals_lists
51
+ end
52
+
45
53
  def [](name)
46
54
  assert @locals_lists.last
47
55
  (stack=@symbols[name]) and stack.last
@@ -63,3 +71,4 @@ class SymbolTable
63
71
  return val
64
72
  end
65
73
  end
74
+ end
@@ -17,8 +17,9 @@
17
17
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
18
  =end
19
19
 
20
- require "rubycode"
21
20
 
21
+
22
+ class RubyLexer
22
23
  #-------------------------
23
24
  class Token
24
25
  attr_accessor :ident
@@ -31,6 +32,8 @@ class Token
31
32
  end
32
33
 
33
34
  def error; end
35
+
36
+ def has_no_block?; false end
34
37
  end
35
38
 
36
39
  #-------------------------
@@ -45,22 +48,33 @@ end
45
48
  class KeywordToken < WToken #also some operators
46
49
 
47
50
  #-----------------------------------
48
- def set_callsite!
51
+ def set_callsite! #not needed
49
52
  @callsite=true
50
53
  end
51
54
 
52
55
  #-----------------------------------
53
- def callsite?
56
+ def callsite? #not used
54
57
  @callsite ||= nil
55
58
  end
56
59
 
60
+
61
+ #-----------------------------------
62
+ def set_infix!
63
+ @infix=true
64
+ end
65
+
66
+ #-----------------------------------
67
+ def infix?
68
+ @infix ||= nil
69
+ end
70
+ def prefix?; !infix? end
71
+
57
72
  #-----------------------------------
58
73
  def has_end!
59
74
  assert self===RubyLexer::BEGINWORDS
60
75
  @has_end=true
61
76
  end
62
77
 
63
-
64
78
  #-----------------------------------
65
79
  def has_end?
66
80
  self===RubyLexer::BEGINWORDS and @has_end||=nil
@@ -69,6 +83,9 @@ end
69
83
 
70
84
  #-------------------------
71
85
  class OperatorToken < WToken
86
+ attr_accessor :unary
87
+ alias prefix? unary
88
+ def infix?; !prefix? end
72
89
  end
73
90
 
74
91
 
@@ -87,8 +104,8 @@ module TokenPat
87
104
  end
88
105
  end
89
106
 
90
- class String; include TokenPat; end
91
- class Regexp; include TokenPat; end
107
+ class ::String; include TokenPat; end
108
+ class ::Regexp; include TokenPat; end
92
109
 
93
110
  #-------------------------
94
111
  class VarNameToken < WToken
@@ -112,6 +129,7 @@ class MethNameToken < Token # < SymbolToken
112
129
  def initialize(ident,offset=nil)
113
130
  @ident= (VarNameToken===ident)? ident.ident : ident
114
131
  @offset=offset
132
+ @has_no_block=false
115
133
  # @char=''
116
134
  end
117
135
 
@@ -121,6 +139,14 @@ class MethNameToken < Token # < SymbolToken
121
139
  def ===(pattern)
122
140
  pattern===@ident
123
141
  end
142
+
143
+ def has_no_block!
144
+ @has_no_block=true
145
+ end
146
+
147
+ def has_no_block?
148
+ @has_no_block
149
+ end
124
150
  end
125
151
 
126
152
  #-------------------------
@@ -137,14 +163,21 @@ class StringToken < Token
137
163
 
138
164
  attr_accessor :modifiers #for regex only
139
165
  attr_accessor :elems
166
+ attr_accessor :line #line on which the string ENDS
167
+
168
+ def with_line(line)
169
+ @line=line
170
+ self
171
+ end
140
172
 
141
173
  def initialize(type='"',ident='')
142
174
  super(ident)
143
175
  type=="'" and type='"'
144
176
  @char=type
145
- assert(@char[/^[\[{"`\/]$/])
177
+ assert @char[/^[\[{"`\/]$/] #"
146
178
  @elems=[ident.dup] #why .dup?
147
179
  @modifiers=nil
180
+ @line=nil
148
181
  end
149
182
 
150
183
  DQUOTE_ESCAPE_TABLE = [
@@ -161,7 +194,7 @@ class StringToken < Token
161
194
  SUFFIXERS={ '['=>"]", '{'=>'}' }
162
195
 
163
196
  def to_s(transname=:transform)
164
- assert(@char[/[\[{"`\/]/])
197
+ assert @char[/[\[{"`\/]/] #"
165
198
  #on output, all single-quoted strings become double-quoted
166
199
  assert(@elems.length==1) if @char=='['
167
200
 
@@ -274,7 +307,7 @@ end
274
307
 
275
308
  #-------------------------
276
309
  class HerePlaceholderToken < WToken
277
- attr_reader :termex, :quote, :ender
310
+ attr_reader :termex, :quote, :ender, :dash
278
311
  attr_accessor :unsafe_to_use, :string
279
312
  attr_accessor :bodyclass
280
313
 
@@ -292,14 +325,15 @@ class HerePlaceholderToken < WToken
292
325
  def ===(bogus); false end
293
326
 
294
327
  def to_s
295
- if unsafe_to_use
296
- result="<<"
297
- result << if/[^a-z_0-9]/i===@ender
328
+ if @bodyclass==OutlinedHereBodyToken
329
+ result=if/[^a-z_0-9]/i===@ender
298
330
  %["#{@ender.gsub(/[\\"]/, '\\\\'+'\\&')}"]
299
331
  else
300
332
  @ender
301
333
  end
334
+ ["<<",@quote,@ender,@quote].to_s
302
335
  else
336
+ assert !unsafe_to_use
303
337
  @string.to_s
304
338
  end
305
339
  end
@@ -307,11 +341,22 @@ class HerePlaceholderToken < WToken
307
341
  def append s; @string.append s end
308
342
 
309
343
  def append_token tok; @string.append_token tok end
344
+
345
+ #def with_line(line) @string.line=line; self end
346
+
347
+ def line; @string.line end
348
+ def line=line; @string.line=line end
349
+
350
+ end
351
+
352
+ #-------------------------
353
+ module StillIgnoreToken
310
354
 
311
355
  end
312
356
 
313
357
  #-------------------------
314
358
  class IgnoreToken < Token
359
+ include StillIgnoreToken
315
360
  end
316
361
 
317
362
  #-------------------------
@@ -338,15 +383,20 @@ class NoWsToken < ZwToken
338
383
  end
339
384
  end
340
385
 
341
- class ImplicitParamListStartToken < ZwToken
342
- def explicit_form
343
- '('
386
+ class ImplicitParamListStartToken < KeywordToken
387
+ include StillIgnoreToken
388
+ def initialize(offset)
389
+ super("(",offset)
344
390
  end
391
+ def to_s; '' end
345
392
  end
346
- class ImplicitParamListEndToken < ZwToken
347
- def explicit_form
348
- ')'
393
+
394
+ class ImplicitParamListEndToken < KeywordToken
395
+ include StillIgnoreToken
396
+ def initialize(offset)
397
+ super(")",offset)
349
398
  end
399
+ def to_s; '' end
350
400
  end
351
401
 
352
402
  class AssignmentRhsListStartToken < ZwToken
@@ -409,6 +459,7 @@ class HereBodyToken < IgnoreToken
409
459
  @headtok=headtok
410
460
  end
411
461
 
462
+ attr :headtok
412
463
  end
413
464
 
414
465
  #-------------------------
@@ -438,8 +489,7 @@ class OutlinedHereBodyToken < HereBodyToken
438
489
  assert HerePlaceholderToken===@headtok
439
490
  result=@headtok.string
440
491
  result=result.to_s(:simple_transform).match(/^"(.*)"$/m)[1]
441
- return "\n" +
442
- result +
492
+ return result +
443
493
  @headtok.ender +
444
494
  "\n"
445
495
  end
@@ -482,5 +532,7 @@ class DecoratorToken < SubitemToken
482
532
  def value() @subitem end
483
533
  end
484
534
 
535
+ end
485
536
 
537
+ require "rubylexer/rubycode"
486
538
 
@@ -22,7 +22,7 @@
22
22
  require "assert"
23
23
 
24
24
 
25
-
25
+ class RubyLexer
26
26
 
27
27
  #-------------------------------
28
28
  class SimpleTokenPrinter
@@ -34,7 +34,7 @@ class SimpleTokenPrinter
34
34
  TOKENSPERLINE=8
35
35
  TOKENSMAGICMAP="\n"+' '*(TOKENSPERLINE-1)
36
36
 
37
- def pprint(tok) print(sprint(tok)) end
37
+ def pprint(tok,output=$stdout) output.print(sprint(tok)) end
38
38
 
39
39
  def sprint(tok)
40
40
  case tok
@@ -46,14 +46,19 @@ class SimpleTokenPrinter
46
46
  end
47
47
  end
48
48
 
49
- class EscNl; def ws_munge(tp)
49
+ class EscNlToken; def ws_munge(tp)
50
50
  tp.lasttok=self
51
- return to_s
51
+ return " \\\n"
52
52
  end end
53
53
  class FileAndLineToken; def ws_munge(tp)
54
+ result=''
55
+
56
+ #faugh, doesn't fix it
57
+ #result= "\\\n"*(line-tp.lastfal.line) if StringToken===tp.lasttok
58
+
54
59
  tp.lasttok=self
55
60
  tp.lastfal=self
56
- return ''
61
+ return result
57
62
  end end
58
63
  class Newline; def ws_munge(tp)
59
64
  tp.lasttok=self
@@ -62,14 +67,20 @@ end
62
67
  class IgnoreToken; def ws_munge(tp)
63
68
  #tp.latestline+= to_s.scan("\n").size
64
69
  tp.lasttok=self
65
- unless tp.inws
70
+ result=unless tp.inws
66
71
  tp.inws=true
67
- return ' '
72
+ ' '
73
+ else
74
+ ''
75
+ end
76
+ if ?= == @ident.to_s[0]
77
+ result+="\\\n"*@ident.to_s.scan(/\r\n?|\n\r?/).size
68
78
  end
69
- return ''
79
+
80
+ return result
70
81
  end end
71
82
  class OutlinedHereBodyToken; def ws_munge(tp)
72
- nil
83
+ nil
73
84
  end end
74
85
  class ZwToken; def ws_munge(tp)
75
86
  case tp.showzw
@@ -100,24 +111,35 @@ class KeepWsTokenPrinter
100
111
  @showzw=showzw
101
112
  end
102
113
 
103
- def pprint(tok)
104
-
114
+ def pprint(tok,output=$stdout)
105
115
  @accum<<aprint(tok).to_s
106
- if @accum.size>ACCUMSIZE or EoiToken===tok
107
- print(@accum)
116
+ if (@accum.size>ACCUMSIZE and NewlineToken===tok) or EoiToken===tok
117
+ output.print(@accum)
108
118
  @accum=[]
109
119
  end
110
120
  end
111
121
 
112
122
  def aprint(tok)
123
+ if StringToken===tok or
124
+ (HerePlaceholderToken===tok and
125
+ tok.bodyclass!=OutlinedHereBodyToken
126
+ )
127
+ str_needs_escnls=(tok.line-@lastfal.line).nonzero?
128
+ end
113
129
  result=tok.ws_munge(self) and return result
114
130
 
131
+
115
132
  #insert extra ws unless an ambiguous op immediately follows
116
133
  #id or num, in which case ws would change the meaning
117
- result=if (ZwToken===tok or NoWsToken===@lasttok)
118
- tok.to_s
134
+ result=if (ZwToken===tok or NoWsToken===@lasttok or ImplicitParamListStartToken===tok or ImplicitParamListEndToken===tok)
135
+ tok
119
136
  else
120
- [@sep.dup,tok.to_s]
137
+ [@sep.dup,tok]
138
+ end
139
+
140
+ if str_needs_escnls
141
+ result=result.to_s
142
+ result.gsub!(/(["`\/])$/){ "\\\n"*str_needs_escnls+$1 }
121
143
  end
122
144
 
123
145
  @lasttok=tok
@@ -147,6 +169,7 @@ class KeepWsTokenPrinter
147
169
  /^[$@a-zA-Z_]/===@lasttok)) #lasttok is id or num?
148
170
  end
149
171
  end
172
+ end
150
173
 
151
174
  #-------------------------------
152
175
 
@@ -0,0 +1,3 @@
1
+ class RubyLexer
2
+ VERSION='0.7.0'
3
+ end
File without changes
@@ -0,0 +1,43 @@
1
+ require 'rubygems'
2
+ Gem.manage_gems
3
+
4
+ class Gem::SourceInfoCache
5
+ public :read_cache
6
+ end
7
+
8
+ if ARGV.empty?
9
+ limit=1.0/0
10
+ else
11
+ limit=ARGV.first.to_i
12
+ end
13
+
14
+ gemdir="gems/"
15
+
16
+ Dir.mkdir gemdir rescue nil
17
+ total=0
18
+ db4=[]
19
+ db=Gem::SourceInfoCache.new.read_cache
20
+ db.each_pair{|site,db2|
21
+ newest={}
22
+ db3=db2.source_index.instance_variable_get(:@gems)
23
+ db3.each_pair{|filename,gemdata|
24
+ version=gemdata.version
25
+ newest[gemdata.name]=[version,filename,site] unless
26
+ newest[gemdata.name] and newest[gemdata.name].first>=version
27
+ }
28
+ newest.each_pair{|name,triad| triad.shift }
29
+ db4.push newest
30
+ }
31
+
32
+
33
+ db4.each{|hash| hash.each_pair{|filename,(fn,site)|
34
+ # fn=filename+".gem"
35
+ next if File.exist? gemdir+fn
36
+ fn+=".gem"
37
+ next if File.exist? gemdir+fn
38
+ url=site+"/gems/"+fn
39
+ puts url
40
+ system "wget #{url} -O "+gemdir+fn
41
+ total+=(4096.0+1.01*File.size(gemdir+fn)) rescue 0
42
+ exit if total>limit
43
+ }}
@@ -1,17 +1,20 @@
1
- #!/usr/bin/env ruby -dw
1
+ #!/usr/bin/env ruby
2
+ $Debug=true
2
3
  require 'rubylexer'
3
4
  require 'getoptlong'
4
5
 
5
-
6
+ #def puts(x) end
6
7
 
7
8
  #a Token#inspect that omits the object id
9
+ class RubyLexer
8
10
  class Token
9
- def inspect
10
- ["#<",self.class,": ",instance_variables.sort.collect{|v|
11
+ def strify
12
+ [self.class.name[/[^:]+$/],": ",instance_variables.sort.collect{|v|
11
13
  [v,"=",instance_variable_get(v).inspect," "]
12
- }].to_s.sub(/ $/,'>')
14
+ }].to_s
13
15
  end
14
16
  end
17
+ end
15
18
 
16
19
  file=nil
17
20
 
@@ -28,11 +31,11 @@ file||=if name=ARGV.first
28
31
  File.open(name)
29
32
  else
30
33
  name='-'
31
- $stdout
34
+ $stdin
32
35
  end
33
36
 
34
37
  lexer=RubyLexer.new(name, file)
35
- until EoiToken===(tok=lexer.get1token)
36
- p tok
38
+ until RubyLexer::EoiToken===(tok=lexer.get1token)
39
+ puts tok.strify
37
40
  end
38
- p tok #print eoi token
41
+ puts tok.strify #print eoi token