rubylexer 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
@@ -0,0 +1,39 @@
+ require 'rubylexer/0.7.0'
+
+ #make ImplicitParamList Start and End tokens descend from IgnoreToken again
+ class RubyLexer
+ remove_const :ImplicitParamListStartToken
+ remove_const :ImplicitParamListEndToken
+
+ class ImplicitParamListStartToken < IgnoreToken
+ # include StillIgnoreToken
+ def initialize(offset)
+ super("(",offset)
+ end
+ def to_s; '' end
+ end
+
+ class ImplicitParamListEndToken < IgnoreToken
+ # include StillIgnoreToken
+ def initialize(offset)
+ super(")",offset)
+ end
+ def to_s; '' end
+ end
+ end
+
+ RubyLexer.constants.map{|k|
+ k.name[/[^:]+$/] if Token>=k or Context>=k
+ }.compact + %w[
+ RuLexer CharHandler CharSet SymbolTable
+ SimpleTokenPrinter KeepWsTokenPrinter
+ ].each{|name|
+ Object.const_set name, RubyLexer.const_get name
+ }
+
+
+ class RubyLexer
+ def merge_assignment_op_in_setter_callsites?
+ true
+ end
+ end
@@ -0,0 +1,5 @@
+ require 'rubylexer/0.6.2'
+
+ class RubyLexer
+ alias lexerror lexerror_exception
+ end
@@ -0,0 +1,2 @@
+ require 'rubylexer'
+ #nothing else (yet)
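
The three files above (lib/rubylexer/0.6.2.rb, 0.6.rb, 0.7.0.rb) act as version-compatibility shims: each loads the current lexer and, except for 0.7.0.rb, then re-opens RubyLexer to restore that version's behaviour (token ancestry, top-level constant aliases, the lexerror alias). A minimal usage sketch for downstream code follows; the caller code is hypothetical, only the require paths come from this diff:

    # pick the compatibility level you want; each is a single require
    require 'rubylexer/0.6.2'    # loads rubylexer 0.7.0, then re-applies the 0.6.2-era patches shown above
    # require 'rubylexer/0.7.0'  # current API; at the moment equivalent to require 'rubylexer'
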
@@ -17,7 +17,8 @@
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  =end
 
- require "charset"
+ require "rubylexer/charset"
+ class RubyLexer
  #------------------------------------
  class CharHandler
  #-----------------------------------
@@ -41,7 +42,7 @@ class CharHandler
  when Fixnum
  self[pattern]=action
  else
- raise "invalid pattern class #{pattern.class}"
+ raise "invalid pattern class #{pattern.class}: #{pattern}"
  end
  }
 
@@ -80,5 +81,6 @@ class CharHandler
  end until go(mychar,*args)
  end
  end
+ end
 
 
@@ -17,6 +17,7 @@
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  =end
 
+ class RubyLexer
  class CharSet
  def initialize(*charss)
  clear
@@ -31,9 +32,9 @@ class CharSet
 
  def add(chars)
  case chars
- when String
+ when ::String
  chars.each_byte {|c| @bitset |= (1<<c) }
- when Fixnum then @bitset |= (1<<chars)
+ when ::Fixnum then @bitset |= (1<<chars)
  else chars.each {|c| @bitset |= (1<<c) }
  end
  end
@@ -72,5 +73,5 @@ class CharSet
  return result
  end
  end
-
+ end
 
@@ -1,3 +1,4 @@
+ class RubyLexer
  module NestedContexts
  class NestedContext
  attr :starter
@@ -13,7 +14,8 @@ module NestedContexts
  @ender==tok
  end
 
- def see stack,msg; end
+ def see lxr,msg; end
+ def lhs=*x; end #do nothing
  end
 
  class ListContext < NestedContext
@@ -30,6 +32,7 @@ module NestedContexts
  def initialize(linenum)
  super('(', ')' ,linenum)
  end
+ attr_accessor :lhs
  end
 
  class BlockContext < NestedContext
@@ -38,16 +41,31 @@ module NestedContexts
  end
  end
 
- class BlockParamListContext < ListContext
- def initialize(linenum)
- super('|','|',linenum)
- end
- end
+ # class BlockParamListContext < ListContext
+ # def initialize(linenum)
+ # super('|','|',linenum)
+ # end
+ # end
 
  class ParamListContext < ListContext
  def initialize(linenum)
  super('(', ')',linenum)
  end
+ def lhs; false end
+ end
+
+ class ImplicitLhsContext < NestedContext
+ def initialize(linenum)
+ @linenum=linenum
+ end
+ def lhs; true end
+ def starter; nil end
+ def ender; '=' end
+ end
+
+ class BlockParamListLhsContext < ImplicitLhsContext
+ def starter; '|' end
+ def ender; '|' end
  end
 
  class ImplicitContext < ListContext
@@ -55,19 +73,26 @@ module NestedContexts
 
  class ParamListContextNoParen < ImplicitContext
  def initialize(linenum)
- dflt_initialize(nil,nil,linenum)
+ super(nil,nil,linenum)
  end
+ def lhs; false end
  end
 
- class KwParamListContext < ImplicitContext
+ class WhenParamListContext < ImplicitContext
  def initialize(starter,linenum)
- dflt_initialize(starter,nil,linenum)
+ super(starter,nil,linenum)
+ end
+ end
+
+ class AssignmentContext < NestedContext
+ def initialize(linenum)
+ super("assignment context", "=",linenum)
  end
  end
 
  class AssignmentRhsContext < ImplicitContext
  def initialize(linenum)
- dflt_initialize(nil,nil,linenum)
+ super(nil,nil,linenum)
  end
  end
 
@@ -76,8 +101,8 @@ module NestedContexts
  super(starter,'end',linenum)
  end
 
- def see stack,msg
- msg==:rescue ? stack.push_rescue_sm : super
+ def see lxr,msg
+ msg==:rescue ? lxr.parsestack.push_rescue_sm : super
  end
  end
 
@@ -115,7 +140,8 @@ module NestedContexts
  @state=:rescue
  end
 
- def see(stack,msg)
+ def see(lxr,msg)
+ stack=lxr.parsestack
  case msg
  when :rescue:
  WantsEndContext===stack.last or
@@ -125,7 +151,7 @@ module NestedContexts
  when :arrow: #local var defined in this state
  when :then,:semi,:colon:
  msg=:then
- RescueSMContext===stack.pop or raise 'syntax error: then not expected at this time'
+ self.equal? stack.pop or raise 'syntax error: then not expected at this time'
  #pop self off owning context stack
  else super
  end
@@ -149,16 +175,19 @@ module NestedContexts
  @state=:for
  end
 
- def see(stack,msg)
+ def see(lxr,msg)
+ stack=lxr.parsestack
+ assert msg!=:for
  case msg
  when :for: WantsEndContext===stack.last or raise 'syntax error: for not expected at this time'
  #local var defined in this state
- when :in: ForSMContext===stack.pop or raise 'syntax error: in not expected at this time'
+ #never actually used?
+ when :in: self.equal? stack.pop or raise 'syntax error: in not expected at this time'
  stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum)
  #pop self off owning context stack and push ExpectDoOrNlContext
  else super
  end
- LEGAL_SUCCESSORS[@state] == msg or raise 'for syntax error: #{msg} unexpected in #@state'
+ LEGAL_SUCCESSORS[@state] == msg or raise "for syntax error: #{msg} unexpected in #@state"
  @state=msg
  end
  end
@@ -171,4 +200,5 @@ module NestedContexts
  dflt_initialize('?',':',linenum)
  end
  end
- end
+ end
+ end
@@ -19,8 +19,10 @@
 
 
 
- require "token.rb"
- require "tokenprinter.rb"
+ #require "token.rb"
+ #require "tokenprinter.rb"
+
+ class RubyLexer
 
  class RubyCode < Token
  def initialize(tokens,filename,linenum)
@@ -40,5 +42,5 @@ class RubyCode < Token
  return result.to_s
  end
  end
-
+ end
 
@@ -20,14 +20,27 @@
 
 
  require "assert"
- require "charhandler"
+ #require "charhandler"
  #require "term"
- require "rubycode"
- require "io.each_til_charset"
-
+ #require "rubycode"
+ #require "io.each_til_charset"
+ #begin
+ require 'rubygems'
+ #rescue Exception:
+ #end
+ #require 'sequence'
+ require 'sequence/indexed'
+ require 'sequence/file'
+ #-----------------------------------
+ assert !defined? ::RubyLexer
+ $RuLexer=Class.new{}
+ class RubyLexer < $RuLexer
+ RuLexer=$RuLexer
+ end
+ $RuLexer=nil
  #------------------------------------
- class RuLexer
-
+ class RubyLexer
+ class RuLexer
  WHSP=" \t\r\v\f"
  WHSPLF=WHSP+"\n"
  #maybe \r should be in WHSPLF instead
@@ -41,25 +54,28 @@ class RuLexer
  #-----------------------------------
  def initialize(filename, file, line)
  @filename=filename
- String===file && file=IOext::FakeFile.new(file)
- file.binmode
- @file=file
+
+ # String===file && file=IOext::FakeFile.new(file)
+ file.binmode if File===file
+ @original_file=file
+ @file=file.to_sequence
  @linenum=line
  @toptable=nil #descendants must fill this out
- @moretokens=[ FileAndLineToken.new(@filename, @linenum, @file.pos) ]
+ @moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
  @last_operative_token=nil
+ @endsets={}
  end
 
  #-----------------------------------
  def endoffile_detected s=''
- EoiToken.new(s,@file,@file.pos-s.size)
+ EoiToken.new(s,@original_file, input_position-s.size)
  end
 
  #-----------------------------------
  def get1token
  @moretokens.empty? or return @moretokens.shift
 
- if @file.eof?
+ if eof?
  #@moretokens<<nil
  return endoffile_detected()
  end
@@ -73,6 +89,14 @@ class RuLexer
  return true
  end
 
+ #-----------------------------------
+ def each
+ begin yield tok = get1token
+ end until tok.is_a? EoiToken
+ end
+
+ include Enumerable
+
  private
  #-----------------------------------
  def lexerror_errortoken(tok,str,file=@filename,line=@linenum)
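
The each method and Enumerable mix-in added in this hunk let a lexer instance be consumed as a token stream. A short usage sketch (hypothetical caller code; only get1token, each, and EoiToken come from this diff):

    lexer.each {|token| p token }   # yields each token in turn; the final one is the EoiToken
    # or, since the stream is only read once: tokens = lexer.to_a
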
@@ -99,16 +123,16 @@
  def regex(ch=nil)
  result=RenderExactlyStringToken.new('/').
  append_token double_quote("/")
-
+ result.line=@linenum
  return result
  end
 
  #-----------------------------------
- def single_char_token(str) return @file.getc.chr end
+ def single_char_token(str) getchar end
 
  #-----------------------------------
  def illegal_char(ch)
- pos=@file.pos
+ pos= input_position
  LEGALCHARS===ch and return( lexerror WsToken.new(getchar,pos), "legal (?!) bad char (code: #{ch[0]})" )
  lexerror WsToken.new(til_charset(LEGALCHARS),pos), "bad char (code: #{ch[0]})"
  end
@@ -116,7 +140,7 @@ private
  #-----------------------------------
  def fancy_quote (ch)
  assert ch=='%'
- oldpos=@file.pos
+ oldpos= input_position
  eat_next_if(ch) or raise "fancy_quote, no "+ch
 
  ch=getchar
@@ -133,7 +157,9 @@ private
  when /^[a-z0-9]$/oi
  error= "unrecognized %string type: "+ch; '"'
  when ''
- return lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
+ result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
+ result.line=@linenum
+ return result
  else back1char; '"' #no letter means string too
  end
 
@@ -144,8 +170,10 @@
 
  result=double_quote(beg, type, (PAIRS[beg] or beg))
  case ch
- when /^[Wwr]$/: result=RenderExactlyStringToken.new(type).append_token(result)
- when 's': result=SymbolToken.new(result.to_s)
+ when /^[Wwr]$/;
+ result=RenderExactlyStringToken.new(type).append_token(result)
+ result.line=@linenum
+ when 's'; result=SymbolToken.new(result.to_s)
  end
  result.offset=oldpos
  return lexerror(result,error)
@@ -159,11 +187,15 @@ private
 
  #-----------------------------------
  def all_quote(nester, type, delimiter, bs_handler=nil)
+ endset="\r\n\\\\"
+
  #string must start with nester
  if nester==INET_NL_REX
  readnl
  else
- eat_next_if(nester)
+ endset<< "\\"+nester
+ endset<< "\\"+delimiter if nester!=delimiter
+ eat_next_if(nester[0])
  end or return nil
 
  bs_handler ||= case type
@@ -177,9 +209,12 @@ private
  str=StringToken.new type
  old_linenum=@linenum
  nestlevel=1
- maybe_crunch= "'["[type] ? nil : "#"
- @file.each_byte {|b|
- b=b.chr
+ endset<<maybe_crunch="#" unless "'["[type]
+ endset=
+ @endsets[endset] ||= /[#{endset}]/
+ loop{
+ str.append(til_charset( endset ))
+ b=getchar
  if /^[\r\n]$/===b
  back1char
  b=readnl
@@ -189,8 +224,9 @@ private
  if (nestlevel-=1)==0
  str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
  #emit eol marker later if line has changed
+ str.line=@linenum
  @linenum != old_linenum and @moretokens <<
- FileAndLineToken.new(@filename,@linenum,@file.pos)
+ FileAndLineToken.new(@filename,@linenum, input_position)
  return str
  end
  when nester
@@ -198,17 +234,25 @@ private
  assert(nester!=delimiter)
  nestlevel+=1
  when "\\"
+ begin
  b= send(bs_handler,'\\',nester,delimiter)
+ rescue e
+ lexerror str, e.message
+ end
  when nil then raise "nil char from each_byte?" #never happens
  when maybe_crunch
  nc=nextchar.chr
  nc[/^[{@$]$/] and b=ruby_code(nc)
+ when "" #eof
+ lexerror str, "unterminated #{delimiter}-string at eof"
+ break
  end
  str.append b
  }
 
- assert @file.eof?
- lexerror str,"unterminated #{delimiter}-string"
+ assert eof?
+ str.line=@linenum
+ str
  end
 
  #-----------------------------------
@@ -216,7 +260,7 @@
  ESCAPESEQS="\a\b\e\f\n\r\s\t\v"
  def dquote_esc_seq(ch,nester=nil,delimiter=nil)
  assert ch == '\\'
- #see ruby book, p 205 for documentation of escape sequences
+ #see pickaxe (1st ed), p 205 for documentation of escape sequences
  return case k=getchar
  when "\n" then @linenum+=1; ""
  when "\\" then "\\"
@@ -236,11 +280,11 @@ private
  when "v" then "\v"
  =end
  when "M"
- eat_next_if(?-) or lexerror 'bad \\M sequence'
+ eat_next_if(?-) or raise 'bad \\M sequence'
  (getchar_maybe_escape | 0x80).chr
 
  when "C"
- eat_next_if(?-) or lexerror 'bad \\C sequence'
+ eat_next_if(?-) or raise 'bad \\C sequence'
  (getchar_maybe_escape & 0x9F).chr
 
  when "c"
@@ -249,16 +293,16 @@ private
  when /^[0-7]$/
  str=k
  while str.length < 3
- str << (eat_next_if(/^[0-7]$/) or break)
+ str << (eat_next_if(/[0-7]/) or break)
  end
  (str.oct&0xFF).chr
 
  when "x"
  str=''
  while str.length < 2
- str << (eat_next_if(/^[0-9A-F]$/i) or break)
+ str << (eat_next_if(/[0-9A-F]/i) or break)
  end
- str=='' and lexerror "bad \\x sequence"
+ str=='' and raise "bad \\x sequence"
  str.hex.chr
 
  else
@@ -281,7 +325,7 @@ private
  #when "M","C"
  # eat_next_if(?-) or
  # lexerror "illegal \\#{c}- esc sequence"
- # ch + c + '-' + (eat_next_if(/^[^\\]$/)or'')
+ # ch + c + '-' + (eat_next_if(/[^\\]/)or'')
  # #if this \M- or \C- sequence is continued by
  # #another backslash, we'll just leave the
  # #backslash on the input, to be read by the next pass
@@ -332,7 +376,11 @@ private
  klass= RubyLexer===self ? self.class : RubyLexer
  rl=klass.new(@filename,@file,@linenum)
 
-
+ #pass current local vars into new parser
+ localvars.names.each{|varname|
+ rl.localvars[varname]=true
+ }
+ rl.localvars.start_block
 
  case ch
  when '@'
@@ -363,6 +411,10 @@
  rl.no_more? or
  raise 'uh-oh, ruby tokens were lexed past end of ruby code'
 
+ #local vars defined in inclusion get propagated to outer parser
+ newvars=rl.localvars.__locals_lists[1..-1].map{|bag| bag.keys }.flatten
+ newvars.each{|newvar| localvars[newvar]=true }
+
  result=RubyCode.new(tokens,@filename,@linenum)
  @linenum=rl.linenum
  return result
@@ -375,10 +427,14 @@
 
 
  #-----------------------------------
- BINCHARS=?0..?1
- OCTCHARS=?0..?7
- DECCHARS=?0..?9
- HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
+ # BINCHARS=?0..?1
+ # OCTCHARS=?0..?7
+ # DECCHARS=?0..?9
+ # HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
+ BINCHARS=/[^01_]/
+ OCTCHARS=/[^0-7_]/
+ DECCHARS=/[^0-9_]/
+ HEXCHARS=/[^0-9a-f_]/i
  #0-9
  #-----------------------------------
  def number(str)
@@ -386,83 +442,85 @@ private
  return nil unless /^[0-9+\-]$/===str
 
  interp=:to_i
- str= (eat_next_if(/^[+\-]$/)or'')
- str<< (eat_next_if(/^[0-9]$/)or'')
+ str= (eat_next_if(/[+\-]/)or'')
+ str<< (eat_next_if(?0)or'')
 
- if str[-1] == ?0 and nextchar !=?.
- typechar=eat_next_if(/^[BOX]$/i)||'o'
+ if str[-1] == ?0 and !eof? and !nextchar.chr[/[.eE]/]
+ typechar=eat_next_if(/[BOXD]/i)||'o'
  str << typechar
  interp=:oct
- allowed=case typechar
- when 'b','B': BINCHARS
- when 'x','X': HEXCHARS
- when 'o','O': OCTCHARS
+ unallowed=case typechar
+ when 'b','B'; BINCHARS
+ when 'x','X'; HEXCHARS
+ when 'o','O'; OCTCHARS
+ when 'd','D'; interp=:to_i; DECCHARS
  else raise :impossible
  end
  else
  interp=:to_i
- allowed =DECCHARS
+ unallowed =DECCHARS
  end
 
- addl_dig_seqs= ((!typechar)? 2 : 0) #den 210
+ addl_dig_seqs= (typechar)? 0 : 2 #den 210
  error=nil
- @file.each_byte { |b|
- if allowed === b or ?_ == b
- str << b
- else
+
+ # @file.each_byte { |b|
+ # if unallowed === b or ?_ == b
+ # str << b
+ # else
+ str<<til_charset(unallowed)
+ b=getc
  #digits must follow and precede . and e
- if ?.==b and addl_dig_seqs==2 and allowed===nextchar
- addl_dig_seqs=1
+ if ?.==b and addl_dig_seqs==2 and !(unallowed===nextchar.chr)
+ #addl_dig_seqs=1
  str << b
+ str<<til_charset(unallowed)
+ b=getc
+ interp=:to_s
+ end
  #digits must follow and precede . and e
- elsif (?e==b or ?E==b) and addl_dig_seqs>=1 and
+ if (?e==b or ?E==b) and addl_dig_seqs>=1 and
  readahead(2)[/^[-+]?[0-9]/]
- addl_dig_seqs=0
+ #addl_dig_seqs=0
  str << b
  str << (eat_next_if(/[+\-]/)or'')
- else
- back1char
- #return(str.send(interp))
- break
+ str<<til_charset(unallowed)
+ b=getc
+ interp=:to_s
  end
- #OCTCHARS allowed here to permit constants like this: 01.2
- allowed == DECCHARS or allowed == OCTCHARS or error= "floats are always decimal (currently)"
- allowed = DECCHARS
- interp=:to_s
- end
- }
+ back1char if b
+ #return(str.send(interp))
+ # break
+ # #OCTCHARS allowed here to permit constants like this: 01.2
+ # unallowed == DECCHARS or unallowed == OCTCHARS or error= "floats are always decimal (currently)"
+ # unallowed = DECCHARS
+ # interp=:to_s
+ # end
+ # }
 
  assert(str[/[0-9]/])
  lexerror NumberToken.new(str.send(interp)), error
  end
 
+ if (defined? DEBUGGER__ or defined? Debugger)
  #-----------------------------------
  def comment(str=nil)
  #assert str == '#'
- str=eat_next_if(?#) or return nil
- Process.kill("INT",0) if
- readahead(10)==%/breakpoint/ and defined? DEBUGGER__
- if false
- @file.each_byte {|b|
- if b==?\n #leave \n's on input for newline to eat
- back1char
- else
- str << b
- end
- return IgnoreToken.new(str) if b==?\n or b==?#
- }
- #eof...
+ Process.kill("INT",0) if readahead(11)==%/#breakpoint/
+
+ IgnoreToken.new(til_charset(/[\r\n]/))
+ end
  else
- str<<til_charset(/[\r\n#]/)
- eat_next_if ?# and str<<?#
- end
- return IgnoreToken.new(str)
+ #-----------------------------------
+ def comment(str=nil)
+ IgnoreToken.new(til_charset(/[\r\n]/))
  end
+ end
 
  #-----------------------------------
  def whitespace(ch)
  assert ch[/^[#{WHSP}]$/o]
- oldpos=@file.pos
+ oldpos= input_position
  str=til_charset(/[^#{WHSP}]/o)
  return WsToken.new(str,oldpos)
  end
@@ -471,26 +529,29 @@ end
  INET_NL_REX=/^(\r\n?|\n\r?)/
  def readnl
  #compatible with dos/mac style newlines...
+
+ eof? and return ''
+
  nl=readahead(2)[INET_NL_REX]
  nl or return nil
  assert((1..2)===nl.length)
  @linenum+=1
- @file.read nl.length
+ read nl.length
  end
 
  #-----------------------------------
  def newline(ch)
- offset=@file.pos
+ offset= input_position
  nl=readnl
- @moretokens << FileAndLineToken.new( @filename, @linenum, @file.pos )
+ @moretokens << FileAndLineToken.new( @filename, @linenum, input_position )
  return NewlineToken.new( nl,offset)
  end
 
 
  #-----------------------------------
  def getchar_maybe_escape
- @file.eof? and lexerror huh,"unterminated dq string"
- c=@file.getc
+ eof? and raise "unterminated dq string"
+ c=getc
 
  c == ?\\ and
  (c = (dquote_esc_seq('\\')[-1] or ?\n))
@@ -498,26 +559,41 @@ end
  return c
  end
 
- #-----------------------------------
- def RuLexer.delegate_to(obj,*names)
- eval names.collect {|name|
- "define_method(:#{name}) do|*args|
- #{obj}.#{name}(*args)
- end
- "
- }.to_s
+ protected
+ # delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:getc,:back1char
+ require 'forwardable'
+ extend Forwardable
+ def_delegators :@file, :readahead,:readback, :read, :eof?
+
+ def til_charset cs,len=16; @file.read_til_charset cs,len end
+ def getc; @file.read1 end
+ def getchar; @file.read 1 end
+ def back1char; @file.move( -1 )end
+ def prevchar; @file.readbehind 1 end
+ def nextchar; @file.readahead1 end
+ def eat_next_if(ch);
+ saw=getchar
+ if Integer===ch
+ ch==saw[0]
+ else
+ ch===saw
+ end or (back1char; return)
+ return saw
  end
 
- protected
- delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:back1char,:readahead,:readback,:til_charset
+ #-----------------------------------
+ def input_position; @file.pos end
+
+ #-----------------------------------
+ def input_position_set x; @file.pos=x end
 
  #-----------------------------------
- def RuLexer.save_offsets_in(*funcnames)
+ def self.save_offsets_in(*funcnames)
  eval funcnames.collect{|fn| <<-endeval }.to_s
  class ::#{self}
  alias #{fn}__no_offset #{fn} #rename old ver of fn
  def #{fn}(*args) #create new version
- pos=@file.pos
+ pos= input_position
  result=#{fn}__no_offset(*args)
  assert Token===result
  result.offset||=pos
@@ -527,6 +603,8 @@ protected
  endeval
  end
 
-
+ end
 
  end
+
+