rubylexer 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
@@ -1,2 +1,11 @@
1
- require 'rubylexer'
2
- #nothing else (yet)
1
+ require 'rubylexer/0.7.1'
2
+
3
+ class RubyLexer
4
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=0
5
+ DECIMAL_INT_INTERP=:to_i
6
+ ARBITRARY_INT_INTERP=:oct
7
+ AUTO_UNESCAPE_STRINGS=true
8
+ end
9
+
10
+
11
+
@@ -0,0 +1,2 @@
1
+ require 'rubylexer'
2
+ #nothing else (yet)
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -59,8 +59,8 @@ class CharHandler
59
59
  assert !frozen?
60
60
 
61
61
  @table[b]=action
62
- @matcher<<?\\ if CHARSETSPECIALS===b
63
- @matcher<<b
62
+ @matcher << ?\\ if CHARSETSPECIALS===b
63
+ @matcher << b
64
64
  end
65
65
  private :[]=
66
66
 
@@ -1,3 +1,23 @@
1
+ =begin legal crap
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2008 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
1
21
  class RubyLexer
2
22
  module NestedContexts
3
23
  class NestedContext
@@ -18,6 +38,8 @@ module NestedContexts
18
38
  def lhs=*x; end #do nothing
19
39
  end
20
40
 
41
+ #contexts which expect to see commas,
42
+ #(other than assignment lhs, which has no context)
21
43
  class ListContext < NestedContext
22
44
  end
23
45
 
@@ -41,6 +63,12 @@ module NestedContexts
41
63
  end
42
64
  end
43
65
 
66
+ class BeginEndContext < NestedContext
67
+ def initialize(str,linenum)
68
+ super('{','}',linenum)
69
+ end
70
+ end
71
+
44
72
  # class BlockParamListContext < ListContext
45
73
  # def initialize(linenum)
46
74
  # super('|','|',linenum)
@@ -67,7 +95,7 @@ module NestedContexts
67
95
  def starter; '|' end
68
96
  def ender; '|' end
69
97
  end
70
-
98
+
71
99
  class ImplicitContext < ListContext
72
100
  end
73
101
 
@@ -78,6 +106,9 @@ module NestedContexts
78
106
  def lhs; false end
79
107
  end
80
108
 
109
+ class KWParamListContextNoParen < ParamListContextNoParen
110
+ end
111
+
81
112
  class WhenParamListContext < ImplicitContext
82
113
  def initialize(starter,linenum)
83
114
  super(starter,nil,linenum)
@@ -94,16 +125,54 @@ module NestedContexts
94
125
  def initialize(linenum)
95
126
  super(nil,nil,linenum)
96
127
  end
128
+ def see lxr,msg
129
+ case msg
130
+ when :semi; lxr.parsestack.pop
131
+ when :comma,:splat; @multi=true
132
+ end
133
+ end
134
+ def multi_assign?; @multi end
97
135
  end
98
136
 
99
137
  class WantsEndContext < NestedContext
100
138
  def initialize(starter,linenum)
101
139
  super(starter,'end',linenum)
102
140
  end
141
+
142
+ attr_accessor :state
103
143
 
104
144
  def see lxr,msg
105
- msg==:rescue ? lxr.parsestack.push_rescue_sm : super
145
+ msg==:rescue and lxr.parsestack.push_rescue_sm
146
+ end
147
+ end
148
+
149
+ class ClassContext < WantsEndContext
150
+ def see(lxr,msg)
151
+ if msg==:semi and @state!=:semi
152
+ lxr.localvars_stack.push SymbolTable.new
153
+ @state=:semi
154
+ else
155
+ super
156
+ end
157
+ end
158
+ end
159
+
160
+ class DefContext < WantsEndContext
161
+ def initialize(linenum)
162
+ super('def', linenum)
163
+ @in_body=false
164
+ end
165
+
166
+ def see(lxr,msg)
167
+ if msg==:semi and @state!=:semi
168
+ @in_body=true
169
+ @state=:semi
170
+ else
171
+ super
172
+ end
106
173
  end
174
+
175
+ attr :in_body
107
176
  end
108
177
 
109
178
  class StringContext < NestedContext #not used yet
@@ -125,13 +194,19 @@ module NestedContexts
125
194
  end
126
195
 
127
196
 
128
- class RescueSMContext < NestedContext
197
+ class RescueSMContext < ListContext
129
198
  #normal progression: rescue => arrow => then
130
199
  EVENTS=[:rescue,:arrow,:then,:semi,:colon]
131
- LEGAL_SUCCESSORS={nil=> [:rescue], :rescue => [:arrow,:then,:semi,:colon],:arrow => [:then,:semi,:colon],:then => [nil]}
132
- #note on :semi and :colon events: in arrow state (and only then),
200
+ LEGAL_SUCCESSORS={
201
+ nil=> [:rescue],
202
+ :rescue => [:arrow,:then,:semi,:colon],
203
+ :arrow => [:then,:semi,:colon],
204
+ :then => []
205
+ }
206
+ #note on :semi and :colon events:
133
207
  # (unescaped) newline, semicolon, and (unaccompanied) colon
134
- # also trigger the :then event. otherwise, they are ignored.
208
+ # also trigger the :then event. they are ignored if in :then
209
+ # state already.
135
210
  attr :state
136
211
 
137
212
  def initialize linenum
@@ -153,6 +228,7 @@ module NestedContexts
153
228
  msg=:then
154
229
  self.equal? stack.pop or raise 'syntax error: then not expected at this time'
155
230
  #pop self off owning context stack
231
+ when :comma, :splat: return
156
232
  else super
157
233
  end
158
234
  LEGAL_SUCCESSORS[@state].include? msg or raise "rescue syntax error: #{msg} unexpected in #@state"
@@ -161,10 +237,10 @@ module NestedContexts
161
237
 
162
238
  end
163
239
 
164
- class ForSMContext < NestedContext
240
+ class ForSMContext < ImplicitLhsContext
165
241
  #normal progression: for => in
166
242
  EVENTS=[:for,:in]
167
- LEGAL_SUCCESSORS={nil=> :for, :for => :in,:in => nil}
243
+ LEGAL_SUCCESSORS={nil=> [:for], :for => [:in],:in => []}
168
244
  #note on :semi and :colon events: in :in state (and only then),
169
245
  # (unescaped) newline, semicolon, and (unaccompanied) colon
170
246
  # also trigger the :then event. otherwise, they are ignored.
@@ -185,9 +261,10 @@ module NestedContexts
185
261
  when :in: self.equal? stack.pop or raise 'syntax error: in not expected at this time'
186
262
  stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum)
187
263
  #pop self off owning context stack and push ExpectDoOrNlContext
264
+ when :comma, :splat: return
188
265
  else super
189
266
  end
190
- LEGAL_SUCCESSORS[@state] == msg or raise "for syntax error: #{msg} unexpected in #@state"
267
+ LEGAL_SUCCESSORS[@state].include? msg or raise "for syntax error: #{msg} unexpected in #@state"
191
268
  @state=msg
192
269
  end
193
270
  end
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -17,6 +17,8 @@
17
17
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
18
  =end
19
19
 
20
+ #warn "hacking $LOAD_PATH to find latest sequence"
21
+ #$:<<"../sequence/lib"
20
22
 
21
23
 
22
24
  require "assert"
@@ -31,6 +33,7 @@ require 'rubygems'
31
33
  #require 'sequence'
32
34
  require 'sequence/indexed'
33
35
  require 'sequence/file'
36
+ require 'sequence/list'
34
37
  #-----------------------------------
35
38
  assert !defined? ::RubyLexer
36
39
  $RuLexer=Class.new{}
@@ -40,6 +43,9 @@ end
40
43
  $RuLexer=nil
41
44
  #------------------------------------
42
45
  class RubyLexer
46
+ FASTER_STRING_ESCAPES=true
47
+ warn "FASTER_STRING_ESCAPES is off" unless FASTER_STRING_ESCAPES
48
+ AUTO_UNESCAPE_STRINGS=false
43
49
  class RuLexer
44
50
  WHSP=" \t\r\v\f"
45
51
  WHSPLF=WHSP+"\n"
@@ -49,20 +55,22 @@ class RubyLexer
49
55
 
50
56
  PAIRS={ '{'=>'}', '['=>']', '('=>')', '<'=>'>'}
51
57
 
52
- attr_reader :linenum,:last_operative_token
58
+ attr_reader :linenum,:last_operative_token,:original_file,:filename
59
+ attr_accessor :file #hack
53
60
 
54
61
  #-----------------------------------
55
- def initialize(filename, file, line)
62
+ def initialize(filename, file, line, offset_adjust=0)
56
63
  @filename=filename
57
64
 
58
65
  # String===file && file=IOext::FakeFile.new(file)
59
66
  file.binmode if File===file
60
67
  @original_file=file
61
68
  @file=file.to_sequence
69
+ @file.pos=@original_file.pos if @original_file.respond_to? :pos
62
70
  @linenum=line
63
71
  @toptable=nil #descendants must fill this out
72
+ @min_offset_adjust=@offset_adjust=offset_adjust
64
73
  @moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
65
- @last_operative_token=nil
66
74
  @endsets={}
67
75
  end
68
76
 
@@ -95,6 +103,14 @@ class RubyLexer
95
103
  end until tok.is_a? EoiToken
96
104
  end
97
105
 
106
+ #-----------------------------------
107
+ # def offset_adjust; 0 end
108
+
109
+ #-----------------------------------
110
+ # def offset_adjust_set! offset_adjust
111
+ # @offset_adjust=offset_adjust
112
+ # end
113
+
98
114
  include Enumerable
99
115
 
100
116
  private
@@ -121,8 +137,8 @@ private
121
137
 
122
138
  #-----------------------------------
123
139
  def regex(ch=nil)
124
- result=RenderExactlyStringToken.new('/').
125
- append_token double_quote("/")
140
+ result=RenderExactlyStringToken.new('/').append_token str=double_quote("/")
141
+ result.open=result.close="/"
126
142
  result.line=@linenum
127
143
  return result
128
144
  end
@@ -142,17 +158,20 @@ private
142
158
  assert ch=='%'
143
159
  oldpos= input_position
144
160
  eat_next_if(ch) or raise "fancy_quote, no "+ch
161
+ strlex=:double_quote
162
+ open="%"
145
163
 
146
164
  ch=getchar
165
+ open+=ch
147
166
  #ch.tr!('qwQWrx','"["{/`')
148
167
  type=case ch
149
- when 'q' then "'"
168
+ when 'q' then strlex=:single_quote; "'"
150
169
  when 'w' then "[" #word array
151
170
  when 'Q' then '"' #regular string
152
171
  when 'W' then '{' #dquotish word array
153
172
  when 'r' then '/' #regex
154
173
  when 'x' then '`' #exec it
155
- when 's' then '"' #symbol
174
+ when 's' then strlex=:single_quote; "'" #symbol
156
175
  #other letters, nums are illegal here
157
176
  when /^[a-z0-9]$/oi
158
177
  error= "unrecognized %string type: "+ch; '"'
@@ -160,33 +179,191 @@ private
160
179
  result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
161
180
  result.line=@linenum
162
181
  return result
163
- else back1char; '"' #no letter means string too
182
+
183
+ else open.chop!; back1char; '"' #no letter means string too
164
184
  end
165
185
 
186
+ if FASTER_STRING_ESCAPES
187
+ beg= readahead(2)=="\r\n" ? "\r\n" : nextchar.chr
188
+ assert /[\r\n]/===nextchar.chr if beg=="\r\n"
189
+ else
166
190
  beg=nextchar.chr
167
191
  if /^[\r\n]$/===beg then
168
192
  beg=INET_NL_REX
169
193
  end
170
-
171
- result=double_quote(beg, type, (PAIRS[beg] or beg))
194
+ end
195
+ result=send(strlex, beg, type, close=(PAIRS[beg] or beg))
172
196
  case ch
173
- when /^[Wwr]$/;
197
+ when /^[Wwr]$/:
198
+ str=result
174
199
  result=RenderExactlyStringToken.new(type).append_token(result)
200
+ result.open=str.open; result.close=str.close
175
201
  result.line=@linenum
176
- when 's'; result=SymbolToken.new(result.to_s)
202
+ when 's':
203
+ result.open=open+beg
204
+ result.close=close
205
+ result=SymbolToken.new result,nil,"%s"
177
206
  end
207
+ result.open=open+beg
208
+ result.close=close
178
209
  result.offset=oldpos
179
210
  return lexerror(result,error)
180
211
  end
181
212
 
182
213
  #-----------------------------------
183
- #this method is now misnamed, since it handles single quotes as well
184
214
  def double_quote(nester, type=nester, delimiter=nester)
185
- all_quote(nester,type,delimiter)
215
+ result=all_quote(nester,type,delimiter)
216
+ result.open=nester
217
+ result.close=delimiter
218
+ return result
186
219
  end
187
220
 
188
221
  #-----------------------------------
222
+ def single_quote(nester, type=nester, delimiter=nester)
223
+ result=all_quote nester, type, delimiter
224
+ # result.elems.first.gsub! /\\\\/, '\\'
225
+ result.open=result.close="'"
226
+ return result
227
+ end
228
+
229
+ #-----------------------------------
230
+ INTERIOR_REX_CACHE={}
231
+ EVEN_BS_S=/
232
+ ($|
233
+ [^\\c-]|
234
+ ($|[^\\])(c|[CM]-)|
235
+ ($|[^CM])-
236
+ )
237
+ (\\(?:c|[CM]-)?\\)*
238
+ /x
239
+ ILLEGAL_ESCAPED=/#{EVEN_BS_S}(\\([CM][^-]|x[^a-fA-F0-9]))/o #whaddaya do with this?
240
+ ILLEGAL_CRUNCH=/#{EVEN_BS_S}(\#@[^a-zA-Z_]|\#$[^a-zA-Z_0-9\-!@&+`'=~\/\\,.;<>*"$?:;])/o #and this?
189
241
  def all_quote(nester, type, delimiter, bs_handler=nil)
242
+ if FASTER_STRING_ESCAPES
243
+ #string must start with nester
244
+ if nester=="\r\n" #treat dos nl like unix
245
+ nester=delimiter="\n"
246
+ readnl
247
+ else
248
+ eat_next_if(nester[0])
249
+ end or return nil
250
+ special_char= nester.dup
251
+ special_char<< (delimiter) if nester!=delimiter
252
+
253
+ if "'["[type]
254
+ single_quotish=true
255
+ special=/\\./m
256
+ else
257
+ crunch=/#(?=[^{$@])/
258
+ escaped=/\\([^xcCM0-7]|(c|[CM].)([^\\]|(?=\\))|x.[0-9a-fA-F]?|[0-7]{1,3})/m
259
+ special=
260
+ case delimiter
261
+ when '\\': crunch
262
+ when '#': escaped
263
+ else /#{escaped}|#{crunch}/o
264
+ end
265
+ special_char<< maybe_crunch="#"
266
+ end
267
+ normal="[^#{Regexp.quote '\\'+special_char}]"
268
+ interior=INTERIOR_REX_CACHE[special_char]||=/#{normal}*(#{special}+#{normal}*)*/
269
+
270
+ #backslash is just scanned thru, not interpreted
271
+ #... that will change token format
272
+ #, which will make lots of downstream headaches.
273
+
274
+ str=StringToken.new type
275
+ str.bs_handler ||= case type
276
+ when '/' then :regex_esc_seq
277
+ when '{' then :Wquote_esc_seq
278
+ when '"','`',':' then :dquote_esc_seq
279
+ when "'" then :squote_esc_seq
280
+ when "[" then :wquote_esc_seq
281
+ else raise "unknown quote type: #{type}"
282
+ end
283
+
284
+ old_linenum=@linenum
285
+ nestlevel=1
286
+ loop{
287
+ str.append(@file.scan( interior ))
288
+ #scan could stop at any character if at the end of its buffer.
289
+ b=getchar
290
+ case b
291
+ when delimiter
292
+ assert nestlevel>0
293
+ if (nestlevel-=1)==0
294
+
295
+
296
+ case str.elems.last
297
+ #if last str data fragment was empty and
298
+ #followed an inclusion, delete it
299
+ #unless there was an escnl between inclusion and string end
300
+ when ''
301
+ str.elems.size>1 and
302
+ if /\\\r?\n(.|\r?\n)\Z/===@file.readbehind(5)
303
+ #do nothing
304
+ else
305
+ str.elems.pop
306
+ end
307
+ when /\r\Z/ #if delim is \n, trailing (literal) \r is chopped
308
+ str.elems.last.chomp! "\r" if delimiter=="\n"
309
+ end
310
+
311
+ str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
312
+
313
+ nlcount=0
314
+ str.elems.each{|frag|
315
+ next unless String===frag
316
+ #dos nls turn into unix nls in string literals
317
+ nlcount+=frag.count("\n")
318
+ frag.gsub!(/\r\n/, "\n")
319
+ }
320
+
321
+ nlcount+=1 if delimiter=="\n"
322
+ str.line=@linenum+=nlcount
323
+ if nlcount>0
324
+ #emit eol marker later if line has changed
325
+ @moretokens << FileAndLineToken.new(
326
+ @filename,@linenum,input_position
327
+ )
328
+ @pending_here_bodies.each{|body|
329
+ body.allow_ooo_offset=true
330
+ } unless delimiter=="\n"
331
+ end
332
+
333
+
334
+ str.open=nester
335
+ str.close=delimiter
336
+ return str
337
+ end
338
+ assert nestlevel>0
339
+ when nester
340
+ #this branch ignored if nester==delimiter
341
+ assert(nester!=delimiter)
342
+ nestlevel+=1
343
+ when nil then raise "nil char from each_byte?" #never happens
344
+ when maybe_crunch
345
+ nc=nextchar.chr
346
+ nc[/^[{@$]$/] and b=ruby_code(nc)
347
+ when "\\"
348
+ back1char
349
+ next
350
+ when "" #eof
351
+ lexerror str, "unterminated #{delimiter}-string at eof"
352
+ break
353
+ end
354
+
355
+ #shouldn't tolerate ILLEGAL_ESCAPED in str (unless single quotish)....
356
+ lexerror str, "illegal escape sequence" if !("['"[type]) and ILLEGAL_ESCAPED===b
357
+
358
+ str.append b
359
+ }
360
+
361
+ assert eof?
362
+ str.line=@linenum
363
+ str
364
+ else
365
+
366
+
190
367
  endset="\r\n\\\\"
191
368
 
192
369
  #string must start with nester
@@ -199,7 +376,8 @@ private
199
376
  end or return nil
200
377
 
201
378
  bs_handler ||= case type
202
- when '/','{' then :regex_esc_seq
379
+ when '/' then :regex_esc_seq
380
+ when '{' then :Wquote_esc_seq
203
381
  when '"','`',':' then :dquote_esc_seq
204
382
  when "'" then :squote_esc_seq
205
383
  when "[" then :wquote_esc_seq
@@ -212,6 +390,7 @@ private
212
390
  endset<<maybe_crunch="#" unless "'["[type]
213
391
  endset=
214
392
  @endsets[endset] ||= /[#{endset}]/
393
+ false&& last_escnl_elem_idx=nil
215
394
  loop{
216
395
  str.append(til_charset( endset ))
217
396
  b=getchar
@@ -221,14 +400,34 @@ private
221
400
  end
222
401
  case b
223
402
  when delimiter
403
+ assert nestlevel>0
224
404
  if (nestlevel-=1)==0
405
+
406
+ #if last str data fragment was empty and
407
+ #followed an inclusion, delete it
408
+ #unless there was an escnl between inclusion and string end
409
+ if str.elems.last=='' and str.elems.size>1
410
+ if /\\\r?\n(.|\r?\n)\Z/===@file.readbehind(5)
411
+ #do nothing
412
+ else
413
+ str.elems.pop
414
+ end
415
+ end
416
+
225
417
  str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
226
- #emit eol marker later if line has changed
227
418
  str.line=@linenum
228
- @linenum != old_linenum and @moretokens <<
229
- FileAndLineToken.new(@filename,@linenum, input_position)
419
+ if @linenum != old_linenum
420
+ #emit eol marker later if line has changed
421
+ @moretokens << FileAndLineToken.new(
422
+ @filename,@linenum,input_position
423
+ )
424
+ @pending_here_bodies.each{|body|
425
+ body.allow_ooo_offset=true
426
+ } unless nester==INET_NL_REX
427
+ end
230
428
  return str
231
429
  end
430
+ assert nestlevel>0
232
431
  when nester
233
432
  #this branch ignored if nester==delimiter
234
433
  assert(nester!=delimiter)
@@ -248,11 +447,13 @@ private
248
447
  break
249
448
  end
250
449
  str.append b
450
+
251
451
  }
252
452
 
253
453
  assert eof?
254
454
  str.line=@linenum
255
455
  str
456
+ end
256
457
  end
257
458
 
258
459
  #-----------------------------------
@@ -268,26 +469,17 @@ private
268
469
  when '#' then '#'
269
470
  when /^[#{ESCAPECHRS}]$/o
270
471
  k.tr(ESCAPECHRS,ESCAPESEQS)
271
- =begin not needed anymore
272
- when "a" then "\a"
273
- when "b" then "\b"
274
- when "e" then "\e"
275
- when "f" then "\f"
276
- when "n" then "\n"
277
- when "r" then "\r"
278
- when "s" then "\ "
279
- when "t" then "\t"
280
- when "v" then "\v"
281
- =end
282
472
  when "M"
283
473
  eat_next_if(?-) or raise 'bad \\M sequence'
284
474
  (getchar_maybe_escape | 0x80).chr
285
475
 
286
476
  when "C"
287
477
  eat_next_if(?-) or raise 'bad \\C sequence'
478
+ nextchar==?? and getchar and return "\177" #wtf?
288
479
  (getchar_maybe_escape & 0x9F).chr
289
480
 
290
481
  when "c"
482
+ nextchar==?? and getchar and return "\177" #wtf?
291
483
  (getchar_maybe_escape & 0x9F).chr
292
484
 
293
485
  when /^[0-7]$/
@@ -306,31 +498,33 @@ private
306
498
  str.hex.chr
307
499
 
308
500
  else
309
- '\\'+k
501
+ k
310
502
  end
311
503
  end
312
504
 
313
505
  #-----------------------------------
314
506
  def regex_esc_seq(ch,nester,delimiter)
315
507
  assert ch == '\\'
316
- c=getchar
317
- return case c
318
- when "\n"
319
- @linenum+=1
320
- ''#ch+c
321
- when nester,delimiter ,"/"
322
- c
323
- #when "c"
324
- # ch + c + getchar
325
- #when "M","C"
326
- # eat_next_if(?-) or
327
- # lexerror "illegal \\#{c}- esc sequence"
328
- # ch + c + '-' + (eat_next_if(/[^\\]/)or'')
329
- # #if this \M- or \C- sequence is continued by
330
- # #another backslash, we'll just leave the
331
- # #backslash on the input, to be read by the next pass
332
- else
333
- ch+c
508
+ ch=getchar
509
+ if ch=="\n"
510
+ @linenum+=1
511
+ return ''
512
+ end
513
+ '\\'+ch
514
+ end
515
+
516
+ #-----------------------------------
517
+ def Wquote_esc_seq(ch,nester,delimiter)
518
+ assert ch == '\\'
519
+ case ch=getchar
520
+ when "\n": @linenum+=1; ch
521
+ when nester,delimiter: ch
522
+ when /[\s\v\\]/: ch
523
+ else
524
+ back1char
525
+ result=dquote_esc_seq('\\',nester,delimiter)
526
+ #/\s|\v/===result and result="\\"+result
527
+ result
334
528
  end
335
529
  end
336
530
 
@@ -340,16 +534,16 @@ private
340
534
 
341
535
  #get the escaped character
342
536
  escchar=getchar
343
- return (case escchar
344
- #all \ sequences but \delimiter, \nester
345
- #are passed thru unchanged; actual
537
+ case escchar
538
+ #all \ sequences
539
+ #are unescaped; actual
346
540
  #newlines are counted but not changed
347
- when delimiter,nester
348
- ''
349
- when "\n"
350
- @linenum+=1; "\\"
351
- else '\\'
352
- end+escchar)
541
+ when delimiter,nester,'\\': escchar
542
+ # when delimiter,nester: escchar
543
+ when "\n": @linenum+=1; escchar
544
+ when /[\s\v]/: escchar
545
+ else "\\"+escchar
546
+ end
353
547
  end
354
548
 
355
549
  #-----------------------------------
@@ -358,52 +552,173 @@ private
358
552
 
359
553
  #get the escaped character
360
554
  escchar=getchar
361
- return (case escchar
362
- #all \ sequences but \delimiter, \nester and \\
363
- #are passed thru unchanged; actual
555
+ case escchar
556
+ #all \ sequences
557
+ #are unescaped; actual
558
+ #newlines are counted but not changed
559
+ when delimiter,nester,'\\': escchar
560
+ # when delimiter,nester: escchar
561
+ when "\n": @linenum+=1; "\\"+escchar
562
+ else "\\"+escchar
563
+ end
564
+ end
565
+
566
+ #-----------------------------------
567
+ def squote_heredoc_esc_seq(ch,nester,delimiter)
568
+ assert(ch=='\\')
569
+
570
+ #get the escaped character
571
+ escchar=getchar
572
+ case escchar
573
+ #all \ sequences
574
+ #are unescaped; actual
364
575
  #newlines are counted but not changed
365
- when delimiter,nester,'\\'
366
- ''
367
- when "\n"
368
- @linenum+=1; "\\"
369
- else '\\'
370
- end+escchar)
576
+ when delimiter,nester: escchar
577
+ # when delimiter,nester: escchar
578
+ when "\n": @linenum+=1; "\\"+escchar
579
+ else "\\"+escchar
580
+ end
581
+ end
582
+
583
+ =begin
584
+ #-----------------------------------
585
+ def squote_esc_seq(ch,nester,delimiter)
586
+ assert(ch=='\\')
587
+
588
+ #get the escaped character
589
+ escchar=getchar
590
+ escchar=="\n" and @linenum+=1
591
+ escchar="\\"+escchar unless escchar[/['\\]/]
592
+ return escchar
371
593
  end
594
+ =end
595
+ # alias squote_esc_seq wquote_esc_seq
372
596
 
597
+ module RecursiveRubyLexer
598
+ def initial_nonblock_levels
599
+ @localvars_stack.size==1 ? 2 : 1
600
+ end
601
+ end
602
+
603
+ def initial_nonblock_levels; 1 end
604
+ def first_current_level
605
+ result=@localvars_stack.last.__locals_lists.size-initial_nonblock_levels
606
+ result=[initial_nonblock_levels,result].max
607
+ result
608
+ end
609
+
610
+ def merge_levels levels, nil_empty_class
611
+ case (levels.size rescue 0)
612
+ when 0: {} unless nil_empty_class
613
+ when 1: levels.first.dup
614
+ else levels.inject{|a,b| a.merge b}
615
+ end
616
+ end
617
+
618
+ def decompose_lvars(nil_empty_class=false)
619
+ levels=
620
+ @localvars_stack.last.__locals_lists
621
+ nonblocky=merge_levels levels[0...initial_nonblock_levels], nil_empty_class
622
+ blocky=merge_levels levels[initial_nonblock_levels...first_current_level], nil_empty_class
623
+ current=merge_levels levels[first_current_level..-1], nil_empty_class
624
+ return nonblocky,blocky,current
625
+ end
626
+
627
+ def new_lvar_type
628
+ size=@localvars_stack.last.__locals_lists.size
629
+ return :local if size<=initial_nonblock_levels
630
+ return :block if size<first_current_level
631
+ return :current
632
+ end
633
+
634
+ def lvar_type(name)
635
+ nonblocky,blocky,current=decompose_lvars
636
+ nonblocky[name] and return :local
637
+ blocky[name] and return :block
638
+ current[name] and return :current
639
+ return new_lvar_type
640
+ end
641
+
642
+ def assign_lvar_type!(vartok)
643
+ vartok.respond_to? :lvar_type= and
644
+ vartok.lvar_type=lvar_type(vartok.ident)
645
+ return vartok
646
+ end
647
+
373
648
  #-----------------------------------
374
649
  def ruby_code(ch='{')
375
650
  assert ch[/^[{(@$]$/]
376
651
  klass= RubyLexer===self ? self.class : RubyLexer
377
- rl=klass.new(@filename,@file,@linenum)
652
+ rl=klass.new(@filename,@file,@linenum,offset_adjust())
653
+ rl.extend RecursiveRubyLexer
654
+ # rl.offset_adjust_set! offset_adjust()
655
+ assert offset_adjust()==rl.offset_adjust()
378
656
 
379
657
  #pass current local vars into new parser
380
- localvars.names.each{|varname|
658
+ #must pass the lists of nonblock, parentblock and currentblock vars separately
659
+ #then a table increment after each
660
+ nonblocky,blocky,current=decompose_lvars(true)
661
+ nonblocky.keys.each{|varname|
381
662
  rl.localvars[varname]=true
382
663
  }
383
- rl.localvars.start_block
664
+ rl.localvars.start_block
665
+ #incremental table, tells us what :local vars are defined in the str inclusion
666
+
667
+ if blocky
668
+ rl.localvars.start_block
669
+ blocky.keys.each{|varname|
670
+ rl.localvars[varname]=true
671
+ }
672
+ rl.localvars.start_block
673
+ #incremental table, tells us what :block vars are defined in the str inclusion
674
+ end
675
+
676
+ if current
677
+ rl.localvars.start_block
678
+ current.keys.each{|varname|
679
+ rl.localvars[varname]=true
680
+ }
681
+ rl.localvars.start_block
682
+ #incremental table, tells us what :current vars are defined in the str inclusion
683
+ end
684
+
685
+ rl.pending_here_bodies=@pending_here_bodies
384
686
 
385
687
  case ch
386
688
  when '@'
387
689
  tokens=[rl.at_identifier]
388
690
  when '$'
389
691
  tokens=[rl.dollar_identifier]
390
- when '{','('
692
+ when '{'#,'('
391
693
  tokens=[]
392
694
  loop {
393
695
  tok=rl.get1token
394
- EoiToken===tok and lexerror tok,"unterminated string inclusion"
395
696
  tokens << tok
396
- break if tok===PAIRS[ch] and rl.no_more? and rl.balanced_braces?
697
+ if EoiToken===tok
698
+ lexerror tok,"unterminated string inclusion"
699
+ break
700
+ end
701
+ if tok==='}'
702
+ if ErrorToken===tok #mismatched?
703
+ parsestack[1..-1].reverse_each{|ctx|
704
+ tok.error<< "\nno end found for #{ctx.class}"
705
+ }
706
+ break
707
+ end
708
+ break if rl.no_more? and rl.balanced_braces?
709
+ end
397
710
  }
398
711
  else
399
712
  raise 'hell'
400
713
  end
401
714
 
715
+ =begin
402
716
  if @linenum != rl.linenum
403
717
  last=tokens.pop
404
718
  fal=FileAndLineToken.new(@filename,@linenum, last.offset)
405
719
  tokens.push fal,last
406
720
  end
721
+ =end
407
722
 
408
723
  #need to verify that rl's @moretokens, @incomplete_here_tokens are empty
409
724
  rl.incomplete_here_tokens.empty? or
@@ -411,6 +726,13 @@ private
411
726
  rl.no_more? or
412
727
  raise 'uh-oh, ruby tokens were lexed past end of ruby code'
413
728
 
729
+ #assert offset_adjust()==rl.offset_adjust() #|| rl.offset_adjust().zero?
730
+ @offset_adjust=rl.offset_adjust
731
+
732
+ #input_position_set rl.input_position_raw
733
+ @file=rl.file
734
+ # @pending_here_bodies=rl.pending_here_bodies
735
+
414
736
  #local vars defined in inclusion get propagated to outer parser
415
737
  newvars=rl.localvars.__locals_lists[1..-1].map{|bag| bag.keys }.flatten
416
738
  newvars.each{|newvar| localvars[newvar]=true }
@@ -431,36 +753,53 @@ private
431
753
  # OCTCHARS=?0..?7
432
754
  # DECCHARS=?0..?9
433
755
  # HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
434
- BINCHARS=/[^01_]/
435
- OCTCHARS=/[^0-7_]/
436
- DECCHARS=/[^0-9_]/
437
- HEXCHARS=/[^0-9a-f_]/i
756
+ BINCHARS=/[01_]+/
757
+ OCTCHARS=/[0-7_]+/
758
+ allowed=/[0-9_]/
759
+ DECCHARS=/^#{allowed}*(\.(?!_)#{allowed}+)?([eE](?!_)(?:[+-])?#{allowed}+)?/
760
+ HEXCHARS=/[0-9a-f_]+/i
761
+ DECIMAL_INT_INTERP=:to_s
762
+ ARBITRARY_INT_INTERP=:to_s
763
+ NUMREXCACHE={}
438
764
  #0-9
439
765
  #-----------------------------------
440
766
  def number(str)
441
767
 
442
768
  return nil unless /^[0-9+\-]$/===str
443
769
 
444
- interp=:to_i
770
+ interp=DECIMAL_INT_INTERP
445
771
  str= (eat_next_if(/[+\-]/)or'')
446
772
  str<< (eat_next_if(?0)or'')
447
773
 
448
- if str[-1] == ?0 and !eof? and !nextchar.chr[/[.eE]/]
449
- typechar=eat_next_if(/[BOXD]/i)||'o'
450
- str << typechar
451
- interp=:oct
452
- unallowed=case typechar
453
- when 'b','B'; BINCHARS
454
- when 'x','X'; HEXCHARS
455
- when 'o','O'; OCTCHARS
456
- when 'd','D'; interp=:to_i; DECCHARS
457
- else raise :impossible
458
- end
774
+ if str[-1] == ?0 and !eof?
775
+ if nextchar.chr[/[bodx]/i]
776
+ typechar=eat_next_if(/[bodx]/i)
777
+ str << typechar
778
+ interp=ARBITRARY_INT_INTERP
779
+ allowed=case typechar
780
+ when 'b','B'; BINCHARS
781
+ when 'x','X'; HEXCHARS
782
+ when 'o','O'; OCTCHARS
783
+ when 'd','D'; DECCHARS
784
+ else raise :impossible
785
+ end
786
+ elsif /[.e]/i===nextchar.chr
787
+ interp=ARBITRARY_INT_INTERP
788
+ allowed=DECCHARS
789
+ else
790
+ interp=ARBITRARY_INT_INTERP
791
+ allowed=OCTCHARS
792
+ end
459
793
  else
460
- interp=:to_i
461
- unallowed =DECCHARS
794
+ interp=DECIMAL_INT_INTERP
795
+ allowed =DECCHARS
462
796
  end
463
797
 
798
+ #allowed = NUMREXCACHE[allowed] ||= /^#{allowed}*(\.(?!_)#{allowed}+)?([eE](?!_)(?:[+-])?#{allowed}+)?/
799
+ str<<(@file.scan(allowed)||'')
800
+ interp=:to_s if $1 or $2
801
+ return NumberToken.new(str.send(interp))
802
+
464
803
  addl_dig_seqs= (typechar)? 0 : 2 #den 210
465
804
  error=nil
466
805
 
@@ -528,11 +867,11 @@ end
528
867
  #-----------------------------------
529
868
  INET_NL_REX=/^(\r\n?|\n\r?)/
530
869
  def readnl
531
- #compatible with dos/mac style newlines...
870
+ #compatible with dos style newlines...
532
871
 
533
872
  eof? and return ''
534
873
 
535
- nl=readahead(2)[INET_NL_REX]
874
+ nl=readahead(2)[/\A\r?\n/]
536
875
  nl or return nil
537
876
  assert((1..2)===nl.length)
538
877
  @linenum+=1
@@ -542,7 +881,8 @@ end
542
881
  #-----------------------------------
543
882
  def newline(ch)
544
883
  offset= input_position
545
- nl=readnl
884
+ nl=read 1
885
+ @linenum+=1
546
886
  @moretokens << FileAndLineToken.new( @filename, @linenum, input_position )
547
887
  return NewlineToken.new( nl,offset)
548
888
  end
@@ -563,7 +903,7 @@ protected
563
903
  # delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:getc,:back1char
564
904
  require 'forwardable'
565
905
  extend Forwardable
566
- def_delegators :@file, :readahead,:readback, :read, :eof?
906
+ def_delegators :@file, :readahead, :readback, :read, :eof?
567
907
 
568
908
  def til_charset cs,len=16; @file.read_til_charset cs,len end
569
909
  def getc; @file.read1 end
@@ -571,14 +911,28 @@ protected
571
911
  def back1char; @file.move( -1 )end
572
912
  def prevchar; @file.readbehind 1 end
573
913
  def nextchar; @file.readahead1 end
574
- def eat_next_if(ch);
575
- saw=getchar
914
+
915
+ #-----------------------------------
916
+ def eat_next_if(ch)
917
+ saw=getc or return
576
918
  if Integer===ch
577
- ch==saw[0]
919
+ ch==saw
578
920
  else
579
- ch===saw
921
+ ch===saw.chr
580
922
  end or (back1char; return)
581
- return saw
923
+ return saw.chr
924
+ end
925
+
926
+ #-----------------------------------
927
+ def eat_if(pat,count)
928
+ oldpos=@file.pos
929
+ saw=read count
930
+ if pat===saw
931
+ return saw
932
+ else
933
+ @file.pos=oldpos
934
+ return nil
935
+ end
582
936
  end
583
937
 
584
938
  #-----------------------------------