rubylexer 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
@@ -1,2 +1,11 @@
1
- require 'rubylexer'
2
- #nothing else (yet)
1
+ require 'rubylexer/0.7.1'
2
+
3
+ class RubyLexer
4
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=0
5
+ DECIMAL_INT_INTERP=:to_i
6
+ ARBITRARY_INT_INTERP=:oct
7
+ AUTO_UNESCAPE_STRINGS=true
8
+ end
9
+
10
+
11
+
@@ -0,0 +1,2 @@
1
+ require 'rubylexer'
2
+ #nothing else (yet)
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -59,8 +59,8 @@ class CharHandler
59
59
  assert !frozen?
60
60
 
61
61
  @table[b]=action
62
- @matcher<<?\\ if CHARSETSPECIALS===b
63
- @matcher<<b
62
+ @matcher << ?\\ if CHARSETSPECIALS===b
63
+ @matcher << b
64
64
  end
65
65
  private :[]=
66
66
 
@@ -1,3 +1,23 @@
1
+ =begin legal crap
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2008 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
1
21
  class RubyLexer
2
22
  module NestedContexts
3
23
  class NestedContext
@@ -18,6 +38,8 @@ module NestedContexts
18
38
  def lhs=*x; end #do nothing
19
39
  end
20
40
 
41
+ #contexts which expect to see commas,
42
+ #(other than assignment lhs, which has no context)
21
43
  class ListContext < NestedContext
22
44
  end
23
45
 
@@ -41,6 +63,12 @@ module NestedContexts
41
63
  end
42
64
  end
43
65
 
66
+ class BeginEndContext < NestedContext
67
+ def initialize(str,linenum)
68
+ super('{','}',linenum)
69
+ end
70
+ end
71
+
44
72
  # class BlockParamListContext < ListContext
45
73
  # def initialize(linenum)
46
74
  # super('|','|',linenum)
@@ -67,7 +95,7 @@ module NestedContexts
67
95
  def starter; '|' end
68
96
  def ender; '|' end
69
97
  end
70
-
98
+
71
99
  class ImplicitContext < ListContext
72
100
  end
73
101
 
@@ -78,6 +106,9 @@ module NestedContexts
78
106
  def lhs; false end
79
107
  end
80
108
 
109
+ class KWParamListContextNoParen < ParamListContextNoParen
110
+ end
111
+
81
112
  class WhenParamListContext < ImplicitContext
82
113
  def initialize(starter,linenum)
83
114
  super(starter,nil,linenum)
@@ -94,16 +125,54 @@ module NestedContexts
94
125
  def initialize(linenum)
95
126
  super(nil,nil,linenum)
96
127
  end
128
+ def see lxr,msg
129
+ case msg
130
+ when :semi; lxr.parsestack.pop
131
+ when :comma,:splat; @multi=true
132
+ end
133
+ end
134
+ def multi_assign?; @multi end
97
135
  end
98
136
 
99
137
  class WantsEndContext < NestedContext
100
138
  def initialize(starter,linenum)
101
139
  super(starter,'end',linenum)
102
140
  end
141
+
142
+ attr_accessor :state
103
143
 
104
144
  def see lxr,msg
105
- msg==:rescue ? lxr.parsestack.push_rescue_sm : super
145
+ msg==:rescue and lxr.parsestack.push_rescue_sm
146
+ end
147
+ end
148
+
149
+ class ClassContext < WantsEndContext
150
+ def see(lxr,msg)
151
+ if msg==:semi and @state!=:semi
152
+ lxr.localvars_stack.push SymbolTable.new
153
+ @state=:semi
154
+ else
155
+ super
156
+ end
157
+ end
158
+ end
159
+
160
+ class DefContext < WantsEndContext
161
+ def initialize(linenum)
162
+ super('def', linenum)
163
+ @in_body=false
164
+ end
165
+
166
+ def see(lxr,msg)
167
+ if msg==:semi and @state!=:semi
168
+ @in_body=true
169
+ @state=:semi
170
+ else
171
+ super
172
+ end
106
173
  end
174
+
175
+ attr :in_body
107
176
  end
108
177
 
109
178
  class StringContext < NestedContext #not used yet
@@ -125,13 +194,19 @@ module NestedContexts
125
194
  end
126
195
 
127
196
 
128
- class RescueSMContext < NestedContext
197
+ class RescueSMContext < ListContext
129
198
  #normal progression: rescue => arrow => then
130
199
  EVENTS=[:rescue,:arrow,:then,:semi,:colon]
131
- LEGAL_SUCCESSORS={nil=> [:rescue], :rescue => [:arrow,:then,:semi,:colon],:arrow => [:then,:semi,:colon],:then => [nil]}
132
- #note on :semi and :colon events: in arrow state (and only then),
200
+ LEGAL_SUCCESSORS={
201
+ nil=> [:rescue],
202
+ :rescue => [:arrow,:then,:semi,:colon],
203
+ :arrow => [:then,:semi,:colon],
204
+ :then => []
205
+ }
206
+ #note on :semi and :colon events:
133
207
  # (unescaped) newline, semicolon, and (unaccompanied) colon
134
- # also trigger the :then event. otherwise, they are ignored.
208
+ # also trigger the :then event. they are ignored if in :then
209
+ # state already.
135
210
  attr :state
136
211
 
137
212
  def initialize linenum
@@ -153,6 +228,7 @@ module NestedContexts
153
228
  msg=:then
154
229
  self.equal? stack.pop or raise 'syntax error: then not expected at this time'
155
230
  #pop self off owning context stack
231
+ when :comma, :splat: return
156
232
  else super
157
233
  end
158
234
  LEGAL_SUCCESSORS[@state].include? msg or raise "rescue syntax error: #{msg} unexpected in #@state"
@@ -161,10 +237,10 @@ module NestedContexts
161
237
 
162
238
  end
163
239
 
164
- class ForSMContext < NestedContext
240
+ class ForSMContext < ImplicitLhsContext
165
241
  #normal progression: for => in
166
242
  EVENTS=[:for,:in]
167
- LEGAL_SUCCESSORS={nil=> :for, :for => :in,:in => nil}
243
+ LEGAL_SUCCESSORS={nil=> [:for], :for => [:in],:in => []}
168
244
  #note on :semi and :colon events: in :in state (and only then),
169
245
  # (unescaped) newline, semicolon, and (unaccompanied) colon
170
246
  # also trigger the :then event. otherwise, they are ignored.
@@ -185,9 +261,10 @@ module NestedContexts
185
261
  when :in: self.equal? stack.pop or raise 'syntax error: in not expected at this time'
186
262
  stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum)
187
263
  #pop self off owning context stack and push ExpectDoOrNlContext
264
+ when :comma, :splat: return
188
265
  else super
189
266
  end
190
- LEGAL_SUCCESSORS[@state] == msg or raise "for syntax error: #{msg} unexpected in #@state"
267
+ LEGAL_SUCCESSORS[@state].include? msg or raise "for syntax error: #{msg} unexpected in #@state"
191
268
  @state=msg
192
269
  end
193
270
  end
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -17,6 +17,8 @@
17
17
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
18
  =end
19
19
 
20
+ #warn "hacking $LOAD_PATH to find latest sequence"
21
+ #$:<<"../sequence/lib"
20
22
 
21
23
 
22
24
  require "assert"
@@ -31,6 +33,7 @@ require 'rubygems'
31
33
  #require 'sequence'
32
34
  require 'sequence/indexed'
33
35
  require 'sequence/file'
36
+ require 'sequence/list'
34
37
  #-----------------------------------
35
38
  assert !defined? ::RubyLexer
36
39
  $RuLexer=Class.new{}
@@ -40,6 +43,9 @@ end
40
43
  $RuLexer=nil
41
44
  #------------------------------------
42
45
  class RubyLexer
46
+ FASTER_STRING_ESCAPES=true
47
+ warn "FASTER_STRING_ESCAPES is off" unless FASTER_STRING_ESCAPES
48
+ AUTO_UNESCAPE_STRINGS=false
43
49
  class RuLexer
44
50
  WHSP=" \t\r\v\f"
45
51
  WHSPLF=WHSP+"\n"
@@ -49,20 +55,22 @@ class RubyLexer
49
55
 
50
56
  PAIRS={ '{'=>'}', '['=>']', '('=>')', '<'=>'>'}
51
57
 
52
- attr_reader :linenum,:last_operative_token
58
+ attr_reader :linenum,:last_operative_token,:original_file,:filename
59
+ attr_accessor :file #hack
53
60
 
54
61
  #-----------------------------------
55
- def initialize(filename, file, line)
62
+ def initialize(filename, file, line, offset_adjust=0)
56
63
  @filename=filename
57
64
 
58
65
  # String===file && file=IOext::FakeFile.new(file)
59
66
  file.binmode if File===file
60
67
  @original_file=file
61
68
  @file=file.to_sequence
69
+ @file.pos=@original_file.pos if @original_file.respond_to? :pos
62
70
  @linenum=line
63
71
  @toptable=nil #descendants must fill this out
72
+ @min_offset_adjust=@offset_adjust=offset_adjust
64
73
  @moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
65
- @last_operative_token=nil
66
74
  @endsets={}
67
75
  end
68
76
 
@@ -95,6 +103,14 @@ class RubyLexer
95
103
  end until tok.is_a? EoiToken
96
104
  end
97
105
 
106
+ #-----------------------------------
107
+ # def offset_adjust; 0 end
108
+
109
+ #-----------------------------------
110
+ # def offset_adjust_set! offset_adjust
111
+ # @offset_adjust=offset_adjust
112
+ # end
113
+
98
114
  include Enumerable
99
115
 
100
116
  private
@@ -121,8 +137,8 @@ private
121
137
 
122
138
  #-----------------------------------
123
139
  def regex(ch=nil)
124
- result=RenderExactlyStringToken.new('/').
125
- append_token double_quote("/")
140
+ result=RenderExactlyStringToken.new('/').append_token str=double_quote("/")
141
+ result.open=result.close="/"
126
142
  result.line=@linenum
127
143
  return result
128
144
  end
@@ -142,17 +158,20 @@ private
142
158
  assert ch=='%'
143
159
  oldpos= input_position
144
160
  eat_next_if(ch) or raise "fancy_quote, no "+ch
161
+ strlex=:double_quote
162
+ open="%"
145
163
 
146
164
  ch=getchar
165
+ open+=ch
147
166
  #ch.tr!('qwQWrx','"["{/`')
148
167
  type=case ch
149
- when 'q' then "'"
168
+ when 'q' then strlex=:single_quote; "'"
150
169
  when 'w' then "[" #word array
151
170
  when 'Q' then '"' #regular string
152
171
  when 'W' then '{' #dquotish word array
153
172
  when 'r' then '/' #regex
154
173
  when 'x' then '`' #exec it
155
- when 's' then '"' #symbol
174
+ when 's' then strlex=:single_quote; "'" #symbol
156
175
  #other letters, nums are illegal here
157
176
  when /^[a-z0-9]$/oi
158
177
  error= "unrecognized %string type: "+ch; '"'
@@ -160,33 +179,191 @@ private
160
179
  result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
161
180
  result.line=@linenum
162
181
  return result
163
- else back1char; '"' #no letter means string too
182
+
183
+ else open.chop!; back1char; '"' #no letter means string too
164
184
  end
165
185
 
186
+ if FASTER_STRING_ESCAPES
187
+ beg= readahead(2)=="\r\n" ? "\r\n" : nextchar.chr
188
+ assert /[\r\n]/===nextchar.chr if beg=="\r\n"
189
+ else
166
190
  beg=nextchar.chr
167
191
  if /^[\r\n]$/===beg then
168
192
  beg=INET_NL_REX
169
193
  end
170
-
171
- result=double_quote(beg, type, (PAIRS[beg] or beg))
194
+ end
195
+ result=send(strlex, beg, type, close=(PAIRS[beg] or beg))
172
196
  case ch
173
- when /^[Wwr]$/;
197
+ when /^[Wwr]$/:
198
+ str=result
174
199
  result=RenderExactlyStringToken.new(type).append_token(result)
200
+ result.open=str.open; result.close=str.close
175
201
  result.line=@linenum
176
- when 's'; result=SymbolToken.new(result.to_s)
202
+ when 's':
203
+ result.open=open+beg
204
+ result.close=close
205
+ result=SymbolToken.new result,nil,"%s"
177
206
  end
207
+ result.open=open+beg
208
+ result.close=close
178
209
  result.offset=oldpos
179
210
  return lexerror(result,error)
180
211
  end
181
212
 
182
213
  #-----------------------------------
183
- #this method is now misnamed, since it handles single quotes as well
184
214
  def double_quote(nester, type=nester, delimiter=nester)
185
- all_quote(nester,type,delimiter)
215
+ result=all_quote(nester,type,delimiter)
216
+ result.open=nester
217
+ result.close=delimiter
218
+ return result
186
219
  end
187
220
 
188
221
  #-----------------------------------
222
+ def single_quote(nester, type=nester, delimiter=nester)
223
+ result=all_quote nester, type, delimiter
224
+ # result.elems.first.gsub! /\\\\/, '\\'
225
+ result.open=result.close="'"
226
+ return result
227
+ end
228
+
229
+ #-----------------------------------
230
+ INTERIOR_REX_CACHE={}
231
+ EVEN_BS_S=/
232
+ ($|
233
+ [^\\c-]|
234
+ ($|[^\\])(c|[CM]-)|
235
+ ($|[^CM])-
236
+ )
237
+ (\\(?:c|[CM]-)?\\)*
238
+ /x
239
+ ILLEGAL_ESCAPED=/#{EVEN_BS_S}(\\([CM][^-]|x[^a-fA-F0-9]))/o #whaddaya do with this?
240
+ ILLEGAL_CRUNCH=/#{EVEN_BS_S}(\#@[^a-zA-Z_]|\#$[^a-zA-Z_0-9\-!@&+`'=~\/\\,.;<>*"$?:;])/o #and this?
189
241
  def all_quote(nester, type, delimiter, bs_handler=nil)
242
+ if FASTER_STRING_ESCAPES
243
+ #string must start with nester
244
+ if nester=="\r\n" #treat dos nl like unix
245
+ nester=delimiter="\n"
246
+ readnl
247
+ else
248
+ eat_next_if(nester[0])
249
+ end or return nil
250
+ special_char= nester.dup
251
+ special_char<< (delimiter) if nester!=delimiter
252
+
253
+ if "'["[type]
254
+ single_quotish=true
255
+ special=/\\./m
256
+ else
257
+ crunch=/#(?=[^{$@])/
258
+ escaped=/\\([^xcCM0-7]|(c|[CM].)([^\\]|(?=\\))|x.[0-9a-fA-F]?|[0-7]{1,3})/m
259
+ special=
260
+ case delimiter
261
+ when '\\': crunch
262
+ when '#': escaped
263
+ else /#{escaped}|#{crunch}/o
264
+ end
265
+ special_char<< maybe_crunch="#"
266
+ end
267
+ normal="[^#{Regexp.quote '\\'+special_char}]"
268
+ interior=INTERIOR_REX_CACHE[special_char]||=/#{normal}*(#{special}+#{normal}*)*/
269
+
270
+ #backslash is just scanned thru, not interpreted
271
+ #... that will change token format
272
+ #, which will make lots of downstream headaches.
273
+
274
+ str=StringToken.new type
275
+ str.bs_handler ||= case type
276
+ when '/' then :regex_esc_seq
277
+ when '{' then :Wquote_esc_seq
278
+ when '"','`',':' then :dquote_esc_seq
279
+ when "'" then :squote_esc_seq
280
+ when "[" then :wquote_esc_seq
281
+ else raise "unknown quote type: #{type}"
282
+ end
283
+
284
+ old_linenum=@linenum
285
+ nestlevel=1
286
+ loop{
287
+ str.append(@file.scan( interior ))
288
+ #scan could stop at any character if at the end of its buffer.
289
+ b=getchar
290
+ case b
291
+ when delimiter
292
+ assert nestlevel>0
293
+ if (nestlevel-=1)==0
294
+
295
+
296
+ case str.elems.last
297
+ #if last str data fragment was empty and
298
+ #followed an inclusion, delete it
299
+ #unless there was an escnl between inclusion and string end
300
+ when ''
301
+ str.elems.size>1 and
302
+ if /\\\r?\n(.|\r?\n)\Z/===@file.readbehind(5)
303
+ #do nothing
304
+ else
305
+ str.elems.pop
306
+ end
307
+ when /\r\Z/ #if delim is \n, trailing (literal) \r is chopped
308
+ str.elems.last.chomp! "\r" if delimiter=="\n"
309
+ end
310
+
311
+ str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
312
+
313
+ nlcount=0
314
+ str.elems.each{|frag|
315
+ next unless String===frag
316
+ #dos nls turn into unix nls in string literals
317
+ nlcount+=frag.count("\n")
318
+ frag.gsub!(/\r\n/, "\n")
319
+ }
320
+
321
+ nlcount+=1 if delimiter=="\n"
322
+ str.line=@linenum+=nlcount
323
+ if nlcount>0
324
+ #emit eol marker later if line has changed
325
+ @moretokens << FileAndLineToken.new(
326
+ @filename,@linenum,input_position
327
+ )
328
+ @pending_here_bodies.each{|body|
329
+ body.allow_ooo_offset=true
330
+ } unless delimiter=="\n"
331
+ end
332
+
333
+
334
+ str.open=nester
335
+ str.close=delimiter
336
+ return str
337
+ end
338
+ assert nestlevel>0
339
+ when nester
340
+ #this branch ignored if nester==delimiter
341
+ assert(nester!=delimiter)
342
+ nestlevel+=1
343
+ when nil then raise "nil char from each_byte?" #never happens
344
+ when maybe_crunch
345
+ nc=nextchar.chr
346
+ nc[/^[{@$]$/] and b=ruby_code(nc)
347
+ when "\\"
348
+ back1char
349
+ next
350
+ when "" #eof
351
+ lexerror str, "unterminated #{delimiter}-string at eof"
352
+ break
353
+ end
354
+
355
+ #shouldn't tolerate ILLEGAL_ESCAPED in str (unless single quotish)....
356
+ lexerror str, "illegal escape sequence" if !("['"[type]) and ILLEGAL_ESCAPED===b
357
+
358
+ str.append b
359
+ }
360
+
361
+ assert eof?
362
+ str.line=@linenum
363
+ str
364
+ else
365
+
366
+
190
367
  endset="\r\n\\\\"
191
368
 
192
369
  #string must start with nester
@@ -199,7 +376,8 @@ private
199
376
  end or return nil
200
377
 
201
378
  bs_handler ||= case type
202
- when '/','{' then :regex_esc_seq
379
+ when '/' then :regex_esc_seq
380
+ when '{' then :Wquote_esc_seq
203
381
  when '"','`',':' then :dquote_esc_seq
204
382
  when "'" then :squote_esc_seq
205
383
  when "[" then :wquote_esc_seq
@@ -212,6 +390,7 @@ private
212
390
  endset<<maybe_crunch="#" unless "'["[type]
213
391
  endset=
214
392
  @endsets[endset] ||= /[#{endset}]/
393
+ false&& last_escnl_elem_idx=nil
215
394
  loop{
216
395
  str.append(til_charset( endset ))
217
396
  b=getchar
@@ -221,14 +400,34 @@ private
221
400
  end
222
401
  case b
223
402
  when delimiter
403
+ assert nestlevel>0
224
404
  if (nestlevel-=1)==0
405
+
406
+ #if last str data fragment was empty and
407
+ #followed an inclusion, delete it
408
+ #unless there was an escnl between inclusion and string end
409
+ if str.elems.last=='' and str.elems.size>1
410
+ if /\\\r?\n(.|\r?\n)\Z/===@file.readbehind(5)
411
+ #do nothing
412
+ else
413
+ str.elems.pop
414
+ end
415
+ end
416
+
225
417
  str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
226
- #emit eol marker later if line has changed
227
418
  str.line=@linenum
228
- @linenum != old_linenum and @moretokens <<
229
- FileAndLineToken.new(@filename,@linenum, input_position)
419
+ if @linenum != old_linenum
420
+ #emit eol marker later if line has changed
421
+ @moretokens << FileAndLineToken.new(
422
+ @filename,@linenum,input_position
423
+ )
424
+ @pending_here_bodies.each{|body|
425
+ body.allow_ooo_offset=true
426
+ } unless nester==INET_NL_REX
427
+ end
230
428
  return str
231
429
  end
430
+ assert nestlevel>0
232
431
  when nester
233
432
  #this branch ignored if nester==delimiter
234
433
  assert(nester!=delimiter)
@@ -248,11 +447,13 @@ private
248
447
  break
249
448
  end
250
449
  str.append b
450
+
251
451
  }
252
452
 
253
453
  assert eof?
254
454
  str.line=@linenum
255
455
  str
456
+ end
256
457
  end
257
458
 
258
459
  #-----------------------------------
@@ -268,26 +469,17 @@ private
268
469
  when '#' then '#'
269
470
  when /^[#{ESCAPECHRS}]$/o
270
471
  k.tr(ESCAPECHRS,ESCAPESEQS)
271
- =begin not needed anymore
272
- when "a" then "\a"
273
- when "b" then "\b"
274
- when "e" then "\e"
275
- when "f" then "\f"
276
- when "n" then "\n"
277
- when "r" then "\r"
278
- when "s" then "\ "
279
- when "t" then "\t"
280
- when "v" then "\v"
281
- =end
282
472
  when "M"
283
473
  eat_next_if(?-) or raise 'bad \\M sequence'
284
474
  (getchar_maybe_escape | 0x80).chr
285
475
 
286
476
  when "C"
287
477
  eat_next_if(?-) or raise 'bad \\C sequence'
478
+ nextchar==?? and getchar and return "\177" #wtf?
288
479
  (getchar_maybe_escape & 0x9F).chr
289
480
 
290
481
  when "c"
482
+ nextchar==?? and getchar and return "\177" #wtf?
291
483
  (getchar_maybe_escape & 0x9F).chr
292
484
 
293
485
  when /^[0-7]$/
@@ -306,31 +498,33 @@ private
306
498
  str.hex.chr
307
499
 
308
500
  else
309
- '\\'+k
501
+ k
310
502
  end
311
503
  end
312
504
 
313
505
  #-----------------------------------
314
506
  def regex_esc_seq(ch,nester,delimiter)
315
507
  assert ch == '\\'
316
- c=getchar
317
- return case c
318
- when "\n"
319
- @linenum+=1
320
- ''#ch+c
321
- when nester,delimiter ,"/"
322
- c
323
- #when "c"
324
- # ch + c + getchar
325
- #when "M","C"
326
- # eat_next_if(?-) or
327
- # lexerror "illegal \\#{c}- esc sequence"
328
- # ch + c + '-' + (eat_next_if(/[^\\]/)or'')
329
- # #if this \M- or \C- sequence is continued by
330
- # #another backslash, we'll just leave the
331
- # #backslash on the input, to be read by the next pass
332
- else
333
- ch+c
508
+ ch=getchar
509
+ if ch=="\n"
510
+ @linenum+=1
511
+ return ''
512
+ end
513
+ '\\'+ch
514
+ end
515
+
516
+ #-----------------------------------
517
+ def Wquote_esc_seq(ch,nester,delimiter)
518
+ assert ch == '\\'
519
+ case ch=getchar
520
+ when "\n": @linenum+=1; ch
521
+ when nester,delimiter: ch
522
+ when /[\s\v\\]/: ch
523
+ else
524
+ back1char
525
+ result=dquote_esc_seq('\\',nester,delimiter)
526
+ #/\s|\v/===result and result="\\"+result
527
+ result
334
528
  end
335
529
  end
336
530
 
@@ -340,16 +534,16 @@ private
340
534
 
341
535
  #get the escaped character
342
536
  escchar=getchar
343
- return (case escchar
344
- #all \ sequences but \delimiter, \nester
345
- #are passed thru unchanged; actual
537
+ case escchar
538
+ #all \ sequences
539
+ #are unescaped; actual
346
540
  #newlines are counted but not changed
347
- when delimiter,nester
348
- ''
349
- when "\n"
350
- @linenum+=1; "\\"
351
- else '\\'
352
- end+escchar)
541
+ when delimiter,nester,'\\': escchar
542
+ # when delimiter,nester: escchar
543
+ when "\n": @linenum+=1; escchar
544
+ when /[\s\v]/: escchar
545
+ else "\\"+escchar
546
+ end
353
547
  end
354
548
 
355
549
  #-----------------------------------
@@ -358,52 +552,173 @@ private
358
552
 
359
553
  #get the escaped character
360
554
  escchar=getchar
361
- return (case escchar
362
- #all \ sequences but \delimiter, \nester and \\
363
- #are passed thru unchanged; actual
555
+ case escchar
556
+ #all \ sequences
557
+ #are unescaped; actual
558
+ #newlines are counted but not changed
559
+ when delimiter,nester,'\\': escchar
560
+ # when delimiter,nester: escchar
561
+ when "\n": @linenum+=1; "\\"+escchar
562
+ else "\\"+escchar
563
+ end
564
+ end
565
+
566
+ #-----------------------------------
567
+ def squote_heredoc_esc_seq(ch,nester,delimiter)
568
+ assert(ch=='\\')
569
+
570
+ #get the escaped character
571
+ escchar=getchar
572
+ case escchar
573
+ #all \ sequences
574
+ #are unescaped; actual
364
575
  #newlines are counted but not changed
365
- when delimiter,nester,'\\'
366
- ''
367
- when "\n"
368
- @linenum+=1; "\\"
369
- else '\\'
370
- end+escchar)
576
+ when delimiter,nester: escchar
577
+ # when delimiter,nester: escchar
578
+ when "\n": @linenum+=1; "\\"+escchar
579
+ else "\\"+escchar
580
+ end
581
+ end
582
+
583
+ =begin
584
+ #-----------------------------------
585
+ def squote_esc_seq(ch,nester,delimiter)
586
+ assert(ch=='\\')
587
+
588
+ #get the escaped character
589
+ escchar=getchar
590
+ escchar=="\n" and @linenum+=1
591
+ escchar="\\"+escchar unless escchar[/['\\]/]
592
+ return escchar
371
593
  end
594
+ =end
595
+ # alias squote_esc_seq wquote_esc_seq
372
596
 
597
+ module RecursiveRubyLexer
598
+ def initial_nonblock_levels
599
+ @localvars_stack.size==1 ? 2 : 1
600
+ end
601
+ end
602
+
603
+ def initial_nonblock_levels; 1 end
604
+ def first_current_level
605
+ result=@localvars_stack.last.__locals_lists.size-initial_nonblock_levels
606
+ result=[initial_nonblock_levels,result].max
607
+ result
608
+ end
609
+
610
+ def merge_levels levels, nil_empty_class
611
+ case (levels.size rescue 0)
612
+ when 0: {} unless nil_empty_class
613
+ when 1: levels.first.dup
614
+ else levels.inject{|a,b| a.merge b}
615
+ end
616
+ end
617
+
618
+ def decompose_lvars(nil_empty_class=false)
619
+ levels=
620
+ @localvars_stack.last.__locals_lists
621
+ nonblocky=merge_levels levels[0...initial_nonblock_levels], nil_empty_class
622
+ blocky=merge_levels levels[initial_nonblock_levels...first_current_level], nil_empty_class
623
+ current=merge_levels levels[first_current_level..-1], nil_empty_class
624
+ return nonblocky,blocky,current
625
+ end
626
+
627
+ def new_lvar_type
628
+ size=@localvars_stack.last.__locals_lists.size
629
+ return :local if size<=initial_nonblock_levels
630
+ return :block if size<first_current_level
631
+ return :current
632
+ end
633
+
634
+ def lvar_type(name)
635
+ nonblocky,blocky,current=decompose_lvars
636
+ nonblocky[name] and return :local
637
+ blocky[name] and return :block
638
+ current[name] and return :current
639
+ return new_lvar_type
640
+ end
641
+
642
+ def assign_lvar_type!(vartok)
643
+ vartok.respond_to? :lvar_type= and
644
+ vartok.lvar_type=lvar_type(vartok.ident)
645
+ return vartok
646
+ end
647
+
373
648
  #-----------------------------------
374
649
  def ruby_code(ch='{')
375
650
  assert ch[/^[{(@$]$/]
376
651
  klass= RubyLexer===self ? self.class : RubyLexer
377
- rl=klass.new(@filename,@file,@linenum)
652
+ rl=klass.new(@filename,@file,@linenum,offset_adjust())
653
+ rl.extend RecursiveRubyLexer
654
+ # rl.offset_adjust_set! offset_adjust()
655
+ assert offset_adjust()==rl.offset_adjust()
378
656
 
379
657
  #pass current local vars into new parser
380
- localvars.names.each{|varname|
658
+ #must pass the lists of nonblock, parentblock and currentblock vars separately
659
+ #then a table increment after each
660
+ nonblocky,blocky,current=decompose_lvars(true)
661
+ nonblocky.keys.each{|varname|
381
662
  rl.localvars[varname]=true
382
663
  }
383
- rl.localvars.start_block
664
+ rl.localvars.start_block
665
+ #incremental table, tells us what :local vars are defined in the str inclusion
666
+
667
+ if blocky
668
+ rl.localvars.start_block
669
+ blocky.keys.each{|varname|
670
+ rl.localvars[varname]=true
671
+ }
672
+ rl.localvars.start_block
673
+ #incremental table, tells us what :block vars are defined in the str inclusion
674
+ end
675
+
676
+ if current
677
+ rl.localvars.start_block
678
+ current.keys.each{|varname|
679
+ rl.localvars[varname]=true
680
+ }
681
+ rl.localvars.start_block
682
+ #incremental table, tells us what :current vars are defined in the str inclusion
683
+ end
684
+
685
+ rl.pending_here_bodies=@pending_here_bodies
384
686
 
385
687
  case ch
386
688
  when '@'
387
689
  tokens=[rl.at_identifier]
388
690
  when '$'
389
691
  tokens=[rl.dollar_identifier]
390
- when '{','('
692
+ when '{'#,'('
391
693
  tokens=[]
392
694
  loop {
393
695
  tok=rl.get1token
394
- EoiToken===tok and lexerror tok,"unterminated string inclusion"
395
696
  tokens << tok
396
- break if tok===PAIRS[ch] and rl.no_more? and rl.balanced_braces?
697
+ if EoiToken===tok
698
+ lexerror tok,"unterminated string inclusion"
699
+ break
700
+ end
701
+ if tok==='}'
702
+ if ErrorToken===tok #mismatched?
703
+ parsestack[1..-1].reverse_each{|ctx|
704
+ tok.error<< "\nno end found for #{ctx.class}"
705
+ }
706
+ break
707
+ end
708
+ break if rl.no_more? and rl.balanced_braces?
709
+ end
397
710
  }
398
711
  else
399
712
  raise 'hell'
400
713
  end
401
714
 
715
+ =begin
402
716
  if @linenum != rl.linenum
403
717
  last=tokens.pop
404
718
  fal=FileAndLineToken.new(@filename,@linenum, last.offset)
405
719
  tokens.push fal,last
406
720
  end
721
+ =end
407
722
 
408
723
  #need to verify that rl's @moretokens, @incomplete_here_tokens are empty
409
724
  rl.incomplete_here_tokens.empty? or
@@ -411,6 +726,13 @@ private
411
726
  rl.no_more? or
412
727
  raise 'uh-oh, ruby tokens were lexed past end of ruby code'
413
728
 
729
+ #assert offset_adjust()==rl.offset_adjust() #|| rl.offset_adjust().zero?
730
+ @offset_adjust=rl.offset_adjust
731
+
732
+ #input_position_set rl.input_position_raw
733
+ @file=rl.file
734
+ # @pending_here_bodies=rl.pending_here_bodies
735
+
414
736
  #local vars defined in inclusion get propagated to outer parser
415
737
  newvars=rl.localvars.__locals_lists[1..-1].map{|bag| bag.keys }.flatten
416
738
  newvars.each{|newvar| localvars[newvar]=true }
@@ -431,36 +753,53 @@ private
431
753
  # OCTCHARS=?0..?7
432
754
  # DECCHARS=?0..?9
433
755
  # HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
434
- BINCHARS=/[^01_]/
435
- OCTCHARS=/[^0-7_]/
436
- DECCHARS=/[^0-9_]/
437
- HEXCHARS=/[^0-9a-f_]/i
756
+ BINCHARS=/[01_]+/
757
+ OCTCHARS=/[0-7_]+/
758
+ allowed=/[0-9_]/
759
+ DECCHARS=/^#{allowed}*(\.(?!_)#{allowed}+)?([eE](?!_)(?:[+-])?#{allowed}+)?/
760
+ HEXCHARS=/[0-9a-f_]+/i
761
+ DECIMAL_INT_INTERP=:to_s
762
+ ARBITRARY_INT_INTERP=:to_s
763
+ NUMREXCACHE={}
438
764
  #0-9
439
765
  #-----------------------------------
440
766
  def number(str)
441
767
 
442
768
  return nil unless /^[0-9+\-]$/===str
443
769
 
444
- interp=:to_i
770
+ interp=DECIMAL_INT_INTERP
445
771
  str= (eat_next_if(/[+\-]/)or'')
446
772
  str<< (eat_next_if(?0)or'')
447
773
 
448
- if str[-1] == ?0 and !eof? and !nextchar.chr[/[.eE]/]
449
- typechar=eat_next_if(/[BOXD]/i)||'o'
450
- str << typechar
451
- interp=:oct
452
- unallowed=case typechar
453
- when 'b','B'; BINCHARS
454
- when 'x','X'; HEXCHARS
455
- when 'o','O'; OCTCHARS
456
- when 'd','D'; interp=:to_i; DECCHARS
457
- else raise :impossible
458
- end
774
+ if str[-1] == ?0 and !eof?
775
+ if nextchar.chr[/[bodx]/i]
776
+ typechar=eat_next_if(/[bodx]/i)
777
+ str << typechar
778
+ interp=ARBITRARY_INT_INTERP
779
+ allowed=case typechar
780
+ when 'b','B'; BINCHARS
781
+ when 'x','X'; HEXCHARS
782
+ when 'o','O'; OCTCHARS
783
+ when 'd','D'; DECCHARS
784
+ else raise :impossible
785
+ end
786
+ elsif /[.e]/i===nextchar.chr
787
+ interp=ARBITRARY_INT_INTERP
788
+ allowed=DECCHARS
789
+ else
790
+ interp=ARBITRARY_INT_INTERP
791
+ allowed=OCTCHARS
792
+ end
459
793
  else
460
- interp=:to_i
461
- unallowed =DECCHARS
794
+ interp=DECIMAL_INT_INTERP
795
+ allowed =DECCHARS
462
796
  end
463
797
 
798
+ #allowed = NUMREXCACHE[allowed] ||= /^#{allowed}*(\.(?!_)#{allowed}+)?([eE](?!_)(?:[+-])?#{allowed}+)?/
799
+ str<<(@file.scan(allowed)||'')
800
+ interp=:to_s if $1 or $2
801
+ return NumberToken.new(str.send(interp))
802
+
464
803
  addl_dig_seqs= (typechar)? 0 : 2 #den 210
465
804
  error=nil
466
805
 
@@ -528,11 +867,11 @@ end
528
867
  #-----------------------------------
529
868
  INET_NL_REX=/^(\r\n?|\n\r?)/
530
869
  def readnl
531
- #compatible with dos/mac style newlines...
870
+ #compatible with dos style newlines...
532
871
 
533
872
  eof? and return ''
534
873
 
535
- nl=readahead(2)[INET_NL_REX]
874
+ nl=readahead(2)[/\A\r?\n/]
536
875
  nl or return nil
537
876
  assert((1..2)===nl.length)
538
877
  @linenum+=1
@@ -542,7 +881,8 @@ end
542
881
  #-----------------------------------
543
882
  def newline(ch)
544
883
  offset= input_position
545
- nl=readnl
884
+ nl=read 1
885
+ @linenum+=1
546
886
  @moretokens << FileAndLineToken.new( @filename, @linenum, input_position )
547
887
  return NewlineToken.new( nl,offset)
548
888
  end
@@ -563,7 +903,7 @@ protected
563
903
  # delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:getc,:back1char
564
904
  require 'forwardable'
565
905
  extend Forwardable
566
- def_delegators :@file, :readahead,:readback, :read, :eof?
906
+ def_delegators :@file, :readahead, :readback, :read, :eof?
567
907
 
568
908
  def til_charset cs,len=16; @file.read_til_charset cs,len end
569
909
  def getc; @file.read1 end
@@ -571,14 +911,28 @@ protected
571
911
  def back1char; @file.move( -1 )end
572
912
  def prevchar; @file.readbehind 1 end
573
913
  def nextchar; @file.readahead1 end
574
- def eat_next_if(ch);
575
- saw=getchar
914
+
915
+ #-----------------------------------
916
+ def eat_next_if(ch)
917
+ saw=getc or return
576
918
  if Integer===ch
577
- ch==saw[0]
919
+ ch==saw
578
920
  else
579
- ch===saw
921
+ ch===saw.chr
580
922
  end or (back1char; return)
581
- return saw
923
+ return saw.chr
924
+ end
925
+
926
+ #-----------------------------------
927
+ def eat_if(pat,count)
928
+ oldpos=@file.pos
929
+ saw=read count
930
+ if pat===saw
931
+ return saw
932
+ else
933
+ @file.pos=oldpos
934
+ return nil
935
+ end
582
936
  end
583
937
 
584
938
  #-----------------------------------