rubylexer 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +90 -0
- data/Manifest.txt +54 -3
- data/README.txt +4 -7
- data/Rakefile +3 -2
- data/lib/rubylexer.rb +856 -323
- data/lib/rubylexer/0.7.0.rb +11 -2
- data/lib/rubylexer/0.7.1.rb +2 -0
- data/lib/rubylexer/charhandler.rb +4 -4
- data/lib/rubylexer/context.rb +86 -9
- data/lib/rubylexer/rulexer.rb +455 -101
- data/lib/rubylexer/token.rb +166 -43
- data/lib/rubylexer/tokenprinter.rb +16 -8
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.vpj +98 -0
- data/test/code/all_the_gems.rb +33 -0
- data/test/code/all_the_raas.rb +226 -0
- data/test/code/all_the_rubies.rb +2 -0
- data/test/code/deletewarns.rb +19 -1
- data/test/code/dumptokens.rb +39 -8
- data/test/code/errscan +2 -0
- data/test/code/isolate_error.rb +72 -0
- data/test/code/lexloop +14 -0
- data/test/code/locatetest.rb +150 -8
- data/test/code/regression.rb +109 -0
- data/test/code/rubylexervsruby.rb +53 -15
- data/test/code/strgen.rb +138 -0
- data/test/code/tarball.rb +144 -0
- data/test/code/testcases.rb +11 -0
- data/test/code/tokentest.rb +115 -24
- data/test/data/__eof2.rb +1 -0
- data/test/data/__eof5.rb +2 -0
- data/test/data/__eof6.rb +2 -0
- data/test/data/cvtesc.rb +17 -0
- data/test/data/g.rb +6 -0
- data/test/data/hd0.rb +3 -0
- data/test/data/hdateof.rb +2 -0
- data/test/data/hdempty.rb +3 -0
- data/test/data/hdr.rb +9 -0
- data/test/data/hdr_dos.rb +13 -0
- data/test/data/hdr_dos2.rb +18 -0
- data/test/data/heart.rb +2 -0
- data/test/data/here_escnl.rb +25 -0
- data/test/data/here_escnl_dos.rb +20 -0
- data/test/data/here_squote.rb +3 -0
- data/test/data/heremonsters.rb +140 -0
- data/test/data/heremonsters.rb.broken +68 -0
- data/test/data/heremonsters.rb.broken.save +68 -0
- data/test/data/heremonsters_dos.rb +140 -0
- data/test/data/heremonsters_dos.rb.broken +68 -0
- data/test/data/illegal_oneliners.rb +1 -0
- data/test/data/illegal_stanzas.rb +0 -0
- data/test/data/make_ws_strdelim.rb +22 -0
- data/test/data/maven2_builer_test.rb +82 -0
- data/test/data/migration.rb +8944 -0
- data/test/data/modl.rb +6 -0
- data/test/data/modl_dos.rb +7 -0
- data/test/data/modl_fails.rb +10 -0
- data/test/data/multilinestring.rb +6 -0
- data/test/data/oneliners.rb +555 -0
- data/test/data/p-op.rb +2 -0
- data/test/data/p.rb +3 -1710
- data/test/data/s.rb +90 -21
- data/test/data/simple.rb +1 -0
- data/test/data/simple_dos.rb +1 -0
- data/test/data/stanzas.rb +1194 -0
- data/test/data/strdelim_crlf.rb +6 -0
- data/test/data/stuff.rb +6 -0
- data/test/data/stuff2.rb +5 -0
- data/test/data/stuff3.rb +6 -0
- data/test/data/stuff4.rb +6 -0
- data/test/data/tkweird.rb +20 -0
- data/test/data/unending_stuff.rb +5 -0
- data/test/data/whatnot.rb +8 -0
- data/test/data/ws_strdelim.rb +0 -0
- data/test/test.sh +239 -0
- data/testing.txt +39 -50
- metadata +110 -12
- data/test/code/dl_all_gems.rb +0 -43
- data/test/code/unpack_all_gems.rb +0 -15
- data/test/data/gemlist.txt +0 -280
data/test/code/strgen.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
#Strgen generates random Ruby string-like literals (quoted strings, regexps,
#backquotes and the %q %Q %r %s %x %w %W % forms) for fuzz-testing a lexer.
#Strgen.strgen is the entry point; the other methods are its building blocks.
module Strgen
  #bracket pairs usable as (nesting) delimiters of % literals
  PAIRS=[
    ['<','>'],
    ['(',')'],
    ['[',']'],
    ['{','}']
  ]
  #any single char usable as a non-nesting % delimiter
  ALLOWED_UNNESTING_FANCY=/[^<>\[\]{}()a-z0-9_]/i
  #letters that may follow %; the empty string stands for a bare %
  FANCY_TYPES=%w[q Q r s x w W]<<''
  SIMPLE_QUOTES=%w[" ' / `]
  #letters forming a complete escape on their own when put after a backslash
  SIMPLE_ESCAPES=%w[s n r t v f a b e]
  #letters/digits that start a longer escape sequence
  MULTI_ESCAPES=%w[x c C M 0 1 2 3 4 5 6 7]
  #any char that is in neither list above.
  #(explicit join: interpolating the Array itself only joins on ruby 1.8)
  NON_ESCAPES=/[^#{(SIMPLE_ESCAPES+MULTI_ESCAPES).join}]/

  #Return a random single-byte string which allow matches (via ===) and
  #which does not occur in the string disallow.
  #Loops until a suitable byte turns up, so allow must be satisfiable.
  def Strgen.rand_char_including(allow,disallow='')
    loop do
      q=rand(256).chr   #256, not 255, so that byte 0xFF can be produced too
      return q if allow===q and not disallow[q]
    end
  end

  #Return a random escape sequence: a backslash escape, or a #{...}
  #interpolation of a random integer.
  #disallow:: chars that may not directly follow the backslash
  #bsonly::   the literal's delimiter chars (last char of the starter + ender)
  def Strgen.rand_esc_seq(disallow,bsonly)
    limit=4
    bsonly[/\\/] and disallow+='\\'
    (disallow['#'] or bsonly['#']) and limit=3   #choice 3 is the one that emits '#'
    choice=rand limit
    #NOTE(review): when bsonly contains '#', this forces the interpolation
    #form even though limit was just lowered to exclude it; looks
    #contradictory, left as found -- confirm intent.
    choice=3 if disallow[/\\/] or bsonly[/\#/]
    case choice
    when 0 then "\\"+rand_char_including(NON_ESCAPES,disallow)
    when 1 then "\\"+SIMPLE_ESCAPES[rand(SIMPLE_ESCAPES.size)]
    when 2
      ch=MULTI_ESCAPES[rand(MULTI_ESCAPES.size)]
      case ch
      when "x" then "\\x"+rand(256).to_s(16)
      when "0".."7" then "\\"+rand(256).to_s(8)
      when "c" then "\\c"+rand_esc_operand(disallow,bsonly)
      when "C","M"
        #\C-x and \M-x need a '-'; if that's not allowed, pick something else
        return rand_esc_seq(disallow,bsonly) if disallow['-'] or bsonly['-']
        "\\"+ch+"-"+rand_esc_operand(disallow,bsonly)
      end
    when 3
      '#{'+rand(9999999999).to_s+'}'
    end
  end

  #Return the operand of a \c, \C- or \M- escape: either one plain char or
  #a nested escape sequence (in which '#' is disallowed).
  def Strgen.rand_esc_operand(disallow,bsonly)
    if rand(2).zero?
      rand_char_including(/[^\\]/,disallow+bsonly)
    else
      rand_esc_seq disallow+"#",bsonly
    end
  end

  #cache of "ordinary char" regexps, keyed by the set of chars they exclude
  CACHE={}

  #Return a random string-like literal, as source text, which ruby itself
  #accepts. Candidates that ruby rejects are thrown away and regenerated.
  #(A loop rather than recursion, so a run of rejects can't blow the stack.)
  def Strgen.strgen
    loop do
      result=gen_candidate
      return result if parses?(result)
    end
  end

  #Build one candidate literal; it is not yet known to be valid ruby.
  def Strgen.gen_candidate
    must_be_escaped="#\\".dup   #dup: this string gets appended to below
    type=nil
    case rand(3)
    when 0   #plain quotes: "..." '...' /.../ `...`
      starter=ender=SIMPLE_QUOTES[rand(SIMPLE_QUOTES.size)]
      must_be_escaped<<starter
    when 1   #% literal with bracket delimiters
      type=FANCY_TYPES[rand(FANCY_TYPES.size)]
      pair=PAIRS[rand(PAIRS.size)]
      starter= "%"+type+pair[0]
      ender= pair[1]
      must_be_escaped<<pair.join   #join: Array#to_s only joins on ruby 1.8
    when 2   #% literal with the same arbitrary char at both ends
      type=FANCY_TYPES[rand(FANCY_TYPES.size)]
      q=rand_char_including ALLOWED_UNNESTING_FANCY
      #%w and %W never get a whitespace delimiter; " is used instead
      /w/i===type and /\s|\v/===q and q='"'
      starter= "%"+type+q
      ender=q
      must_be_escaped<<q
    end
    regexy= (starter=="/" or type=='r')

    must_be_escaped+="[]{}()?+*" if regexy
    must_be_escaped+="\0" if type=='s'
    #char class matching every char that needs no escaping in this literal
    ordinary=
      CACHE[must_be_escaped]||=
        Regexp.new("[^"+Regexp.escape(must_be_escaped)+"]")

    #0 to 39 ordinary chars
    interior=(1..rand(40)).map{ rand_char_including ordinary }.join

    interior["\\"] and fail

    disallow=''
    bsonly=starter[-1,1]+ender
    disallow+="\0" if type=='s'
    disallow+=must_be_escaped.gsub('\\','') if regexy

    #sprinkle in up to 4 escape sequences, each at or before the previous
    #one; skipped entirely when backslash itself is the delimiter
    unless starter[-1,1]=="\\"
      poslimit=interior.size+1
      rand(5).times{
        pos= poslimit>0 ? rand(poslimit) : 0   #rand(0) would return a Float
        interior[pos,0]=rand_esc_seq disallow,bsonly
        poslimit=pos
      }
    end

    #in regexps, drop backslash+letter sequences again
    interior.gsub!(/\\[a-z]/i,'') if regexy

    #with carriage return as delimiter, the body may not begin with newlines
    starter[-1,1]=="\r" and interior.gsub!(/\A\n+/,'')

    #%s literals (symbols) may not be empty
    starter[1,1]=='s' and interior=='' and interior="x"

    starter+interior+ender
  end

  #Does ruby accept code as the body of a block? The block is only defined,
  #never called, so the literal in it is parsed but not executed.
  #(The old way of doing this, eval "BEGIN{break};..." inside a dummy loop,
  #is itself rejected by ruby 1.9 and later.)
  def Strgen.parses?(code)
    eval "proc() do #{code} end"
    true
  rescue ScriptError, StandardError   #not Exception: let Interrupt etc thru
    false
  end
end
|
126
|
+
|
127
|
+
#When this file is run as a script: generate ten million random string
#literals and hand each one to RubyLexerVsRuby.rubylexervsruby. Any literal
#that makes the comparison raise is printed as a hex-escaped string, ready
#to be pasted into an eval to reproduce the failure.
#NOTE(review): nothing in this file requires RubyLexerVsRuby; presumably it
#must already be loaded some other way -- confirm.
if __FILE__==$0
  10_000_000.times{|i|
    ss=nil
    begin
      ss=Strgen.strgen
      RubyLexerVsRuby.rubylexervsruby "-e#{i}", ss
    rescue Interrupt, SystemExit
      raise   #let ^C (or exit) actually stop the run
    rescue Exception
      raise unless ss   #the generator itself blew up; no string to report
      #unpack gives byte values on every ruby version (String#[] does not)
      puts %<failing string: eval "#{ss.unpack('C*').map{|b| '\\x'+b.to_s(16)}.join}">
    end
  }
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'open-uri'
require 'shellwords'
|
2
|
+
|
3
|
+
#Tarball downloads source archives (tar, zip, gem or bare .rb files, possibly
#gzip/bzip2 compressed) into a cache directory and unpacks each one into a
#directory of its own there. It relies on the external programs tar, unzip,
#gunzip and bunzip2.
class Tarball
  PROTOCOLS=%w[http https ftp]
  EXTENSIONS=%w[tar zip rb tgz tbz2 tbz gem]
  EXTRA_EXTENSIONS=%w[gz bz2 Z]
  EXTRA_EXTENSIONS_REX="\\.(?:#{EXTRA_EXTENSIONS.join('|')})"
  VERSIONTOO='' #was: "(?:[_-](.*))"
  #regexp source matching the extension(s) at the end of an archive's name
  ENDINGS="\\.(?:#{EXTENSIONS.join('|')})(?:#{EXTRA_EXTENSIONS_REX})?"
  #matches the url of an archive; capture 1 is the file name minus extensions
  TARBALL=%r<
    \A(?:#{PROTOCOLS.join('|')})://
    (?:[^/]+/)+
    (.*)
    #{VERSIONTOO}
    #{ENDINGS}
    \Z
  >ixo

  #Unpack the gem file named gem into a directory of the same name less the
  #.gem extension. The gem, its metadata.gz and its data.tar.gz are each
  #deleted once what they contain has been extracted.
  #File names are passed to the external programs as separate arguments (or
  #shell-escaped) because they can come from a url.
  def Tarball.unpack1gem(gem)
    gem=~/\.gem\z/ or fail ArgumentError, "not a .gem file: #{gem}"
    dir=gem.sub(/\.gem\z/,'')
    Dir.mkdir dir rescue nil   #may exist already

    system "tar","x","-f",gem,"-C",dir
    toplevel=Dir[dir+"/*"]-[dir+"/data.tar.gz",dir+"/metadata.gz"]
    toplevel.empty? or puts "gem archive toplevel contains extra files: #{toplevel.join(' ')}"
    #needs a shell for the redirection, hence escaping instead of an arg list
    gunzipped=system "gunzip -f #{Shellwords.escape dir+'/metadata.gz'} -c > #{Shellwords.escape dir+'/metadata'}"
    system "tar","xz","-f",dir+"/data.tar.gz","-C",dir

    (File.unlink gem rescue nil) if File.exist?(dir+"/metadata.gz") and File.exist?(dir+"/data.tar.gz")
    #the redirection creates metadata even if gunzip failed, so check both
    (File.unlink dir+"/metadata.gz" rescue nil) if gunzipped and File.exist?(dir+"/metadata")
    #whatever is in dir now that wasn't before came out of data.tar.gz
    unpacked=Dir[dir+"/*"]-[dir+"/data.tar.gz",dir+"/metadata.gz",dir+"/metadata"]-toplevel
    (File.unlink dir+"/data.tar.gz" rescue nil) unless unpacked.empty?
  end

  #Download url into cachedir (which must end in a slash) and unpack it.
  #Does nothing if the directory it would be unpacked into exists already.
  #A failed download is reported on stderr and otherwise ignored; the
  #partial file is removed. Returns nil in both of those cases.
  def Tarball.dl_and_unpack(cachedir,url)
    filename=url[%r{[^/]+\Z}] or fail ArgumentError, "no file name in url: #{url}"
    localname=cachedir+filename
    projectname=filename.sub(/#{ENDINGS}\Z/o,'')
    localdir= cachedir+projectname+"/"   #trailing slash: unpack appends file names to it

    if File.exist? localdir
      puts "skipping already extant #{localdir}"
      return
    end
    begin
      File.open(localname,"wb"){|disk|
        #URI#open, since Kernel#open no longer fetches urls on ruby 3
        URI.parse(url).open{|net|
          while buf=net.read(40960)
            disk.write buf
          end
        }
      }
    rescue Interrupt=>e
      File.unlink localname rescue nil
      #a real ^C propagates; subclasses of Interrupt (Timeout::Error was
      #one on ruby 1.8) just abandon this download
      raise if e.class==Interrupt
      return
    rescue StandardError=>e
      File.unlink localname rescue nil
      $stderr.puts "download of #{url} failed: #{e.class}: #{e.message}"
      return
    end

    unpack(cachedir,localname,projectname,localdir)
  end

  #Unpack the archive localname, which must be inside cachedir (which must
  #end in a slash). If every entry in the archive starts with projectname,
  #it is extracted directly into cachedir; otherwise into localdir, which is
  #created. The archive is deleted after a successful extraction.
  #projectname:: defaults to localname's file name minus extensions
  #localdir::    defaults to localname minus extensions, plus a slash
  #Raises RuntimeError if localname is not a known kind of archive.
  def Tarball.unpack(cachedir,localname,
                     projectname=localname[%r{/([^/]*)#{ENDINGS}\Z},1],
                     localdir=localname.sub(/#{ENDINGS}\Z/o,'')+"/")
    cachedir[-1,1]=="/" or fail
    localname[0,cachedir.size]==cachedir or fail

    if File.exist? localdir
      puts "skipping already extant #{localdir}"
      return
    end

    #rename .tgz,.tbz2? to the full form
    oldln=nil
    case localname
    when /\.tgz\Z/
      oldln=localname
      localname=localname[0...-4]+".tar.gz"
    when /\.tbz2?\Z/
      oldln=localname
      localname=localname[0...-$&.size]+".tar.bz2"
    end
    File.rename oldln, localname if oldln

    #remove any gz or bz2 whole-archive compression
    case localname
    when /\.bz2\Z/
      suffix=$&
      system("bunzip2",localname) or return
      localname=localname[0...-suffix.size]
    when /\.(gz|Z)\Z/
      suffix=$&
      system("gunzip","-f",localname) or return
      localname=localname[0...-suffix.size]
    end

    #now actually unpack the archive
    case localname
    when /\.rb\Z/
      Dir.mkdir localdir
      File.rename localname, localdir+localname[%r{[^/]+\Z}]
    when /\.gem\Z/ then unpack1gem localname
    when /\.zip\Z/
      filelist=`unzip -L -l #{Shellwords.escape localname}`
      if ($?.exitstatus||0) > 1
        puts "invalid zip file #{localname}"
        return
      end
      #drop the 3 header and 2 footer lines of the listing
      entries=filelist.split("\n")[3...-2]||[]
      wellformed=!entries.find{|entry|
        #the name is the 4th column of a listing line
        entry[/\A\s*[^\s]+\s+[^\s]+\s+[^\s]+\s+\^?(.*)\Z/,1].to_s[0...projectname.size] != projectname
      }
      destdir= wellformed ? cachedir : localdir
      Dir.mkdir localdir unless wellformed
      puts "unzip -L #{localname}  -d #{destdir}"
      system "unzip","-L",localname,"-d",destdir
      (File.unlink localname rescue nil) if ($?.exitstatus||0) <= 1

    when /\.tar\Z/
      entries=`tar tf #{Shellwords.escape localname}`.split("\n")
      wellformed=!entries.find{|entry|
        entry[0...projectname.size] != projectname
      }
      destdir= wellformed ? cachedir : localdir
      Dir.mkdir localdir unless wellformed
      #keep the archive around if it could not be extracted
      if system("tar","xf",localname,"-C",destdir)
        File.unlink localname rescue nil
      end

    else fail "unknown tarball type: #{localname}"
    end
  end

end
|
144
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#TestCases holds the corpus of code snippets that the lexer tests run over.
#It is read, when this file is loaded, from the data files under test/data
#of whichever directory in the load path contains them.
module TestCases
  #find the directory in $: that has the test data under it
  rldir=$:.find{|dir|
    File.exist? dir.to_s+'/test/data/oneliners.rb' and File.exist? dir.to_s+'/test/data/stanzas.rb'
  }
  #say what's wrong, rather than dying below with 'undefined method + for nil'
  rldir or raise LoadError, "can't find test/data/oneliners.rb and test/data/stanzas.rb under any directory in $:"
  datadir=rldir.to_s+'/test/data/'

  #one test case per line; blank lines and lines whose first non-blank
  #char is # are left out. Order is last-to-first.
  read_oneliners=proc{|name|
    IO.readlines(datadir+name).map{|x| x.chomp}.grep(/\A\s*[^#\s\n]/).reverse
  }
  #one test case per blank-line-separated group of lines; last-to-first.
  read_stanzas=proc{|name|
    IO.read(datadir+name).split("\n\n").grep(/./).reverse
  }

  ONELINERS=read_oneliners['oneliners.rb']
  STANZAS=read_stanzas['stanzas.rb']
  STANZAS.each{|stanza| stanza<<"\n" }   #put back a newline at the end of each
  ILLEGAL_ONELINERS=read_oneliners['illegal_oneliners.rb']
  ILLEGAL_STANZAS=read_stanzas['illegal_stanzas.rb']
  TESTCASES=ONELINERS+STANZAS
  ILLEGAL_TESTCASES=ILLEGAL_ONELINERS+ILLEGAL_STANZAS
end
|
data/test/code/tokentest.rb
CHANGED
@@ -1,4 +1,22 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
=begin legal crap
|
3
|
+
rubylexer - a ruby lexer written in ruby
|
4
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
5
|
+
|
6
|
+
This library is free software; you can redistribute it and/or
|
7
|
+
modify it under the terms of the GNU Lesser General Public
|
8
|
+
License as published by the Free Software Foundation; either
|
9
|
+
version 2.1 of the License, or (at your option) any later version.
|
10
|
+
|
11
|
+
This library is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public
|
17
|
+
License along with this library; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
=end
|
2
20
|
$Debug=true
|
3
21
|
require "rubylexer"
|
4
22
|
require "getoptlong"
|
@@ -34,24 +52,52 @@ module SimpleVerify
|
|
34
52
|
end
|
35
53
|
|
36
54
|
class WToken; include SimpleVerify; end
|
37
|
-
class NewlineToken; include SimpleVerify; end
|
38
55
|
class IgnoreToken; include SimpleVerify; end
|
39
56
|
class MethNameToken; include SimpleVerify; end
|
40
57
|
|
58
|
+
#Newline tokens are verified like the other SimpleVerify tokens, except that
#one is also accepted when the file being checked is at its end.
class NewlineToken
  include SimpleVerify

  def verify_offset(fd)
    matched=super
    matched || fd.eof?
  end
end
|
64
|
+
|
41
65
|
class SymbolToken
|
42
66
|
def verify_offset(fd)
|
43
67
|
la=fd.read(2)
|
44
68
|
case la
|
45
|
-
when '%s':
|
46
|
-
|
69
|
+
when '%s':
|
70
|
+
quote=fd.read(1)
|
71
|
+
ender=RubyLexer::PAIRS[quote] || quote
|
72
|
+
body=@ident[2...-1]
|
73
|
+
when /^:(['"])/:
|
74
|
+
#stay right here
|
75
|
+
quote=ender=$1
|
76
|
+
body=@ident[2...-1]
|
77
|
+
when /^:/:
|
78
|
+
fd.pos-=1
|
79
|
+
body=@ident[1..-1]
|
47
80
|
else raise 'unrecognized symbol type'
|
48
81
|
end
|
49
|
-
|
82
|
+
|
83
|
+
bodyread=fd.read(body.length)
|
84
|
+
|
85
|
+
#punt if its too hard
|
86
|
+
if quote
|
87
|
+
bs="\\"
|
88
|
+
hardstuff= /[#{bs}#{quote}#{bs}#{ender}\#\\]/
|
89
|
+
return true if (body+bodyread).match(hardstuff)
|
90
|
+
end
|
50
91
|
|
92
|
+
if bodyread==body
|
93
|
+
return fd.read(1)==ender if ender
|
94
|
+
return true
|
95
|
+
end
|
51
96
|
end
|
52
97
|
end
|
53
98
|
|
54
99
|
class EoiToken
|
100
|
+
include SimpleVerify
|
55
101
|
def verify_offset(fd)
|
56
102
|
result=super(fd)
|
57
103
|
fd.eof?
|
@@ -78,8 +124,8 @@ end
|
|
78
124
|
class HerePlaceholderToken
|
79
125
|
def verify_offset(fd)
|
80
126
|
'<<'==fd.read(2) or return false
|
81
|
-
@dash and (
|
82
|
-
case ch=fd.
|
127
|
+
@dash and ('-'==fd.read(1) or return false)
|
128
|
+
case ch=fd.read(1)[0]
|
83
129
|
when ?', ?`, ?"
|
84
130
|
@quote==ch.chr and
|
85
131
|
fd.read(@ender.size)==@ender and
|
@@ -98,14 +144,38 @@ class StringToken
|
|
98
144
|
FANCY_QUOTE_BEGINNINGS= {'`'=>'%x', '['=>'%w', '{'=>'%W',
|
99
145
|
'"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'}
|
100
146
|
#Check that this string's opening delimiter is at fd's current position and,
#if it is, go on to check the pieces of the string after it.
#Returns false right away if the delimiter is not there.
def verify_offset(fd)
  return false unless fd.read(open.size)==open
  verify_subtoken_offsets(fd)
end
|
105
152
|
|
106
153
|
#Check each element of @elems, in order, against the text at fd's current
#position. String elements must equal the text there, with dos newlines
#(\r\n) in the file counting as a single \n; anything else is asked to
#verify itself. Returns true if everything matched and false on a string
#mismatch (or if the file ends early). Raises LexerError if a non-string
#element fails to verify.
def verify_subtoken_offsets(fd)
  @elems.each{|elem|
    case elem
    when String
      #get string data to compare against, translating dos newlines to
      #unix. Reading a char at a time means exactly the bytes making up
      #this element are consumed, so the next element starts in the
      #right place.
      #NOTE(review): elem.size counts chars on ruby 1.9+, while fd.read
      #counts bytes; multibyte strings may need more care -- confirm.
      saw=String.new
      while saw.size<elem.size
        ch=fd.read(1) or return false   #hit end of file
        if ch=="\r"
          nxt=fd.read(1)
          if nxt=="\n" then ch="\n"     #dos newline
          elsif nxt then fd.pos-=1      #lone \r; put back what followed it
          end
        end
        saw<<ch
      end
      saw==elem or return false
    else elem.verify_offset(fd) or raise LexerError
    end
  }
  return true
end
|
111
181
|
|
@@ -127,7 +197,7 @@ class RubyCode
|
|
127
197
|
}
|
128
198
|
assert nexttok.nil?
|
129
199
|
assert thistok.object_id==@ident.last.object_id
|
130
|
-
assert WToken===thistok
|
200
|
+
assert(( WToken===thistok or EoiToken===thistok&&thistok.error ))
|
131
201
|
fd.pos=endpos
|
132
202
|
end
|
133
203
|
|
@@ -150,40 +220,52 @@ end
|
|
150
220
|
# end
|
151
221
|
#end
|
152
222
|
end
|
223
|
+
|
153
224
|
public
|
154
225
|
|
226
|
+
|
155
227
|
#Check that the token tok really is found in the source at the offset it
#claims, and (for most kinds of token) that it ends where the next one
#starts. Problems are only reported, on stderr; nothing is raised for them.
#tok::    the token to check
#file::   the source text; defaults to @original_file. A String is
#         converted with to_sequence.
#endpos:: where tok should end; defaults to the offset of the next queued
#         token (@moretokens[0]), or input_position if none is queued.
#file's position is put back the way it was, even if checking raises.
#Always returns nil.
def check_offset(tok,file=nil,endpos=nil)
  #the errors detected here are now reduced to warnings....
  file||=@original_file
  String===file and file=file.to_sequence
  allow_ooo=nil
  unless endpos
    #the next token may be flagged as allowed to be out of order
    allow_ooo= @moretokens&&@moretokens[0]&&@moretokens[0].allow_ooo_offset
    endpos=((@moretokens.empty?)? input_position : @moretokens[0].offset)
  end
  oldpos=file.pos

  begin
    assert Integer===tok.offset
    assert Integer===endpos
    if endpos<tok.offset and !allow_ooo
      $stderr.puts "expected #{endpos} to be >= #{tok.offset} token #{tok.to_s.gsub("\n","\n ")}:#{tok.class}"
    end

    file.pos=tok.offset
    tok.verify_offset(file) or
      $stderr.puts "couldn't check offset of token #{tok.class}: #{tok.to_s.gsub("\n","\n ")} at #{tok.offset}"
    case tok
    when RubyLexer::StringToken,RubyLexer::NumberToken,
         RubyLexer::HereBodyToken,RubyLexer::SymbolToken,
         RubyLexer::HerePlaceholderToken,
         RubyLexer::FileAndLineToken then #do nothing; end position is not checked for these
    else
      file.pos==endpos or allow_ooo or
        $stderr.puts "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok.to_s.gsub("\n","\n ") }"
    end
  ensure
    file.pos=oldpos
  end
  return
end
|
175
256
|
|
176
257
|
|
177
258
|
|
178
259
|
|
179
260
|
|
180
|
-
|
181
261
|
def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout)
|
182
262
|
input ||= File.open(name)
|
183
263
|
if output!=$stdout
|
184
264
|
output=File.open(output,'w')
|
185
265
|
end
|
186
266
|
|
267
|
+
input=input.read if IO===input and not File===input
|
268
|
+
|
187
269
|
fd=input
|
188
270
|
#File.open(name) {|fd|
|
189
271
|
lxr=lexertype.new(name,fd,1)
|
@@ -213,7 +295,7 @@ if __FILE__==$0
|
|
213
295
|
sep,line,showzw='',1,0
|
214
296
|
# lexertype= RumaLexer if defined? RumaLexer
|
215
297
|
lexertype=RubyLexer
|
216
|
-
insertnils=fd=name=nil
|
298
|
+
insertnils=fd=name=loop=nil
|
217
299
|
pprinter=RubyLexer::SimpleTokenPrinter
|
218
300
|
|
219
301
|
opts=GetoptLong.new \
|
@@ -222,27 +304,36 @@ if __FILE__==$0
|
|
222
304
|
["--keepws","-k", GetoptLong::NO_ARGUMENT],
|
223
305
|
["--maxws","-m", GetoptLong::NO_ARGUMENT],
|
224
306
|
["--implicit","-i", GetoptLong::NO_ARGUMENT],
|
225
|
-
["--implicit-all", GetoptLong::NO_ARGUMENT]
|
307
|
+
["--implicit-all", GetoptLong::NO_ARGUMENT],
|
308
|
+
["--loop", GetoptLong::NO_ARGUMENT]
|
226
309
|
|
227
310
|
saweval=nil
|
228
311
|
opts.each do|opt,arg|
|
229
312
|
case opt
|
230
313
|
when '--eval' then
|
231
314
|
tokentest('-e',lexertype,pprinter.new(sep,line,showzw),arg)
|
232
|
-
saweval=
|
315
|
+
saweval=arg
|
233
316
|
# when '--ruby' then lexertype=RubyLexer
|
234
317
|
when '--keepws' then pprinter= RubyLexer::KeepWsTokenPrinter
|
235
318
|
when '--maxws' then pprinter= RubyLexer::KeepWsTokenPrinter;sep=' '
|
236
319
|
when '--implicit' then showzw=1
|
237
320
|
when '--implicit-all' then showzw=2
|
321
|
+
when '--loop' then loop=true
|
238
322
|
else raise :impossible
|
239
323
|
end
|
240
324
|
end
|
241
325
|
|
242
326
|
pprinter =pprinter.new(sep,line,showzw)
|
243
327
|
|
244
|
-
|
245
|
-
|
328
|
+
begin
|
329
|
+
if ARGV.empty?
|
330
|
+
saweval ?
|
331
|
+
tokentest('-e',lexertype,pprinter,saweval) :
|
332
|
+
tokentest('-',lexertype,pprinter,$stdin)
|
333
|
+
else
|
334
|
+
ARGV.each{|fn| tokentest(fn,lexertype,pprinter) }
|
335
|
+
end
|
246
336
|
# ARGV.first[/[_.]rb$/i] and lexertype=RubyLexer #filename with _rb are special hack
|
337
|
+
end while loop
|
247
338
|
|
248
339
|
end
|