rubylexer 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +90 -0
- data/Manifest.txt +54 -3
- data/README.txt +4 -7
- data/Rakefile +3 -2
- data/lib/rubylexer.rb +856 -323
- data/lib/rubylexer/0.7.0.rb +11 -2
- data/lib/rubylexer/0.7.1.rb +2 -0
- data/lib/rubylexer/charhandler.rb +4 -4
- data/lib/rubylexer/context.rb +86 -9
- data/lib/rubylexer/rulexer.rb +455 -101
- data/lib/rubylexer/token.rb +166 -43
- data/lib/rubylexer/tokenprinter.rb +16 -8
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.vpj +98 -0
- data/test/code/all_the_gems.rb +33 -0
- data/test/code/all_the_raas.rb +226 -0
- data/test/code/all_the_rubies.rb +2 -0
- data/test/code/deletewarns.rb +19 -1
- data/test/code/dumptokens.rb +39 -8
- data/test/code/errscan +2 -0
- data/test/code/isolate_error.rb +72 -0
- data/test/code/lexloop +14 -0
- data/test/code/locatetest.rb +150 -8
- data/test/code/regression.rb +109 -0
- data/test/code/rubylexervsruby.rb +53 -15
- data/test/code/strgen.rb +138 -0
- data/test/code/tarball.rb +144 -0
- data/test/code/testcases.rb +11 -0
- data/test/code/tokentest.rb +115 -24
- data/test/data/__eof2.rb +1 -0
- data/test/data/__eof5.rb +2 -0
- data/test/data/__eof6.rb +2 -0
- data/test/data/cvtesc.rb +17 -0
- data/test/data/g.rb +6 -0
- data/test/data/hd0.rb +3 -0
- data/test/data/hdateof.rb +2 -0
- data/test/data/hdempty.rb +3 -0
- data/test/data/hdr.rb +9 -0
- data/test/data/hdr_dos.rb +13 -0
- data/test/data/hdr_dos2.rb +18 -0
- data/test/data/heart.rb +2 -0
- data/test/data/here_escnl.rb +25 -0
- data/test/data/here_escnl_dos.rb +20 -0
- data/test/data/here_squote.rb +3 -0
- data/test/data/heremonsters.rb +140 -0
- data/test/data/heremonsters.rb.broken +68 -0
- data/test/data/heremonsters.rb.broken.save +68 -0
- data/test/data/heremonsters_dos.rb +140 -0
- data/test/data/heremonsters_dos.rb.broken +68 -0
- data/test/data/illegal_oneliners.rb +1 -0
- data/test/data/illegal_stanzas.rb +0 -0
- data/test/data/make_ws_strdelim.rb +22 -0
- data/test/data/maven2_builer_test.rb +82 -0
- data/test/data/migration.rb +8944 -0
- data/test/data/modl.rb +6 -0
- data/test/data/modl_dos.rb +7 -0
- data/test/data/modl_fails.rb +10 -0
- data/test/data/multilinestring.rb +6 -0
- data/test/data/oneliners.rb +555 -0
- data/test/data/p-op.rb +2 -0
- data/test/data/p.rb +3 -1710
- data/test/data/s.rb +90 -21
- data/test/data/simple.rb +1 -0
- data/test/data/simple_dos.rb +1 -0
- data/test/data/stanzas.rb +1194 -0
- data/test/data/strdelim_crlf.rb +6 -0
- data/test/data/stuff.rb +6 -0
- data/test/data/stuff2.rb +5 -0
- data/test/data/stuff3.rb +6 -0
- data/test/data/stuff4.rb +6 -0
- data/test/data/tkweird.rb +20 -0
- data/test/data/unending_stuff.rb +5 -0
- data/test/data/whatnot.rb +8 -0
- data/test/data/ws_strdelim.rb +0 -0
- data/test/test.sh +239 -0
- data/testing.txt +39 -50
- metadata +110 -12
- data/test/code/dl_all_gems.rb +0 -43
- data/test/code/unpack_all_gems.rb +0 -15
- data/test/data/gemlist.txt +0 -280
data/test/code/strgen.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
# Random generator of Ruby string-like literals (plain quotes, regexps,
# backticks and %-literals), used to fuzz the lexer.
#
# Written so that it parses on both Ruby 1.8 and later versions:
# `when x then` instead of the removed `when x:` form, and Array#join
# instead of relying on the 1.8 behaviour of Array#to_s.
module Strgen
  # bracket-style delimiter pairs usable with %-literals
  PAIRS=[
    ['<','>'],
    ['(',')'],
    ['[',']'],
    ['{','}']
  ]
  # a single char usable as a (non-nesting) %-literal delimiter:
  # anything except brackets, letters, digits and underscore
  ALLOWED_UNNESTING_FANCY=/[^<>\[\]{}()a-z0-9_]/i
  # letter following the % ('' means a bare %-literal)
  FANCY_TYPES=%w[q Q r s x w W]<<''
  SIMPLE_QUOTES=%w[" ' / `]
  # letters that form a one-char backslash escape
  SIMPLE_ESCAPES=%w[s n r t v f a b e]
  # chars that start a longer backslash escape (hex, octal, control, meta)
  MULTI_ESCAPES=%w[x c C M 0 1 2 3 4 5 6 7]
  # any char that is not one of the escape letters above
  NON_ESCAPES=/[^#{(SIMPLE_ESCAPES+MULTI_ESCAPES).join}]/

  # Returns a random one-byte string q such that allow===q and
  # disallow[q] is nil.  Loops until such a byte is drawn.
  def Strgen.rand_char_including(allow,disallow='')
    q=nil
    #rand(256) so that every byte value, including 0xFF, can be drawn
    q=rand(256).chr until ((allow===q) and not (disallow[q]))
    q
  end

  # Returns a random escape sequence: a backslash escape or a '#{...}'
  # interpolation of a random integer.
  #   disallow - string of chars that must not appear after the backslash
  #   bsonly   - string of chars (the delimiters, as passed by strgen)
  #              that also restrict which forms are chosen
  # NOTE(review): choice 3 ('#{...}') is excluded from the random draw when
  # '#' is in disallow or bsonly, yet forced when bsonly contains '#' --
  # kept exactly as found; confirm the intent.
  def Strgen.rand_esc_seq(disallow,bsonly)
    limit=4
    bsonly[/\\/] and disallow+='\\'
    (disallow['#'] or bsonly['#']) and limit=3
    choice=rand limit
    choice=3 if disallow[/\\/] or bsonly[/\#/]
    case choice
    when 0 then "\\"+rand_char_including(NON_ESCAPES,disallow)
    when 1 then "\\"+SIMPLE_ESCAPES[rand(SIMPLE_ESCAPES.size)]
    when 2
      "\\"+
      case ch=MULTI_ESCAPES[rand(MULTI_ESCAPES.size)]
      when "x" then "x"+rand(256).to_s(16)
      when "0".."7" then rand(256).to_s(8)
      when "c"
        "c"+
        if rand(2).zero?
          rand_char_including(/[^\\]/,disallow+bsonly)
        else
          rand_esc_seq disallow+"#",bsonly
        end
      when "C","M"
        #these forms need a '-', so start over if it is not allowed
        return rand_esc_seq(disallow,bsonly) if disallow['-'] or bsonly['-']
        ch+"-"+
        if rand(2).zero?
          rand_char_including(/[^\\]/,disallow+bsonly)
        else
          rand_esc_seq disallow+"#",bsonly
        end
      end
    when 3
      '#{'+rand(9999999999).to_s+'}'
    end
  end

  # must_be_escaped string => regexp matching any char not in that string
  CACHE={}

  # Returns the source text of one random string-like literal.
  # The candidate is eval'd as a check; if the eval raises anything,
  # the candidate is thrown away and a new one is generated.
  def Strgen.strgen
    must_be_escaped="#\\"
    #pick the delimiters
    case rand(3)
    when 0
      starter=ender=SIMPLE_QUOTES[rand(SIMPLE_QUOTES.size)]
      must_be_escaped<<starter
    when 1
      type=FANCY_TYPES[rand(FANCY_TYPES.size)]
      pair=PAIRS[rand(PAIRS.size)]
      starter= "%"+type+pair[0]
      ender= pair[1]
      must_be_escaped<<pair.join
    when 2
      type=FANCY_TYPES[rand(FANCY_TYPES.size)]
      q=rand_char_including ALLOWED_UNNESTING_FANCY
      #whitespace delimiter is replaced by '"' for %w and %W
      /w/i===type and /\s|\v/===q and q='"'
      starter= "%"+type+q
      ender=q
      must_be_escaped<<q
    end

    if starter=="/" or type=='r'
      must_be_escaped+="[]{}()?+*"
    end
    must_be_escaped+="\0" if type=='s'
    ckey=must_be_escaped
    ordinary=
      CACHE[ckey]||=
        /[^#{must_be_escaped.gsub(/./){"\\"+$&}}]/

    #up to 39 random chars that need no escaping
    interior=(1..rand(40)).map{|x|
      rand_char_including ordinary
    }.join

    interior["\\"] and fail

    disallow=''
    bsonly=starter[-1,1]+ender
    # disallow+='#' if /[\#\\\-]/===starter[-1,1]
    # disallow+=starter[-1,1]+ender if type=='r' or starter=='/'
    disallow+="\0" if type=='s'
    disallow+=must_be_escaped.gsub('\\','') if type=='r' or starter=='/'

    #sprinkle in up to 4 escape sequences, each one inserted at or
    #before the position of the previous one
    poslimit=interior.size+1
    rand(5).times{
      pos=rand poslimit
      interior[pos,0]=rand_esc_seq disallow,bsonly
      poslimit=pos
    } unless starter[-1]==?\\

    interior.gsub!(/\\[a-z]/i,'') if type=='r' or starter=='/'

    starter[-1]==?\r and interior.gsub!(/\A\n+/,'')

    starter[1]==?s and interior=='' and interior="x"

    result= starter+interior+ender

    begin
      begin eval "BEGIN{break};proc() do #{result} end" end while false
    rescue Exception
      #puts %<failing string: eval "#{result.gsub(/./){'\\x'+$&[0].to_s(16)}}">
      return strgen
    end

    return result
  end
end
|
126
|
+
|
127
|
+
# Fuzz driver, run only when this file is executed directly: generates
# random literals with Strgen.strgen and passes each one to
# RubyLexerVsRuby.rubylexervsruby.  This file does not require
# RubyLexerVsRuby itself; it has to be loaded already.
if __FILE__==$0
  10_000_000.times{|i|
    ss=nil
    begin
      ss=Strgen.strgen
      RubyLexerVsRuby.rubylexervsruby "-e#{i}", ss
    rescue Interrupt, SystemExit
      #let ctrl-c (or an explicit exit) actually stop the run
      raise
    rescue Exception
      #print the offending literal as \x hex escapes, byte by byte
      puts %<failing string: eval "#{ss.to_s.gsub(/./){$&.unpack('C*').map{|b| '\\x'+b.to_s(16)}.join}}">
    end
  }
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
# Downloads source archives (tar, zip, gem, or a bare .rb file, optionally
# gz/bz2/Z compressed) into a cache directory and unpacks them there.
#
# NOTE(review): the external commands below are built by interpolating
# file names into shell strings; names containing spaces or shell
# metacharacters are not escaped.
class Tarball
  PROTOCOLS=%w[http https ftp]
  EXTENSIONS=%w[tar zip rb tgz tbz2 tbz gem]
  EXTRA_EXTENSIONS=%w[gz bz2 Z]
  EXTRA_EXTENSIONS_REX="\\.(?:#{EXTRA_EXTENSIONS.join('|')})"
  VERSIONTOO='' #was: "(?:[_-](.*))"
  # regexp source matching a recognized archive extension, plus an
  # optional whole-archive compression extension
  ENDINGS="\\.(?:#{EXTENSIONS.join('|')})(?:#{EXTRA_EXTENSIONS_REX})?"
  # matches the url of a downloadable archive; $1 is the file name
  # without its extension(s)
  TARBALL=%r<
    \A(?:#{PROTOCOLS.join('|')})://
    (?:[^/]+/)+
    (.*)
    #{VERSIONTOO}
    #{ENDINGS}
    \Z
  >ixo

  # Unpacks the .gem file named by +gem+ into a directory of the same
  # name minus the .gem extension.  Intermediate files (and the gem
  # itself) are deleted once the files derived from them exist.
  def Tarball.unpack1gem(gem)
    dir=gem.dup
    dir[/\.gem$/]=''
    Dir.mkdir dir rescue nil

    #a gem is a tar containing data.tar.gz and metadata.gz
    system "tar x -f #{gem} -C #{dir}"
    files_in_dir=Dir[dir+"/*"]-[dir+"/data.tar.gz",dir+"/metadata.gz"]
    files_in_dir.empty? or puts "gem archive toplevel contains extra files: #{files_in_dir.join(' ')}"
    system "gunzip -f #{dir}/metadata.gz -c > #{dir}/metadata"
    system "tar xz -f #{dir}/data.tar.gz -C #{dir}"

    (File.unlink gem rescue nil) if File.exist? dir+"/metadata.gz" and File.exist? dir+"/data.tar.gz"
    (File.unlink dir+"/metadata.gz" rescue nil) if File.exist? dir+"/metadata"
    #only drop data.tar.gz if unpacking it produced something new
    files_in_dir=Dir[dir+"/*"]-[dir+"/data.tar.gz",dir+"/metadata.gz",dir+"/metadata"]-files_in_dir
    (File.unlink dir+"/data.tar.gz" rescue nil) unless files_in_dir.empty?
  end

  # Downloads +url+ into +cachedir+ (which must end in "/") and unpacks
  # it.  Does nothing but print a message if the unpacked directory is
  # already there.  A failed download removes the partial file and
  # returns nil; an Interrupt is re-raised after the cleanup.
  # Fails if +url+ does not end in a recognized archive extension.
  def Tarball.dl_and_unpack(cachedir,url)
    filename=url[%r{[^/]+\Z}]
    localname=cachedir+filename
    projectname=filename.sub(/#{ENDINGS}\Z/o,'')
    projectname==filename and fail "unknown tarball type: #{url}"
    #trailing slash, same as the default localdir computed by unpack
    localdir= cachedir+projectname+"/"
    #localdir=localname.sub(/#{ENDINGS}\Z/o,'')+"/"

    if File.exist? localdir
      puts "skipping already extant #{localdir}"
      return
    end
    begin
      File.open(localname,"w"){|disk|
        copy=proc{|net|
          while buf=net.read(40960)
            disk.write buf
          end
        }
        #newer open-uri provides URI.open and no longer opens urls
        #through Kernel#open; older versions only have the latter
        if URI.respond_to?(:open)
          URI.open(url,&copy)
        else
          open(url,&copy)
        end
      }
    rescue Interrupt=>e
      File.unlink localname rescue nil
      raise if e.class==Interrupt
      return
    rescue Exception
      File.unlink localname rescue nil
      return
    end

    unpack(cachedir,localname,projectname,localdir)
  end

  # Unpacks the already downloaded archive +localname+ (a path inside
  # +cachedir+, which must end in "/").
  #   projectname - name of the toplevel directory the archive's entries
  #                 are expected to start with; defaults to the file
  #                 name without its archive extension(s)
  #   localdir    - directory that will hold the result; defaults to
  #                 localname without its archive extension(s), plus "/"
  # If every entry of a tar or zip starts with +projectname+, it is
  # extracted directly into +cachedir+; otherwise +localdir+ is created
  # and the archive is extracted into it.
  def Tarball.unpack(cachedir,localname,
                     projectname=localname[%r{/([^/]*)#{ENDINGS}\Z},1],
                     localdir=localname.sub(/#{ENDINGS}\Z/o,'')+"/")
    cachedir[-1,1]=="/" or fail
    localname[0,cachedir.size]==cachedir or fail

    if File.exist? localdir
      puts "skipping already extant #{localdir}"
      return
    end

    #rename .tgz,.tbz2? to the full form
    case localname
    when /\.tgz\Z/
      oldln=localname
      localname=localname[0...-4]+".tar.gz"
    when /\.tbz2?\Z/
      oldln=localname
      localname=localname[0...-$&.size]+".tar.bz2"
    end
    File.rename oldln, localname if oldln

    #remove any gz or bz2 whole-archive compression
    case localname
    when /\.bz2\Z/
      system "bunzip2 "+localname or return
      localname=localname[0...-$&.size]
    when /\.(gz|Z)\Z/
      system "gunzip -f "+localname or return
      localname=localname[0...-$&.size]
    end

    #now actually unpack the archive
    case localname
    when /\.rb\Z/
      #a single source file just gets a directory of its own
      Dir.mkdir localdir
      File.rename localname, localdir+localname[%r{[^/]+\Z}]
    when /\.gem\Z/ then unpack1gem localname
    when /\.zip\Z/
      filelist=`unzip -L -l #{localname}`
      if $?>>8 > 1
        puts "invalid zip file #{localname}"
        return
      end
      #drop the header and trailer lines of the listing
      filelist=filelist.split("\n")[3...-2]
      wellformed=!filelist.find{|entry|
        entry[/\A\s*[^\s]+\s+[^\s]+\s+[^\s]+\s+\^?(.*)\Z/,1][0...projectname.size] != projectname
      }
      if wellformed
        zipopts=" -d #{cachedir}"
      else
        zipopts=" -d #{localdir}"
        Dir.mkdir localdir
      end
      puts "unzip -L #{localname} #{zipopts}"
      system "unzip -L #{localname} #{zipopts}"
      (File.unlink localname rescue nil) if $?>>8 <= 1

    when /\.tar\Z/
      wellformed=!`tar tf #{localname}`.split("\n").find{|entry|
        entry[0...projectname.size] != projectname
      }
      if wellformed
        taropts=" -C #{cachedir}"
      else
        taropts=" -C #{localdir}"
        Dir.mkdir localdir
      end
      system "tar xf #{localname} #{taropts}"
      #NOTE(review): unlike the zip case, the archive is removed even
      #when tar reports failure
      File.unlink localname rescue nil

    else fail "unknown tarball type: #{localname}"
    end
  end

end
|
144
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Sample inputs for the lexer tests, read from the test/data directory
# found under one of the directories in $: when this file is loaded.
module TestCases
  # fail unless File.exist 'test/data/oneliners.rb' and File.exist 'test/data/stanzas.rb'
  rldir=$:.find{|dir| File.exist? dir+'/test/data/oneliners.rb' and File.exist? dir+'/test/data/stanzas.rb' }
  #say what is missing rather than dying on nil+String below
  rldir or fail 'test/data/oneliners.rb and test/data/stanzas.rb not found under any directory in $:'

  #one test case per line; blank lines and lines starting with # are dropped
  ONELINERS=IO.readlines(rldir+'/test/data/oneliners.rb').map{|x| x.chomp}.grep(/\A\s*[^#\s\n]/).reverse
  #one test case per blank-line-separated paragraph, each ending in a newline
  STANZAS=IO.read(rldir+'/test/data/stanzas.rb').split("\n\n").grep(/./).reverse
  STANZAS.each{|stanza| stanza<<"\n" }
  ILLEGAL_ONELINERS=IO.readlines(rldir+'/test/data/illegal_oneliners.rb').map{|x| x.chomp}.grep(/\A\s*[^#\s\n]/).reverse
  ILLEGAL_STANZAS=IO.read(rldir+'/test/data/illegal_stanzas.rb').split("\n\n").grep(/./).reverse
  TESTCASES=ONELINERS+STANZAS
  ILLEGAL_TESTCASES=ILLEGAL_ONELINERS+ILLEGAL_STANZAS
end
|
data/test/code/tokentest.rb
CHANGED
@@ -1,4 +1,22 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
=begin legal crap
|
3
|
+
rubylexer - a ruby lexer written in ruby
|
4
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
5
|
+
|
6
|
+
This library is free software; you can redistribute it and/or
|
7
|
+
modify it under the terms of the GNU Lesser General Public
|
8
|
+
License as published by the Free Software Foundation; either
|
9
|
+
version 2.1 of the License, or (at your option) any later version.
|
10
|
+
|
11
|
+
This library is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public
|
17
|
+
License along with this library; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
=end
|
2
20
|
$Debug=true
|
3
21
|
require "rubylexer"
|
4
22
|
require "getoptlong"
|
@@ -34,24 +52,52 @@ module SimpleVerify
|
|
34
52
|
end
|
35
53
|
|
36
54
|
#token classes that simply mix in SimpleVerify
class WToken
  include SimpleVerify
end

class IgnoreToken
  include SimpleVerify
end

class MethNameToken
  include SimpleVerify
end
|
40
57
|
|
58
|
+
class NewlineToken
  include SimpleVerify

  #a newline token verifies if the inherited check passes,
  #or else if fd is at end of file
  def verify_offset(fd)
    matched=super
    return matched if matched
    fd.eof?
  end
end
|
64
|
+
|
41
65
|
class SymbolToken
|
42
66
|
def verify_offset(fd)
|
43
67
|
la=fd.read(2)
|
44
68
|
case la
|
45
|
-
when '%s':
|
46
|
-
|
69
|
+
when '%s':
|
70
|
+
quote=fd.read(1)
|
71
|
+
ender=RubyLexer::PAIRS[quote] || quote
|
72
|
+
body=@ident[2...-1]
|
73
|
+
when /^:(['"])/:
|
74
|
+
#stay right here
|
75
|
+
quote=ender=$1
|
76
|
+
body=@ident[2...-1]
|
77
|
+
when /^:/:
|
78
|
+
fd.pos-=1
|
79
|
+
body=@ident[1..-1]
|
47
80
|
else raise 'unrecognized symbol type'
|
48
81
|
end
|
49
|
-
|
82
|
+
|
83
|
+
bodyread=fd.read(body.length)
|
84
|
+
|
85
|
+
#punt if its too hard
|
86
|
+
if quote
|
87
|
+
bs="\\"
|
88
|
+
hardstuff= /[#{bs}#{quote}#{bs}#{ender}\#\\]/
|
89
|
+
return true if (body+bodyread).match(hardstuff)
|
90
|
+
end
|
50
91
|
|
92
|
+
if bodyread==body
|
93
|
+
return fd.read(1)==ender if ender
|
94
|
+
return true
|
95
|
+
end
|
51
96
|
end
|
52
97
|
end
|
53
98
|
|
54
99
|
class EoiToken
|
100
|
+
include SimpleVerify
|
55
101
|
def verify_offset(fd)
|
56
102
|
result=super(fd)
|
57
103
|
fd.eof?
|
@@ -78,8 +124,8 @@ end
|
|
78
124
|
class HerePlaceholderToken
|
79
125
|
def verify_offset(fd)
|
80
126
|
'<<'==fd.read(2) or return false
|
81
|
-
@dash and (
|
82
|
-
case ch=fd.
|
127
|
+
@dash and ('-'==fd.read(1) or return false)
|
128
|
+
case ch=fd.read(1)[0]
|
83
129
|
when ?', ?`, ?"
|
84
130
|
@quote==ch.chr and
|
85
131
|
fd.read(@ender.size)==@ender and
|
@@ -98,14 +144,38 @@ class StringToken
|
|
98
144
|
FANCY_QUOTE_BEGINNINGS= {'`'=>'%x', '['=>'%w', '{'=>'%W',
|
99
145
|
'"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'}
|
100
146
|
#Checks that fd holds this string's opening delimiter at the current
#position, then hands the rest to verify_subtoken_offsets.
def verify_offset(fd)
  return false unless fd.read(open.size)==open
  # str=fd.read(2)
  # @char==str[0,1] or FANCY_QUOTE_BEGINNINGS[@char]===str or return false
  verify_subtoken_offsets(fd)
end
|
105
152
|
|
106
153
|
#Walks @elems, checking each against the text at fd's current position.
#String elements are compared with the file text after dos newlines
#("\r\n") have been translated to "\n"; any other element is asked to
#verify itself.  Returns true if everything matched, false if a string
#element differs or the file ends early; raises LexerError if a
#non-string element fails to verify.
def verify_subtoken_offsets(fd)
  #verify offsets of subtokens
  @elems.each{|elem|
    case elem
    when String
      #get string data to compare against,
      #translating dos newlines to unix.
      #(buffer mgt is a PITA)
      goal=elem.size
      saw=fd.read(goal) or return false #file ended before this element
      saw.gsub!("\r\n","\n")
      now_at=nil
      #each translated newline makes saw one char shorter than what was
      #read, so keep topping it up until it is long enough.
      #NOTE(review): a lone "\r" as the very last byte of the file looks
      #like it would be pushed back and re-read forever here -- confirm.
      loop do
        now_at=saw.size
        #a trailing \r might be half of a \r\n; push it back and reread it
        saw.chomp!("\r") and fd.pos-=1 and now_at-=1
        break if now_at>=goal
        #read at least 2 so that a \r\n pair is not split
        more=fd.read([goal-now_at,2].max) or return false #file ended early
        more.gsub!("\r\n","\n")
        saw<<more
      end
      #assert now_at<=goal+1 #not needed
      #NOTE(review): when the top-up read overshoots, the extra char is
      #trimmed from saw but fd stays past it -- confirm that is intended
      saw[goal..-1]='' unless goal==now_at
      saw==elem or return false
    else elem.verify_offset(fd) or raise LexerError
    end
  }
  return true
end
|
111
181
|
|
@@ -127,7 +197,7 @@ class RubyCode
|
|
127
197
|
}
|
128
198
|
assert nexttok.nil?
|
129
199
|
assert thistok.object_id==@ident.last.object_id
|
130
|
-
assert WToken===thistok
|
200
|
+
assert(( WToken===thistok or EoiToken===thistok&&thistok.error ))
|
131
201
|
fd.pos=endpos
|
132
202
|
end
|
133
203
|
|
@@ -150,40 +220,52 @@ end
|
|
150
220
|
# end
|
151
221
|
#end
|
152
222
|
end
|
223
|
+
|
153
224
|
public
|
154
225
|
|
226
|
+
|
155
227
|
#Checks that token tok really is found at tok.offset in file, and (for
#most token types) that it ends at endpos.  Problems are only reported
#on $stderr.  The file position is put back where it was before
#returning, even if the token's verify_offset raises.
#  file   - defaults to @original_file; a String is converted with to_sequence
#  endpos - defaults to the offset of the first token in @moretokens, or to
#           input_position if @moretokens is empty
#Always returns nil.
def check_offset(tok,file=nil,endpos=nil)
  #the errors detected here are now reduced to warnings....
  file||=@original_file
  String===file and file=file.to_sequence
  #only consulted when the caller did not supply endpos
  allow_ooo= @moretokens&&@moretokens[0]&&@moretokens[0].allow_ooo_offset unless endpos
  endpos||=((@moretokens.empty?)? input_position : @moretokens[0].offset)
  oldpos=file.pos

  begin
    assert Integer===tok.offset
    assert Integer===endpos
    if endpos<tok.offset and !allow_ooo
      $stderr.puts "expected #{endpos} to be >= #{tok.offset} token #{tok.to_s.gsub("\n","\n ")}:#{tok.class}"
    end

    file.pos=tok.offset
    tok.verify_offset(file) or
      $stderr.puts "couldn't check offset of token #{tok.class}: #{tok.to_s.gsub("\n","\n ")} at #{tok.offset}"
    case tok
    when RubyLexer::StringToken,RubyLexer::NumberToken,
         RubyLexer::HereBodyToken,RubyLexer::SymbolToken,
         RubyLexer::HerePlaceholderToken,
         RubyLexer::FileAndLineToken
      #do nothing; the end position is not checked for these
    else
      file.pos==endpos or allow_ooo or
        $stderr.puts "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok.to_s.gsub("\n","\n ") }"
    end
  ensure
    file.pos=oldpos
  end
  return
end
|
175
256
|
|
176
257
|
|
177
258
|
|
178
259
|
|
179
260
|
|
180
|
-
|
181
261
|
def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout)
|
182
262
|
input ||= File.open(name)
|
183
263
|
if output!=$stdout
|
184
264
|
output=File.open(output,'w')
|
185
265
|
end
|
186
266
|
|
267
|
+
input=input.read if IO===input and not File===input
|
268
|
+
|
187
269
|
fd=input
|
188
270
|
#File.open(name) {|fd|
|
189
271
|
lxr=lexertype.new(name,fd,1)
|
@@ -213,7 +295,7 @@ if __FILE__==$0
|
|
213
295
|
sep,line,showzw='',1,0
|
214
296
|
# lexertype= RumaLexer if defined? RumaLexer
|
215
297
|
lexertype=RubyLexer
|
216
|
-
insertnils=fd=name=nil
|
298
|
+
insertnils=fd=name=loop=nil
|
217
299
|
pprinter=RubyLexer::SimpleTokenPrinter
|
218
300
|
|
219
301
|
opts=GetoptLong.new \
|
@@ -222,27 +304,36 @@ if __FILE__==$0
|
|
222
304
|
["--keepws","-k", GetoptLong::NO_ARGUMENT],
|
223
305
|
["--maxws","-m", GetoptLong::NO_ARGUMENT],
|
224
306
|
["--implicit","-i", GetoptLong::NO_ARGUMENT],
|
225
|
-
["--implicit-all", GetoptLong::NO_ARGUMENT]
|
307
|
+
["--implicit-all", GetoptLong::NO_ARGUMENT],
|
308
|
+
["--loop", GetoptLong::NO_ARGUMENT]
|
226
309
|
|
227
310
|
saweval=nil
|
228
311
|
opts.each do|opt,arg|
|
229
312
|
case opt
|
230
313
|
when '--eval' then
|
231
314
|
tokentest('-e',lexertype,pprinter.new(sep,line,showzw),arg)
|
232
|
-
saweval=
|
315
|
+
saweval=arg
|
233
316
|
# when '--ruby' then lexertype=RubyLexer
|
234
317
|
when '--keepws' then pprinter= RubyLexer::KeepWsTokenPrinter
|
235
318
|
when '--maxws' then pprinter= RubyLexer::KeepWsTokenPrinter;sep=' '
|
236
319
|
when '--implicit' then showzw=1
|
237
320
|
when '--implicit-all' then showzw=2
|
321
|
+
when '--loop' then loop=true
|
238
322
|
else raise :impossible
|
239
323
|
end
|
240
324
|
end
|
241
325
|
|
242
326
|
pprinter =pprinter.new(sep,line,showzw)
|
243
327
|
|
244
|
-
|
245
|
-
|
328
|
+
begin
|
329
|
+
if ARGV.empty?
|
330
|
+
saweval ?
|
331
|
+
tokentest('-e',lexertype,pprinter,saweval) :
|
332
|
+
tokentest('-',lexertype,pprinter,$stdin)
|
333
|
+
else
|
334
|
+
ARGV.each{|fn| tokentest(fn,lexertype,pprinter) }
|
335
|
+
end
|
246
336
|
# ARGV.first[/[_.]rb$/i] and lexertype=RubyLexer #filename with _rb are special hack
|
337
|
+
end while loop
|
247
338
|
|
248
339
|
end
|