ms-sequest 0.0.13 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- data/History +5 -0
- data/README.rdoc +8 -1
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/srf_to_search.rb +1 -35
- data/bin/srf_to_sqt.rb +1 -28
- data/lib/ms/sequest/srf/search.rb +127 -77
- data/lib/ms/sequest/srf/sqt.rb +192 -136
- data/spec/ms/sequest/srf/search_spec.rb +29 -8
- data/spec/ms/sequest/srf/sqt_spec.rb +63 -26
- data/spec/spec_helper.rb +1 -19
- metadata +4 -4
data/History
CHANGED
data/README.rdoc
CHANGED
@@ -1,9 +1,16 @@
|
|
1
|
-
=
|
1
|
+
= ms-sequest
|
2
2
|
|
3
3
|
An {mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
|
4
4
|
|
5
|
+
== {API}[http://yardoc.org/docs/jtprince-ms-sequest]
|
6
|
+
|
5
7
|
== Examples
|
6
8
|
|
9
|
+
Provides two executables for extracting information from an Srf file (run without file for usage):
|
10
|
+
|
11
|
+
srf_to_sqt.rb file.srf # => file.sqt
|
12
|
+
srf_to_search.rb file.srf # => file.mgf (also can make .dta files)
|
13
|
+
|
7
14
|
=== Ms::Sequest::Srf
|
8
15
|
|
9
16
|
Can read and convert Bioworks Sequest Results Files (SRF).
|
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ gemspec = Gem::Specification.new do |s|
|
|
10
10
|
s.name = NAME
|
11
11
|
s.authors = ["John T. Prince"]
|
12
12
|
s.email = "jtprince@gmail.com"
|
13
|
-
s.homepage = "http://
|
13
|
+
s.homepage = "http://github.com/jtprince/" + NAME
|
14
14
|
s.summary = "An mspire library supporting SEQUEST, Bioworks, SQT, etc"
|
15
15
|
s.description = "reads .SRF, .SQT and supports conversions"
|
16
16
|
s.rubyforge_project = 'mspire'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.14
|
data/bin/srf_to_search.rb
CHANGED
@@ -1,41 +1,7 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
|
-
require 'optparse'
|
5
4
|
require 'ms/sequest/srf/search'
|
6
5
|
|
7
|
-
|
8
|
-
:format => 'mgf'
|
9
|
-
}
|
6
|
+
Ms::Sequest::Srf::Search.commandline(ARGV, File.basename(__FILE__)) # hand option parsing and conversion to the library; second arg is the name shown in usage
|
10
7
|
|
11
|
-
opts = OptionParser.new do |op|
|
12
|
-
op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
|
13
|
-
op.separator "outputs: <file>.mgf"
|
14
|
-
op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
|
15
|
-
end
|
16
|
-
|
17
|
-
if ARGV.size == 0
|
18
|
-
puts opts
|
19
|
-
exit
|
20
|
-
end
|
21
|
-
|
22
|
-
format = opt[:format]
|
23
|
-
|
24
|
-
ARGV.each do |srf_file|
|
25
|
-
base = srf_file.sub(/\.srf$/i, '')
|
26
|
-
newfile =
|
27
|
-
case format
|
28
|
-
when 'dta'
|
29
|
-
base
|
30
|
-
when 'mgf'
|
31
|
-
base << '.' << format
|
32
|
-
end
|
33
|
-
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
|
34
|
-
# options just speed up reading since we don't need .out info anyway
|
35
|
-
case format
|
36
|
-
when 'mgf'
|
37
|
-
srf.to_mgf(newfile)
|
38
|
-
when 'dta'
|
39
|
-
srf.to_dta_files(newfile)
|
40
|
-
end
|
41
|
-
end
|
data/bin/srf_to_sqt.rb
CHANGED
@@ -3,33 +3,6 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'ms/sequest/srf/sqt'
|
5
5
|
|
6
|
-
|
7
|
-
:filter => true
|
8
|
-
}
|
9
|
-
opts = OptionParser.new do |op|
|
10
|
-
op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
|
11
|
-
op.separator "output: <file>.sqt ..."
|
12
|
-
op.separator ""
|
13
|
-
op.separator "options:"
|
14
|
-
op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
|
15
|
-
op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
|
16
|
-
op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
|
17
|
-
op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included. Turns this off.") { opt[:filter] = false }
|
18
|
-
op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
|
19
|
-
end
|
20
|
-
opts.parse!
|
21
|
-
|
22
|
-
if ARGV.size == 0
|
23
|
-
puts(opts) || exit
|
24
|
-
end
|
25
|
-
|
26
|
-
ARGV.each do |srf_file|
|
27
|
-
base = srf_file.chomp(File.extname(srf_file))
|
28
|
-
outfile = base + '.sqt'
|
29
|
-
|
30
|
-
srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt.delete(:filter))
|
31
|
-
srf.to_sqt(outfile, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
|
32
|
-
end
|
33
|
-
|
6
|
+
Ms::Sequest::Srf::Sqt.commandline(ARGV, File.basename(__FILE__))
|
34
7
|
|
35
8
|
|
@@ -7,100 +7,150 @@ require 'ms/mass'
|
|
7
7
|
module Ms
|
8
8
|
module Sequest
|
9
9
|
class Srf
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
10
|
+
module Search
|
11
|
+
# Writes an MGF file to given filename or base_name + '.mgf' if no
|
12
|
+
# filename given.
|
13
|
+
#
|
14
|
+
# This mimics the output of merge.pl from Mascot. The only difference is
|
15
|
+
# that this does not include the "\r\n" that is found after the peak
|
16
|
+
# lists, instead, it uses "\n" throughout the file (thinking that this
|
17
|
+
# is preferable to mixing newline styles!)
|
18
|
+
def to_mgf(filename=nil)
|
19
|
+
filename =
|
20
|
+
if filename ; filename
|
21
|
+
else
|
22
|
+
base_name + '.mgf'
|
23
|
+
end
|
24
|
+
h_plus = Ms::Mass::MASCOT_H_PLUS
|
25
|
+
File.open(filename, 'wb') do |out|
|
26
|
+
dta_files.zip(index) do |dta, i_ar|
|
27
|
+
chrg = dta.charge
|
28
|
+
out.print "BEGIN IONS\n"
|
29
|
+
out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
|
30
|
+
out.print "CHARGE=#{chrg}+\n"
|
31
|
+
out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
|
32
|
+
peak_ar = dta.peaks.unpack('e*')
|
33
|
+
(0...(peak_ar.size)).step(2) do |i|
|
34
|
+
out.print( peak_ar[i,2].join(' '), "\n")
|
35
|
+
end
|
36
|
+
out.print "END IONS\n"
|
37
|
+
out.print "\n"
|
35
38
|
end
|
36
|
-
out.print "END IONS\n"
|
37
|
-
out.print "\n"
|
38
39
|
end
|
39
40
|
end
|
40
|
-
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
42
|
+
# if not given an out_folder, will make one named after the base_name
|
43
|
+
# compress may be: :zip, :tgz, or nil (no compression)
|
44
|
+
# :zip requires gem rubyzip to be installed and is *very* bloated
|
45
|
+
# as it writes out all the files first!
|
46
|
+
# :tgz requires gem archive-tar-minitar to be installed
|
47
|
+
def to_dta(out_folder=nil, compress=nil)
|
48
|
+
outdir =
|
49
|
+
if out_folder ; out_folder
|
50
|
+
else base_name
|
51
|
+
end
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
53
|
+
case compress
|
54
|
+
when :tgz
|
55
|
+
begin
|
56
|
+
require 'archive/tar/minitar'
|
57
|
+
rescue LoadError
|
58
|
+
abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
|
59
|
+
end
|
60
|
+
require 'archive/targz' # my own simplified interface!
|
61
|
+
require 'zlib'
|
62
|
+
names = index.map do |i_ar|
|
63
|
+
[outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
|
64
|
+
end
|
65
|
+
#Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
|
66
66
|
|
67
|
-
|
67
|
+
tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
|
68
68
|
|
69
|
-
|
70
|
-
|
71
|
-
|
69
|
+
Archive::Tar::Minitar::Output.open(tgz) do |outp|
|
70
|
+
dta_files.each_with_index do |dta_file, i|
|
71
|
+
Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
|
72
|
+
end
|
72
73
|
end
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
74
|
+
when :zip
|
75
|
+
begin
|
76
|
+
require 'zip/zipfilesystem'
|
77
|
+
rescue LoadError
|
78
|
+
abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
|
79
|
+
end
|
80
|
+
#begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
|
81
|
+
Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
|
82
|
+
dta_files.zip(index) do |dta,i_ar|
|
83
|
+
#zfs.mkdir(outdir)
|
84
|
+
zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
|
85
|
+
dta.write_dta_file(out)
|
86
|
+
#zfs.commit
|
87
|
+
end
|
87
88
|
end
|
88
89
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
90
|
+
else # no compression
|
91
|
+
FileUtils.mkpath(outdir)
|
92
|
+
Dir.chdir(outdir) do
|
93
|
+
dta_files.zip(index) do |dta,i_ar|
|
94
|
+
File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
|
95
|
+
dta.write_dta_file(out)
|
96
|
+
end
|
96
97
|
end
|
97
98
|
end
|
98
99
|
end
|
99
100
|
end
|
100
|
-
end
|
101
|
+
end # Search
|
102
|
+
|
103
|
+
include Search
|
101
104
|
|
102
105
|
end # Srf
|
103
106
|
end # Sequest
|
104
107
|
end # Ms
|
105
108
|
|
106
109
|
|
110
|
+
require 'optparse'
|
111
|
+
module Ms
  module Sequest
    class Srf
      module Search
        # Command-line driver for converting .srf files to a search input
        # format.
        #
        # argv::     array of command-line arguments (options plus one or more
        #            .srf filenames); it is modified in place by the option
        #            parser.
        # progname:: name displayed in the usage banner (defaults to $0).
        #
        # Options understood:
        #   -f, --format <mgf|dta>   output format (default 'mgf')
        #   -o, --outfiles a,b,...   explicit output names, one per input file
        #
        # Without --outfiles the output name is derived from the input name
        # with its .srf extension removed: "<base>.mgf" for mgf and "<base>"
        # for dta.  Prints usage and exits if no input files are given.
        # Raises RuntimeError if --outfiles is given and its length differs
        # from the number of input files.
        def self.commandline(argv, progname=$0)
          opt = {
            :format => 'mgf'
          }
          opts = OptionParser.new do |op|
            # use the caller-supplied program name rather than this library file's name
            op.banner = "usage: #{progname} <file>.srf ..."
            op.separator "outputs: <file>.mgf ..."
            op.on("-f", "--format <mgf|dta>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
            op.on("-o", "--outfiles <String,...>", Array, "comma list of output files or directories") {|v| opt[:outfiles] = v }
          end

          opts.parse!(argv)

          if argv.size == 0
            puts opts
            exit
          end

          format = opt[:format]
          outfiles = opt[:outfiles]

          # only validate the count when --outfiles was actually supplied
          # (it is nil otherwise)
          if outfiles && outfiles.size != argv.size
            raise "if outfiles specified, needs the same number of files as input files"
          end

          argv.each_with_index do |srf_file, i|
            newfile =
              if outfiles
                outfiles[i]
              else
                base = srf_file.sub(/\.srf$/i, '')
                case format
                when 'dta'
                  base
                when 'mgf'
                  "#{base}.#{format}"
                end
              end
            # options just speed up reading since we don't need .out info anyway
            srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
            case format
            when 'mgf'
              srf.to_mgf(newfile)
            when 'dta'
              srf.to_dta(newfile)
            end
          end
        end
      end
    end
  end
end
|
data/lib/ms/sequest/srf/sqt.rb
CHANGED
@@ -1,168 +1,224 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
require 'ms/calc'
|
3
3
|
require 'ms/sequest'
|
4
4
|
require 'ms/sequest/srf'
|
5
5
|
require 'ms/sequest/sqt'
|
6
6
|
|
7
|
+
|
7
8
|
module Ms
|
8
9
|
module Sequest
|
9
10
|
class Srf
|
11
|
+
|
12
|
+
module Sqt
|
10
13
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
# etc...
|
15
|
-
def to_sqt(out_filename=nil, opts={})
|
16
|
-
# default rounding precision (Decimal Places)
|
17
|
-
tic_dp = 2
|
18
|
-
mh_dp = 7
|
19
|
-
xcorr_dp = 5
|
20
|
-
sp_dp = 2
|
21
|
-
dcn_dp = 5
|
22
|
-
|
23
|
-
defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
|
24
|
-
opt = defaults.merge(opts)
|
25
|
-
|
26
|
-
outfile =
|
27
|
-
if out_filename
|
28
|
-
out_filename
|
29
|
-
else
|
30
|
-
base_name + '.sqt'
|
31
|
-
end
|
32
|
-
invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
|
33
|
-
fmt =
|
34
|
-
if params.fragment_mass_type == 'average' ; 'AVG'
|
35
|
-
else ; 'MONO'
|
36
|
-
end
|
37
|
-
pmt =
|
38
|
-
if params.precursor_mass_type == 'average' ; 'AVG'
|
39
|
-
else ; 'MONO'
|
40
|
-
end
|
14
|
+
def self.commandline(argv)
|
15
|
+
require 'optparse'
|
16
|
+
|
41
17
|
|
42
|
-
mass_index = params.mass_index
|
43
|
-
static_mods = params.static_mods.map do |k,v|
|
44
|
-
key = k.split(/_/)[1]
|
45
|
-
if key.size == 1
|
46
|
-
key + '=' + (mass_index[key] + v.to_f).to_s
|
47
|
-
else
|
48
|
-
key + '=' + v
|
49
|
-
end
|
50
18
|
end
|
51
19
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
20
|
+
# the out_filename will be the base_name + .sqt unless 'out_filename' is
|
21
|
+
# defined
|
22
|
+
# :round => round floating point numbers
|
23
|
+
# etc...
|
24
|
+
def to_sqt(out_filename=nil, opts={})
|
25
|
+
# default rounding precision (Decimal Places)
|
26
|
+
tic_dp = 2
|
27
|
+
mh_dp = 7
|
28
|
+
xcorr_dp = 5
|
29
|
+
sp_dp = 2
|
30
|
+
dcn_dp = 5
|
31
|
+
|
32
|
+
defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
|
33
|
+
opt = defaults.merge(opts)
|
34
|
+
|
35
|
+
outfile =
|
36
|
+
if out_filename
|
37
|
+
out_filename
|
38
|
+
else
|
39
|
+
base_name + '.sqt'
|
40
|
+
end
|
41
|
+
invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
|
42
|
+
fmt =
|
43
|
+
if params.fragment_mass_type == 'average' ; 'AVG'
|
44
|
+
else ; 'MONO'
|
45
|
+
end
|
46
|
+
pmt =
|
47
|
+
if params.precursor_mass_type == 'average' ; 'AVG'
|
48
|
+
else ; 'MONO'
|
49
|
+
end
|
50
|
+
|
51
|
+
mass_index = params.mass_index
|
52
|
+
static_mods = params.static_mods.map do |k,v|
|
53
|
+
key = k.split(/_/)[1]
|
54
|
+
if key.size == 1
|
55
|
+
key + '=' + (mass_index[key] + v.to_f).to_s
|
56
|
+
else
|
57
|
+
key + '=' + v
|
58
|
+
end
|
69
59
|
end
|
70
|
-
end
|
71
60
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
when '1' ; 'mmu'
|
76
|
-
when '2' ; 'ppm'
|
61
|
+
dynamic_mods = []
|
62
|
+
header.modifications.scan(/\((.*?)\)/) do |match|
|
63
|
+
dynamic_mods << match.first.sub(/ /,'=')
|
77
64
|
end
|
65
|
+
plural = {
|
66
|
+
'StaticMod' => static_mods,
|
67
|
+
'DynamicMod' => dynamic_mods, # example as diff mod
|
68
|
+
'Comment' => ['Created from Bioworks .srf file']
|
69
|
+
}
|
78
70
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
'Alg-PreMassTol' => params.peptide_mass_tolerance,
|
87
|
-
'Alg-FragMassTol' => params.fragment_ion_tolerance,
|
88
|
-
'Alg-PreMassUnits' => apmu, ## mine
|
89
|
-
'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
|
90
|
-
'Alg-Enzyme' => header.enzyme.split(':').last,
|
91
|
-
'Alg-MSModel' => header.model,
|
92
|
-
}
|
93
|
-
|
94
|
-
if opt[:db_info]
|
95
|
-
if File.exist?(db_filename)
|
96
|
-
reply = Ms::Sequest::Sqt.db_info(db_filename)
|
97
|
-
%w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
|
98
|
-
hh[label] = val
|
71
|
+
db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
|
72
|
+
db_filename_in_sqt = db_filename
|
73
|
+
if opt[:new_db_path]
|
74
|
+
db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
|
75
|
+
if opt[:update_db_path]
|
76
|
+
db_filename_in_sqt = File.expand_path(db_filename)
|
77
|
+
warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
|
99
78
|
end
|
100
|
-
else
|
101
|
-
warn "file #{db_filename} does not exist, no extra db info in header!"
|
102
79
|
end
|
103
|
-
end
|
104
80
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
81
|
+
apmu =
|
82
|
+
case params.peptide_mass_units
|
83
|
+
when '0' ; 'amu'
|
84
|
+
when '1' ; 'mmu'
|
85
|
+
when '2' ; 'ppm'
|
86
|
+
end
|
110
87
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
88
|
+
hh = {
|
89
|
+
'SQTGenerator' => "mspire: ms-sequest",
|
90
|
+
'SQTGeneratorVersion' => Ms::Sequest::VERSION,
|
91
|
+
'Database' => db_filename_in_sqt,
|
92
|
+
'FragmentMasses' => fmt,
|
93
|
+
'PrecursorMasses' => pmt,
|
94
|
+
'StartTime' => '', # Bioworks 3.2 also leaves this blank...
|
95
|
+
'Alg-PreMassTol' => params.peptide_mass_tolerance,
|
96
|
+
'Alg-FragMassTol' => params.fragment_ion_tolerance,
|
97
|
+
'Alg-PreMassUnits' => apmu, ## mine
|
98
|
+
'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
|
99
|
+
'Alg-Enzyme' => header.enzyme.split(':').last,
|
100
|
+
'Alg-MSModel' => header.model,
|
101
|
+
}
|
102
|
+
|
103
|
+
if opt[:db_info]
|
104
|
+
if File.exist?(db_filename)
|
105
|
+
reply = Ms::Sequest::Sqt.db_info(db_filename)
|
106
|
+
%w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
|
107
|
+
hh[label] = val
|
108
|
+
end
|
109
|
+
else
|
110
|
+
warn "file #{db_filename} does not exist, no extra db info in header!"
|
122
111
|
end
|
123
112
|
end
|
124
113
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
out_file_lowest_sp = out_file.lowest_sp
|
139
|
-
if opt[:round]
|
140
|
-
dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
|
141
|
-
out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
|
142
|
-
out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
|
114
|
+
has_hits = (self.out_files.size > 0)
|
115
|
+
if has_hits
|
116
|
+
# somewhat redundant with above, but we can get this without a db present!
|
117
|
+
hh['DBLocusCount'] = self.out_files.first.db_locus_count
|
118
|
+
end
|
119
|
+
|
120
|
+
File.open(outfile, 'w') do |out|
|
121
|
+
# print the header:
|
122
|
+
invariant_ordering.each do |iv|
|
123
|
+
out.puts ['H', iv, hh.delete(iv)].join("\t")
|
124
|
+
end
|
125
|
+
hh.each do |k,v|
|
126
|
+
out.puts ['H', k, v].join("\t")
|
143
127
|
end
|
128
|
+
plural.each do |k,vals|
|
129
|
+
vals.each do |val|
|
130
|
+
out.puts ['H', k, val].join("\t")
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
##### SPECTRA
|
135
|
+
time_to_process = '0.0'
|
136
|
+
#########################################
|
137
|
+
# NEED TO FIGURE OUT: (in spectra guy)
|
138
|
+
# * Lowest Sp value for top 500 spectra
|
139
|
+
# * Number of sequences matching this precursor ion
|
140
|
+
#########################################
|
144
141
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
142
|
+
manual_validation_status = 'U'
|
143
|
+
self.out_files.zip(dta_files) do |out_file, dta_file|
|
144
|
+
# don't have the time to process (using 0.0 like bioworks 3.2)
|
145
|
+
dta_file_mh = dta_file.mh
|
146
|
+
out_file_total_inten = out_file.total_inten
|
147
|
+
out_file_lowest_sp = out_file.lowest_sp
|
151
148
|
if opt[:round]
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
hit_sp = Ms::Calc.round(hit_sp, sp_dp)
|
149
|
+
dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
|
150
|
+
out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
|
151
|
+
out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
|
156
152
|
end
|
157
|
-
|
158
|
-
out.puts ['
|
159
|
-
|
160
|
-
|
153
|
+
|
154
|
+
out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
|
155
|
+
out_file.hits.each_with_index do |hit,index|
|
156
|
+
hit_mh = hit.mh
|
157
|
+
hit_deltacn_orig_updated = hit.deltacn_orig_updated
|
158
|
+
hit_xcorr = hit.xcorr
|
159
|
+
hit_sp = hit.sp
|
160
|
+
if opt[:round]
|
161
|
+
hit_mh = Ms::Calc.round(hit_mh, mh_dp)
|
162
|
+
hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
|
163
|
+
hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
|
164
|
+
hit_sp = Ms::Calc.round(hit_sp, sp_dp)
|
165
|
+
end
|
166
|
+
# note that the rank is determined by the order..
|
167
|
+
out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
|
168
|
+
hit.prots.each do |prot|
|
169
|
+
out.puts ['L', prot.first_entry].join("\t")
|
170
|
+
end
|
161
171
|
end
|
162
172
|
end
|
163
|
-
end
|
164
|
-
end #
|
165
|
-
end #
|
173
|
+
end # close the filehandle
|
174
|
+
end # method
|
175
|
+
end # Sqt
|
176
|
+
include Sqt
|
166
177
|
end # Srf
|
167
178
|
end # Sequest
|
168
179
|
end # Ms
|
180
|
+
|
181
|
+
|
182
|
+
require 'optparse'
|
183
|
+
|
184
|
+
module Ms
  module Sequest
    class Srf
      module Sqt
        # Command-line driver for converting .srf files to .sqt files.
        #
        # argv::     array of command-line arguments (options plus one or more
        #            .srf filenames); it is modified in place by the option
        #            parser.
        # progname:: name displayed in the usage banner (defaults to $0).
        #
        # Each input file is read with Ms::Sequest::Srf.new and written with
        # Srf#to_sqt.  Without --outfiles the output name is the input name
        # with its extension replaced by '.sqt'.  Prints usage and exits if no
        # input files are given.  Raises RuntimeError if --outfiles is given
        # and its length differs from the number of input files.
        def self.commandline(argv, progname=$0)
          opt = {
            :filter => true
          }
          opts = OptionParser.new do |op|
            op.banner = "usage: #{progname} [OPTIONS] <file>.srf ..."
            op.separator "output: <file>.sqt ..."
            op.separator ""
            op.separator "options:"
            op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
            op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
            op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
            op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included. Turns this off.") { opt[:filter] = false }
            op.on("-o", "--outfiles <first,...>", Array, "Comma list of output filenames") {|v| opt[:outfiles] = v }
            op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
          end
          opts.parse!(argv)

          if argv.size == 0
            puts opts
            exit
          end

          outfiles = opt[:outfiles]

          # only validate the count when --outfiles was actually supplied
          # (it is nil otherwise)
          if outfiles && outfiles.size != argv.size
            raise "if outfiles specified, outfiles must be same size as number of input files"
          end

          argv.each_with_index do |srf_file, i|
            outfile =
              if outfiles
                outfiles[i]
              else
                srf_file.chomp(File.extname(srf_file)) + '.sqt'
              end

            # read (not delete) the filter flag so that every input file, not
            # just the first, gets the same setting
            srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt[:filter])
            srf.to_sqt(outfile, :db_info => opt[:db_info], :new_db_path => opt[:new_db_path], :update_db_path => opt[:db_update], :round => opt[:round])
          end
        end
      end
    end
  end
end
|
223
|
+
|
224
|
+
|
@@ -6,10 +6,10 @@ require 'fileutils'
|
|
6
6
|
require 'ms/sequest/srf'
|
7
7
|
require 'ms/sequest/srf/search'
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
10
|
+
Mgf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
|
11
|
+
Dta_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
|
12
|
+
shared 'an srf to ms2 search converter' do
|
13
13
|
|
14
14
|
def del(file)
|
15
15
|
if File.exist?(file)
|
@@ -22,8 +22,8 @@ describe 'converting a large srf to an ms search format' do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
it 'converts to mgf' do
|
25
|
-
@output =
|
26
|
-
@
|
25
|
+
@output = Mgf_output
|
26
|
+
@convert_to_mgf.call
|
27
27
|
ok File.exist?(@output)
|
28
28
|
output = IO.read(@output)
|
29
29
|
# tests are just frozen right now, not checked for accuracy
|
@@ -33,8 +33,8 @@ describe 'converting a large srf to an ms search format' do
|
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'generates .dta files' do
|
36
|
-
@output =
|
37
|
-
@
|
36
|
+
@output = Dta_output
|
37
|
+
@convert_to_dta.call
|
38
38
|
ok File.exist?(@output)
|
39
39
|
ok File.directory?(@output)
|
40
40
|
# frozen (not verified):
|
@@ -51,3 +51,24 @@ describe 'converting a large srf to an ms search format' do
|
|
51
51
|
|
52
52
|
end
|
53
53
|
|
54
|
+
|
55
|
+
describe 'converting an srf to ms2 search format: programmatic' do
|
56
|
+
@srf = Ms::Sequest::Srf.new(Srf_file)
|
57
|
+
|
58
|
+
@convert_to_mgf = lambda { @srf.to_mgf(Mgf_output) }
|
59
|
+
@convert_to_dta = lambda { @srf.to_dta(Dta_output) }
|
60
|
+
|
61
|
+
behaves_like 'an srf to ms2 search converter'
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
describe 'converting an srf to ms2 search format: commandline' do
|
66
|
+
|
67
|
+
def commandline_lambda(string)
|
68
|
+
lambda { Ms::Sequest::Srf::Search.commandline(string.split(/\s+/)) }
|
69
|
+
end
|
70
|
+
|
71
|
+
@convert_to_mgf = commandline_lambda "#{Srf_file} -o #{Mgf_output}"
|
72
|
+
@convert_to_dta = commandline_lambda "#{Srf_file} -o #{Dta_output} -f dta"
|
73
|
+
behaves_like 'an srf to ms2 search converter'
|
74
|
+
end
|
@@ -26,7 +26,7 @@ MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
|
|
26
26
|
SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
|
27
27
|
|
28
28
|
|
29
|
-
|
29
|
+
SpecHelperOtherLines =<<END
|
30
30
|
S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
|
31
31
|
S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
|
32
32
|
M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
|
@@ -39,12 +39,15 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
|
|
39
39
|
L gi|90111124|ref|NP_414904.2|
|
40
40
|
END
|
41
41
|
|
42
|
-
|
42
|
+
Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
|
43
|
+
Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
shared 'an srf to sqt converter' do
|
46
|
+
|
47
|
+
before do
|
48
|
+
@original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
|
49
|
+
@output = Srf_output
|
50
|
+
end
|
48
51
|
|
49
52
|
def del(file)
|
50
53
|
if File.exist?(file)
|
@@ -83,7 +86,7 @@ describe 'converting a large srf to sqt' do
|
|
83
86
|
end
|
84
87
|
|
85
88
|
it 'converts without bothering with the database' do
|
86
|
-
@
|
89
|
+
@basic_conversion.call
|
87
90
|
ok File.exist?(@output)
|
88
91
|
lines = File.readlines(@output)
|
89
92
|
lines.size.is 80910
|
@@ -96,25 +99,9 @@ describe 'converting a large srf to sqt' do
|
|
96
99
|
del(@output)
|
97
100
|
end
|
98
101
|
|
99
|
-
|
100
|
-
# requires some knowledge of how the database file is extracted
|
101
|
-
# internally
|
102
|
-
wacky_path = '/not/a/real/path/wacky.fasta'
|
103
|
-
@srf.header.db_filename = wacky_path
|
104
|
-
my_error_string = ''
|
105
|
-
StringIO.open(my_error_string, 'w') do |strio|
|
106
|
-
$stderr = strio
|
107
|
-
@srf.to_sqt(@output, :db_info => true)
|
108
|
-
end
|
109
|
-
ok my_error_string.include?(wacky_path)
|
110
|
-
@srf.header.db_filename = @original_db_filename
|
111
|
-
$stderr = STDERR
|
112
|
-
ok File.exists?(@output)
|
113
|
-
IO.readlines(@output).size.is 80910
|
114
|
-
del(@output)
|
115
|
-
end
|
102
|
+
|
116
103
|
it 'can get db info with correct path' do
|
117
|
-
@
|
104
|
+
@with_new_db_path.call
|
118
105
|
ok File.exist?(@output)
|
119
106
|
lines = IO.readlines(@output)
|
120
107
|
has_md5 = lines.any? do |line|
|
@@ -130,8 +117,9 @@ describe 'converting a large srf to sqt' do
|
|
130
117
|
lines.size.is 80912
|
131
118
|
del(@output)
|
132
119
|
end
|
120
|
+
|
133
121
|
it 'can update the Database' do
|
134
|
-
@
|
122
|
+
@update_the_db_path.call
|
135
123
|
regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
|
136
124
|
updated_db = IO.readlines(@output).any? do |line|
|
137
125
|
line =~ regexp
|
@@ -139,4 +127,53 @@ describe 'converting a large srf to sqt' do
|
|
139
127
|
ok updated_db
|
140
128
|
del(@output)
|
141
129
|
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
describe "programmatic interface srf to sqt" do
|
134
|
+
|
135
|
+
@srf = Ms::Sequest::Srf.new(Srf_file)
|
136
|
+
|
137
|
+
@basic_conversion = lambda { @srf.to_sqt(Srf_output) }
|
138
|
+
@with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
|
139
|
+
@update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
|
140
|
+
|
141
|
+
before do
|
142
|
+
@output = Srf_output
|
143
|
+
end
|
144
|
+
|
145
|
+
behaves_like "an srf to sqt converter"
|
146
|
+
|
147
|
+
# this requires programmatic interface to manipulate the object for this
|
148
|
+
# test
|
149
|
+
it 'warns if the db path is incorrect and we want to update db info' do
|
150
|
+
# requires some knowledge of how the database file is extracted
|
151
|
+
# internally
|
152
|
+
wacky_path = '/not/a/real/path/wacky.fasta'
|
153
|
+
@srf.header.db_filename = wacky_path
|
154
|
+
my_error_string = ''
|
155
|
+
StringIO.open(my_error_string, 'w') do |strio|
|
156
|
+
$stderr = strio
|
157
|
+
@srf.to_sqt(@output, :db_info => true)
|
158
|
+
end
|
159
|
+
ok my_error_string.include?(wacky_path)
|
160
|
+
@srf.header.db_filename = @original_db_filename
|
161
|
+
$stderr = STDERR
|
162
|
+
ok File.exists?(@output)
|
163
|
+
IO.readlines(@output).size.is 80910
|
164
|
+
del(@output)
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
describe "command-line interface srf to sqt" do
|
169
|
+
def commandline_lambda(string)
|
170
|
+
lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
|
171
|
+
end
|
172
|
+
|
173
|
+
base_cmd = "#{Srf_file} -o #{Srf_output}"
|
174
|
+
@basic_conversion = commandline_lambda(base_cmd)
|
175
|
+
@with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
|
176
|
+
@update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
|
177
|
+
|
178
|
+
behaves_like "an srf to sqt converter"
|
142
179
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,27 +2,9 @@
|
|
2
2
|
require 'rubygems'
|
3
3
|
require 'spec/more'
|
4
4
|
|
5
|
-
# This is already defined in our module
|
6
|
-
#TESTFILES = File.expand_path(File.dirname(__FILE__)) + '/testfiles'
|
7
|
-
|
8
5
|
Bacon.summary_on_exit
|
9
6
|
|
10
|
-
#
|
11
|
-
# class Context
|
12
|
-
# def hash_match(hash, obj)
|
13
|
-
# hash.each do |k,v|
|
14
|
-
# if v.is_a?(Hash)
|
15
|
-
# hash_match(v, obj.send(k.to_sym))
|
16
|
-
# else
|
17
|
-
# puts "#{k}: #{v} but was #{obj.send(k.to_sym)}" if obj.send(k.to_sym) != v
|
18
|
-
# obj.send(k.to_sym).should.equal v
|
19
|
-
# end
|
20
|
-
# end
|
21
|
-
# end
|
22
|
-
# end
|
23
|
-
#end
|
24
|
-
|
25
|
-
|
7
|
+
# is this already defined??
|
26
8
|
TESTFILES = File.expand_path(File.dirname(__FILE__)) + "/testfiles"
|
27
9
|
|
28
10
|
begin
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 14
|
9
|
+
version: 0.0.14
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John T. Prince
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-08-
|
17
|
+
date: 2010-08-24 00:00:00 -06:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -134,7 +134,7 @@ files:
|
|
134
134
|
- spec/testfiles/small.sqt
|
135
135
|
- spec/testfiles/small2.sqt
|
136
136
|
has_rdoc: true
|
137
|
-
homepage: http://
|
137
|
+
homepage: http://github.com/jtprince/ms-sequest
|
138
138
|
licenses: []
|
139
139
|
|
140
140
|
post_install_message:
|