mspire-sequest 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'mspire/sequest'
|
2
|
+
require 'mspire/sequest/srf'
|
3
|
+
require 'mspire/sequest/sqt'
|
4
|
+
|
5
|
+
|
6
|
+
module Mspire
  module Sequest
    class Srf

      # Mixin (included into Srf below) that serializes the search results
      # held by an Srf object into an SQT text file.
      module Sqt

        # Header keys that are always written first, in exactly this order
        # (just for readability and consistency of the generated files).
        SQT_INVARIANT_HEADER_ORDER = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime).freeze

        # Default rounding precision (decimal places) applied when the
        # :round option is set.
        SQT_DECIMAL_PLACES = {:tic => 2, :mh => 7, :xcorr => 5, :sp => 2, :dcn => 5}.freeze

        # Writes this object as an SQT file.
        #
        # out_filename:: path of the file to write; when nil/false the file
        #                is written to base_name + '.sqt'
        # opts::
        #   :db_info        => add DBSeqLength/DBLocusCount/DBMD5Sum header
        #                      lines if the database file can be found
        #   :new_db_path    => directory in which to look for the database
        #                      (the basename of the recorded path is kept)
        #   :update_db_path => write the expanded :new_db_path location into
        #                      the 'Database' header (only with :new_db_path)
        #   :round          => round floating point numbers (see
        #                      SQT_DECIMAL_PLACES)
        #
        # Everything that goes into the header is computed before the output
        # file is opened, so a failure while gathering header data does not
        # leave a truncated file behind.
        def to_sqt(out_filename=nil, opts={})
          defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
          opt = defaults.merge(opts)

          outfile = out_filename || (base_name + '.sqt')

          fragment_masses = sqt_mass_type_label(params.fragment_mass_type)
          precursor_masses = sqt_mass_type_label(params.precursor_mass_type)

          # headers that may occur several times (one line per value)
          multi_headers = {
            'StaticMod' => sqt_static_mods,
            'DynamicMod' => sqt_dynamic_mods, # example as diff mod
            'Comment' => ['Created from Bioworks .srf file']
          }

          db_filename, db_filename_in_sqt = sqt_database_paths(opt)

          headers = {
            'SQTGenerator' => "mspire: ms-sequest",
            'SQTGeneratorVersion' => Mspire::Sequest::VERSION,
            'Database' => db_filename_in_sqt,
            'FragmentMasses' => fragment_masses,
            'PrecursorMasses' => precursor_masses,
            'StartTime' => '', # Bioworks 3.2 also leaves this blank...
            'Alg-PreMassTol' => params.peptide_mass_tolerance,
            'Alg-FragMassTol' => params.fragment_ion_tolerance,
            'Alg-PreMassUnits' => sqt_precursor_mass_units, ## mine
            'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
            'Alg-Enzyme' => header.enzyme.split(':').last,
            'Alg-MSModel' => header.model,
          }

          sqt_add_db_info(headers, db_filename) if opt[:db_info]

          if self.out_files.size > 0
            # somewhat redundant with above, but we can get this without a db present!
            headers['DBLocusCount'] = self.out_files.first.db_locus_count
          end

          File.open(outfile, 'w') do |out|
            sqt_write_headers(out, headers, multi_headers)
            sqt_write_spectra(out, opt[:round])
          end # close the filehandle
        end # method

        private

        # Maps a sequest mass type ('average' or anything else) to the SQT
        # label 'AVG' or 'MONO'.
        def sqt_mass_type_label(mass_type)
          mass_type == 'average' ? 'AVG' : 'MONO'
        end

        # Builds the 'StaticMod' values ("<residue>=<mass>").  For single
        # letter keys the modification is added to the residue's mass from
        # params.mass_index; other keys are written with the raw value.
        def sqt_static_mods
          mass_index = params.mass_index
          params.static_mods.map do |name, diff|
            key = name.split(/_/)[1]
            if key.size == 1
              key + '=' + (mass_index[key] + diff.to_f).to_s
            else
              key + '=' + diff
            end
          end
        end

        # Builds the 'DynamicMod' values from every parenthesized group in
        # header.modifications, replacing the first space with '='.
        def sqt_dynamic_mods
          header.modifications.scan(/\((.*?)\)/).map do |captures|
            captures.first.sub(/ /,'=')
          end
        end

        # Returns [path used to look up the database on disk,
        #          path written to the 'Database' header].
        def sqt_database_paths(opt)
          db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
          db_filename_in_sqt = db_filename
          if opt[:new_db_path]
            # normalize backslashes so File.basename can split a Windows style path
            db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
            if opt[:update_db_path]
              db_filename_in_sqt = File.expand_path(db_filename)
              warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
            end
          end
          [db_filename, db_filename_in_sqt]
        end

        # Translates params.peptide_mass_units ('0', '1', '2') into a unit
        # name; returns nil (written as an empty field) for any other value.
        def sqt_precursor_mass_units
          case params.peptide_mass_units
          when '0' then 'amu'
          when '1' then 'mmu'
          when '2' then 'ppm'
          end
        end

        # Adds DBSeqLength, DBLocusCount and DBMD5Sum to +headers+ if the
        # database file exists, otherwise warns and leaves +headers+ alone.
        def sqt_add_db_info(headers, db_filename)
          if File.exist?(db_filename)
            reply = Mspire::Sequest::Sqt.db_info(db_filename)
            %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label, val|
              headers[label] = val
            end
          else
            warn "file #{db_filename} does not exist, no extra db info in header!"
          end
        end

        # Prints the 'H' lines: the invariant keys first, then the remaining
        # single valued headers in insertion order, then the multi valued
        # ones.
        def sqt_write_headers(out, headers, multi_headers)
          SQT_INVARIANT_HEADER_ORDER.each do |key|
            out.puts ['H', key, headers[key]].join("\t")
          end
          headers.each do |key, val|
            next if SQT_INVARIANT_HEADER_ORDER.include?(key)
            out.puts ['H', key, val].join("\t")
          end
          multi_headers.each do |key, vals|
            vals.each do |val|
              out.puts ['H', key, val].join("\t")
            end
          end
        end

        # Prints one 'S' line per out_file/dta_file pair, followed by an 'M'
        # line per hit and an 'L' line per protein of that hit.
        def sqt_write_spectra(out, round)
          dp = SQT_DECIMAL_PLACES
          # don't have the time to process (using 0.0 like bioworks 3.2)
          time_to_process = '0.0'
          #########################################
          # NEED TO FIGURE OUT: (in spectra guy)
          #    * Lowest Sp value for top 500 spectra
          #    * Number of sequences matching this precursor ion
          #########################################
          manual_validation_status = 'U'

          self.out_files.zip(dta_files) do |out_file, dta_file|
            precursor_mh = dta_file.mh
            total_inten = out_file.total_inten
            lowest_sp = out_file.lowest_sp
            if round
              precursor_mh = precursor_mh.round(dp[:mh])
              total_inten = total_inten.round(dp[:tic])
              lowest_sp = lowest_sp.round(dp[:sp])
            end

            out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, precursor_mh, total_inten, lowest_sp, out_file.num_matched_peptides].join("\t")

            out_file.hits.each_with_index do |hit, index|
              mh = hit.mh
              deltacn = hit.deltacn_orig_updated
              xcorr = hit.xcorr
              sp = hit.sp
              if round
                mh = mh.round(dp[:mh])
                deltacn = deltacn.round(dp[:dcn])
                xcorr = xcorr.round(dp[:xcorr])
                sp = sp.round(dp[:sp])
              end
              # note that the rank is determined by the order..
              out.puts ['M', index+1, hit.rsp, mh, deltacn, xcorr, sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
              hit.proteins.each do |prot|
                out.puts ['L', prot.first_entry].join("\t")
              end
            end
          end
        end
      end # Sqt
      include Sqt
    end # Srf
  end # Sequest
end # Mspire
|
172
|
+
|
173
|
+
|
174
|
+
require 'optparse'
|
175
|
+
|
176
|
+
# The namespace is reopened in nested form (like the definition earlier in
# this file) so this block does not depend on the constants already existing.
module Mspire
  module Sequest
    class Srf
      module Sqt
        # Command line driver: converts each given .srf file into an .sqt
        # file.
        #
        # argv::     command line arguments; option switches are removed from
        #            it in place by OptionParser#parse!, the remaining entries
        #            are the input .srf files
        # progname:: program name shown in the usage banner
        #
        # Prints the usage and exits if no input file is given.  Raises a
        # RuntimeError if --outfiles is given with a different number of
        # names than there are input files.
        def self.commandline(argv, progname=$0)
          opt = {
            :filter => true
          }
          opts = OptionParser.new do |op|
            op.banner = "usage: #{progname} [OPTIONS] <file>.srf ..."
            op.separator "output: <file>.sqt ..."
            op.separator ""
            op.separator "options:"
            op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
            op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
            op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
            op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included.  Turns this off.") { opt[:filter] = false }
            op.on("-o", "--outfiles <first,...>", Array, "Comma list of output filenames") {|v| opt[:outfiles] = v }
            op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
          end
          opts.parse!(argv)

          if argv.size == 0
            puts opts
            exit
          end

          if opt[:outfiles] && (opt[:outfiles].size != argv.size)
            raise "if outfiles specified, outfiles must be same size as number of input files"
          end

          argv.each_with_index do |srf_file, i|
            outfile =
              if opt[:outfiles]
                opt[:outfiles][i]
              else
                srf_file.chomp(File.extname(srf_file)) + '.sqt'
              end

            # Read (do not delete) the :filter setting: removing it from the
            # hash here would leave nil for the second and later input files,
            # silently switching the precursor mass filter off for them.
            srf = Mspire::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt[:filter])
            srf.to_sqt(outfile, :db_info => opt[:db_info], :new_db_path => opt[:new_db_path], :update_db_path => opt[:db_update], :round => opt[:round])
          end
        end
      end
    end
  end
end
|
217
|
+
|
218
|
+
|