mspire-sequest 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +30 -0
- data/.gitmodules +9 -0
- data/History +79 -0
- data/LICENSE +22 -0
- data/README.rdoc +85 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/bin/srf_to_pepxml.rb +7 -0
- data/bin/srf_to_search.rb +7 -0
- data/bin/srf_to_sqt.rb +8 -0
- data/lib/mspire/sequest/params.rb +331 -0
- data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
- data/lib/mspire/sequest/pepxml/params.rb +32 -0
- data/lib/mspire/sequest/sqt.rb +393 -0
- data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
- data/lib/mspire/sequest/srf/pepxml.rb +333 -0
- data/lib/mspire/sequest/srf/search.rb +158 -0
- data/lib/mspire/sequest/srf/sqt.rb +218 -0
- data/lib/mspire/sequest/srf.rb +715 -0
- data/lib/mspire/sequest.rb +6 -0
- data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
- data/spec/mspire/sequest/params_spec.rb +135 -0
- data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
- data/spec/mspire/sequest/pepxml_spec.rb +311 -0
- data/spec/mspire/sequest/sqt_spec.rb +51 -0
- data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
- data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
- data/spec/mspire/sequest/srf/search_spec.rb +131 -0
- data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
- data/spec/mspire/sequest/srf_spec.rb +113 -0
- data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/spec/testfiles/bioworks31.params +77 -0
- data/spec/testfiles/bioworks32.params +62 -0
- data/spec/testfiles/bioworks33.params +63 -0
- data/spec/testfiles/corrupted_900.srf +0 -0
- data/spec/testfiles/small.sqt +87 -0
- data/spec/testfiles/small2.sqt +176 -0
- metadata +185 -0
@@ -0,0 +1,218 @@
|
|
1
|
+
require 'mspire/sequest'
|
2
|
+
require 'mspire/sequest/srf'
|
3
|
+
require 'mspire/sequest/sqt'
|
4
|
+
|
5
|
+
|
6
|
+
module Mspire
  module Sequest
    class Srf

      # Mixin that lets an Srf object serialise itself as an SQT text file.
      # It relies on the including object responding to +params+, +header+,
      # +base_name+, +out_files+ and +dta_files+.
      module Sqt

        # Writes the search results to an SQT file.
        #
        # out_filename:: path of the file to write.  When nil (or false) the
        #                file is named base_name + '.sqt'.
        # opts:: options merged over the defaults below:
        #   :db_info::        (false) when truthy and the database file exists,
        #                     add the DBSeqLength, DBLocusCount and DBMD5Sum
        #                     headers returned by Mspire::Sequest::Sqt.db_info;
        #                     warns when the file does not exist
        #   :new_db_path::    (nil) directory in which to look for the database
        #                     (only the basename of the recorded path is kept)
        #   :update_db_path:: (false) only consulted when :new_db_path is
        #                     given; writes the expanded new path into the
        #                     'Database' header and warns if it does not exist
        #   :round::          (false) round floating point values to a fixed
        #                     number of decimal places
        def to_sqt(out_filename=nil, opts={})
          defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
          opt = defaults.merge(opts)

          outfile = out_filename || (base_name + '.sqt')

          # headers that may occur several times (one 'H' line per value)
          plural = {
            'StaticMod' => sqt_static_mods,
            'DynamicMod' => sqt_dynamic_mods, # example as diff mod
            'Comment' => ['Created from Bioworks .srf file']
          }

          db_filename, db_filename_in_sqt = sqt_database_paths(opt)
          singular = sqt_singular_headers(opt, db_filename, db_filename_in_sqt)

          File.open(outfile, 'w') do |out|
            sqt_write_headers(out, singular, plural)
            sqt_write_spectra(out, opt[:round])
          end # close the filehandle
        end # method

        private

        # Returns 'AVG' for the 'average' mass type and 'MONO' for anything
        # else.
        def sqt_mass_type_label(mass_type)
          mass_type == 'average' ? 'AVG' : 'MONO'
        end

        # Builds the 'StaticMod' header values from params.static_mods.  The
        # label is the second '_'-separated token of each key.  One-character
        # labels are written as params.mass_index[label] plus the modification
        # value; longer labels are written with the raw value.
        def sqt_static_mods
          mass_index = params.mass_index
          params.static_mods.map do |name, delta|
            label = name.split(/_/)[1]
            if label.size == 1
              label + '=' + (mass_index[label] + delta.to_f).to_s
            else
              label + '=' + delta
            end
          end
        end

        # Builds the 'DynamicMod' header values: every parenthesised group in
        # header.modifications, with its first space replaced by '='.
        def sqt_dynamic_mods
          header.modifications.scan(/\((.*?)\)/).map do |captures|
            captures.first.sub(/ /,'=')
          end
        end

        # Returns [db_filename, db_filename_in_sqt]: the path used to look the
        # database up on this file system and the path written to the
        # 'Database' header.  The two differ only when :new_db_path relocates
        # the database without :update_db_path being set.
        def sqt_database_paths(opt)
          db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
          db_filename_in_sqt = db_filename
          if opt[:new_db_path]
            # the recorded path may use Windows separators; keep the basename only
            db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
            if opt[:update_db_path]
              db_filename_in_sqt = File.expand_path(db_filename)
              warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
            end
          end
          [db_filename, db_filename_in_sqt]
        end

        # Maps params.peptide_mass_units ('0', '1', '2') to 'amu', 'mmu' or
        # 'ppm'; any other value gives nil, which is written as an empty
        # field.
        def sqt_precursor_mass_units
          case params.peptide_mass_units
          when '0' ; 'amu'
          when '1' ; 'mmu'
          when '2' ; 'ppm'
          end
        end

        # Assembles the single-valued headers (label => value) in the order
        # they are to be written.
        def sqt_singular_headers(opt, db_filename, db_filename_in_sqt)
          hh = {
            'SQTGenerator' => "mspire: ms-sequest",
            'SQTGeneratorVersion' => Mspire::Sequest::VERSION,
            'Database' => db_filename_in_sqt,
            'FragmentMasses' => sqt_mass_type_label(params.fragment_mass_type),
            'PrecursorMasses' => sqt_mass_type_label(params.precursor_mass_type),
            'StartTime' => '', # Bioworks 3.2 also leaves this blank...
            'Alg-PreMassTol' => params.peptide_mass_tolerance,
            'Alg-FragMassTol' => params.fragment_ion_tolerance,
            'Alg-PreMassUnits' => sqt_precursor_mass_units, ## mine
            'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
            'Alg-Enzyme' => header.enzyme.split(':').last,
            'Alg-MSModel' => header.model,
          }

          if opt[:db_info]
            if File.exist?(db_filename)
              reply = Mspire::Sequest::Sqt.db_info(db_filename)
              %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
                hh[label] = val
              end
            else
              warn "file #{db_filename} does not exist, no extra db info in header!"
            end
          end

          if out_files.size > 0
            # somewhat redundant with above, but we can get this without a db present!
            hh['DBLocusCount'] = out_files.first.db_locus_count
          end
          hh
        end

        # Writes every 'H' line: first the fixed leading labels, then the
        # remaining single-valued headers, then the multi-valued ones.
        # NOTE: the leading labels are deleted from +singular+ as they are
        # written.
        def sqt_write_headers(out, singular, plural)
          invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
          invariant_ordering.each do |label|
            out.puts(['H', label, singular.delete(label)].join("\t"))
          end
          singular.each do |label, val|
            out.puts(['H', label, val].join("\t"))
          end
          plural.each do |label, vals|
            vals.each do |val|
              out.puts(['H', label, val].join("\t"))
            end
          end
        end

        # Writes one 'S' line per out_file/dta_file pair, followed by an 'M'
        # line per hit and an 'L' line per protein of that hit.  When +round+
        # is truthy the floating point fields are rounded first.
        def sqt_write_spectra(out, round)
          # default rounding precision (Decimal Places)
          tic_dp = 2
          mh_dp = 7
          xcorr_dp = 5
          sp_dp = 2
          dcn_dp = 5

          # don't have the time to process (using 0.0 like bioworks 3.2)
          time_to_process = '0.0'
          #########################################
          # NEED TO FIGURE OUT: (in spectra guy)
          #    * Lowest Sp value for top 500 spectra
          #    * Number of sequences matching this precursor ion
          #########################################
          manual_validation_status = 'U'

          out_files.zip(dta_files) do |out_file, dta_file|
            precursor_mh = dta_file.mh
            total_inten = out_file.total_inten
            lowest_sp = out_file.lowest_sp
            if round
              precursor_mh = precursor_mh.round(mh_dp)
              total_inten = total_inten.round(tic_dp)
              lowest_sp = lowest_sp.round(sp_dp)
            end

            out.puts(['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, precursor_mh, total_inten, lowest_sp, out_file.num_matched_peptides].join("\t"))

            out_file.hits.each_with_index do |hit, index|
              hit_mh = hit.mh
              hit_deltacn = hit.deltacn_orig_updated
              hit_xcorr = hit.xcorr
              hit_sp = hit.sp
              if round
                hit_mh = hit_mh.round(mh_dp)
                hit_deltacn = hit_deltacn.round(dcn_dp)
                hit_xcorr = hit_xcorr.round(xcorr_dp)
                hit_sp = hit_sp.round(sp_dp)
              end
              # note that the rank is determined by the order..
              out.puts(['M', index+1, hit.rsp, hit_mh, hit_deltacn, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t"))
              hit.proteins.each do |prot|
                out.puts(['L', prot.first_entry].join("\t"))
              end
            end
          end
        end
      end # Sqt
      include Sqt
    end # Srf
  end # Sequest
end # Mspire
|
172
|
+
|
173
|
+
|
174
|
+
require 'optparse'
|
175
|
+
|
176
|
+
module Mspire::Sequest::Srf::Sqt
|
177
|
+
# Command line entry point: converts each given .srf file to an .sqt file.
#
# argv::     array of command line arguments (switches followed by one or
#            more .srf filenames); the switches are removed from it in place
# progname:: program name shown in the usage banner
#
# With no input files left after option parsing, prints the usage text and
# exits.  Raises a RuntimeError if --outfiles is given with a different
# number of names than there are input files.
def self.commandline(argv, progname=$0)
  opt = {
    :filter => true
  }
  opts = OptionParser.new do |op|
    op.banner = "usage: #{progname} [OPTIONS] <file>.srf ..."
    op.separator "output: <file>.sqt ..."
    op.separator ""
    op.separator "options:"
    op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
    op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
    op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt[:db_update] = v }
    op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included.  Turns this off.") { opt[:filter] = false }
    op.on("-o", "--outfiles <first,...>", Array, "Comma list of output filenames") {|v| opt[:outfiles] = v }
    op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
  end
  opts.parse!(argv)

  if argv.size == 0
    puts opts
    exit
  end

  if opt[:outfiles] && (opt[:outfiles].size != argv.size)
    raise "if outfiles specified, outfiles must be same size as number of input files"
  end

  argv.each_with_index do |srf_file,i|
    outfile =
      if opt[:outfiles]
        opt[:outfiles][i]
      else
        # same name as the input, with its extension swapped for .sqt
        srf_file.chomp(File.extname(srf_file)) + '.sqt'
      end

    # read (not delete) the filter flag so that every input file, not just
    # the first one, is opened with the requested setting
    srf = Mspire::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt[:filter])
    srf.to_sqt(outfile, :db_info => opt[:db_info], :new_db_path => opt[:new_db_path], :update_db_path => opt[:db_update], :round => opt[:round])
  end
end
|
216
|
+
end
|
217
|
+
|
218
|
+
|