mspire-sequest 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/.autotest +30 -0
  2. data/.gitmodules +9 -0
  3. data/History +79 -0
  4. data/LICENSE +22 -0
  5. data/README.rdoc +85 -0
  6. data/Rakefile +52 -0
  7. data/VERSION +1 -0
  8. data/bin/srf_to_pepxml.rb +7 -0
  9. data/bin/srf_to_search.rb +7 -0
  10. data/bin/srf_to_sqt.rb +8 -0
  11. data/lib/mspire/sequest/params.rb +331 -0
  12. data/lib/mspire/sequest/pepxml/modifications.rb +247 -0
  13. data/lib/mspire/sequest/pepxml/params.rb +32 -0
  14. data/lib/mspire/sequest/sqt.rb +393 -0
  15. data/lib/mspire/sequest/srf/pepxml/sequest.rb +21 -0
  16. data/lib/mspire/sequest/srf/pepxml.rb +333 -0
  17. data/lib/mspire/sequest/srf/search.rb +158 -0
  18. data/lib/mspire/sequest/srf/sqt.rb +218 -0
  19. data/lib/mspire/sequest/srf.rb +715 -0
  20. data/lib/mspire/sequest.rb +6 -0
  21. data/script/fasta_ipi_to_ncbi-ish.rb +29 -0
  22. data/spec/mspire/sequest/params_spec.rb +135 -0
  23. data/spec/mspire/sequest/pepxml/modifications_spec.rb +50 -0
  24. data/spec/mspire/sequest/pepxml_spec.rb +311 -0
  25. data/spec/mspire/sequest/sqt_spec.rb +51 -0
  26. data/spec/mspire/sequest/sqt_spec_helper.rb +34 -0
  27. data/spec/mspire/sequest/srf/pepxml_spec.rb +89 -0
  28. data/spec/mspire/sequest/srf/search_spec.rb +131 -0
  29. data/spec/mspire/sequest/srf/sqt_spec.rb +228 -0
  30. data/spec/mspire/sequest/srf_spec.rb +113 -0
  31. data/spec/mspire/sequest/srf_spec_helper.rb +172 -0
  32. data/spec/spec_helper.rb +22 -0
  33. data/spec/testfiles/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  34. data/spec/testfiles/bioworks31.params +77 -0
  35. data/spec/testfiles/bioworks32.params +62 -0
  36. data/spec/testfiles/bioworks33.params +63 -0
  37. data/spec/testfiles/corrupted_900.srf +0 -0
  38. data/spec/testfiles/small.sqt +87 -0
  39. data/spec/testfiles/small2.sqt +176 -0
  40. metadata +185 -0
@@ -0,0 +1,333 @@
1
+ require 'mspire/ident/pepxml'
2
+ require 'mspire/sequest/pepxml/modifications'
3
+ require 'mspire/ident/pepxml/spectrum_query'
4
+ require 'mspire/ident/pepxml/search_result'
5
+ require 'mspire/ident/pepxml/search_hit'
6
+ #require 'mspire/msrun'
7
+ require 'mspire/sequest/srf'
8
+ #require 'mspire/sequest/pepxml'
9
+
10
+ class Mspire::Sequest::Srf
11
+ module Pepxml
12
+
13
+ # A hash with the following *symbol* keys may be set:
14
+ #
15
+ # Run Info
16
+ # *:ms_model*:: nil
17
+ # *:ms_ionization*:: 'ESI'
18
+ # *:ms_detector*:: 'UNKNOWN'
19
+ # *:ms_mass_analyzer*:: nil - <i>typically extracted from the srf file and matched with <b>ModelToMsAnalyzer</b></i>
20
+ # *:ms_manufacturer*:: 'Thermo'
21
+ #
22
+ # Raw data
23
+ # *:mz_dir*:: nil - <i>path to the mz[X]ML directory, defaults to the directory the srf file is contained in. mz[X]ML data must be available to embed retention times</i>
24
+ # *:raw_data*:: \['.mzML', '.mzXML'\] - <i>preferred extension for raw data</i>
25
+ #
26
+ # Database
27
+ # *:db_seq_type*:: 'AA' - <i>AA or NA</i>
28
+ # *:db_dir*:: nil - <i>the directory the fasta file used for the search is housed in. A valid pepxml file must point to a valid fasta file!</i>
29
+ # *:db_residue_size*:: nil - <i>An integer for the number of residues in the database. if true, calculates the size of the fasta database.</i>
30
+ # *:db_name*:: nil
31
+ # *:db_orig_url*:: nil
32
+ # *:db_release_date*:: nil
33
+ # *:db_release_id*:: nil
34
+ #
35
+ # Search Hits
36
+ # *:num_hits*:: 1 - <i>the top number of hits to include</i>
37
+ # *:retention_times*:: false - <i>include retention times in the file (requires mz_dir to be set)</i>
38
+ # *:deltacn_orig*:: false - <i>when true, the original SEQUEST deltacn values are used. If false, Bioworks deltacn values are used which are derived by taking the original deltacn of the following hit. This gives the top ranking hit an informative deltacn but makes the deltacn meaningless for other hits.</i>
39
+ #
40
+ # *:pepxml_version*:: Mspire::Ident::Pepxml::DEFAULT_PEPXML_VERSION, - <i>Integer to set the pepxml version. The converter and xml output attempts to produce xml specific to the version.</i>
41
+ # *:verbose*:: true - <i>set to false to quiet warnings</i>
42
+ DEFAULT_OPTIONS = {
43
+ :ms_model => nil,
44
+ :ms_ionization => 'ESI',
45
+ :ms_detector => 'UNKNOWN',
46
+ :ms_mass_analyzer => nil,
47
+ :ms_manufacturer => 'Thermo',
48
+
49
+ :mz_dir => nil,
50
+ #:raw_data => [".mzXML", '.mzML'],
51
+ :raw_data => ['.mzML', '.mzXML'],
52
+
53
+ :db_seq_type => 'AA',
54
+ :db_dir => nil,
55
+ :db_residue_size => nil,
56
+ :db_name => nil,
57
+ :db_orig_url => nil,
58
+ :db_release_date => nil,
59
+ :db_release_id => nil,
60
+
61
+ :num_hits => 1,
62
+ :retention_times => false,
63
+ :deltacn_orig => false,
64
+
65
+ :pepxml_version => Mspire::Ident::Pepxml::DEFAULT_PEPXML_VERSION,
66
+ :verbose => true,
67
+ }
68
+
69
+ # An array of regexp to string pairs. The regexps are matched against the
70
+ # model (srf.header.model) and the corresponding string will be used as
71
+ # the mass analyzer.
72
+ #
73
+ # /Orbitrap/:: 'Orbitrap'
74
+ # /LCQ Deca XP/:: 'Ion Trap'
75
+ # /LTQ/:: 'Ion Trap'
76
+ # /\w+/:: 'UNKNOWN'
77
+ ModelToMsAnalyzer = [
78
+ [/Orbitrap/, 'Orbitrap'],
79
+ [/LCQ Deca XP/, 'Ion Trap'],
80
+ [/LTQ/, 'Ion Trap'],
81
+ [/\w+/, 'UNKNOWN'],
82
+ ]
83
+
84
+ # returns an Mspire::Ident::Pepxml object. See that object for creating an
85
+ # xml string or writing to file.
86
# Builds an Mspire::Ident::Pepxml object describing this SRF search.
#
# opts:: a hash of symbol keys overriding DEFAULT_OPTIONS (see the
#        documentation on that constant for the recognized keys).
#        An optional :enzyme key overrides the enzyme name taken
#        from the sequest params.
#
# Returns the Mspire::Ident::Pepxml object. See that class for creating an
# xml string or writing to file.
#
# Raises NotImplementedError if :retention_times is requested, and a
# RuntimeError if the enzyme offset in the params is neither 0 nor 1.
def to_pepxml(opts={})
  opt = DEFAULT_OPTIONS.merge(opts)
  srf = self

  # with newer pepxml version these are not required anymore
  hidden_opts = {
    # format of file storing the runner up peptides (if not present in
    # pepXML) this was made optional after version 19
    :out_data_type => "out", ## may be srf??
    # runner up search hit data type extension (e.g. .tgz)
    :out_data => ".srf",
  }
  opt.merge!(hidden_opts)

  params = srf.params
  header = srf.header

  opt[:ms_model] ||= header.model

  unless opt[:ms_mass_analyzer]
    # to_s keeps a missing model (nil) from raising NoMethodError; in that
    # case nothing matches and the mass analyzer simply stays nil.
    model = opt[:ms_model].to_s
    matched = ModelToMsAnalyzer.find {|regexp, _analyzer| model.match(regexp) }
    opt[:ms_mass_analyzer] = matched.last if matched
  end

  # get the database name
  db_filename = header.db_filename.sub(/\.hdr$/, '')
  if opt[:db_dir]
    # keep only the file name (the embedded path may use \ or / separators)
    db_filename = File.join(opt[:db_dir], db_filename.split(/[\/\\]+/).last)
  end
  if File.exist?(db_filename)
    db_filename = File.expand_path(db_filename)
  else
    msg = ["!!! WARNING !!!"]
    msg << "!!! Can't find database: #{db_filename}"
    msg << "!!! pepxml *requires* that the db path be valid"
    msg << "!!! make sure 1) the fasta file is available on this system"
    msg << "!!! 2) you've specified a valid directory with --db-dir (or :db_dir)"
    puts msg.join("\n") if opt[:verbose]
  end

  modifications_obj = Mspire::Sequest::Pepxml::Modifications.new(params, header.modifications)
  mass_index = params.mass_index(:precursor)
  h_plus = mass_index['h+']

  # narrow the preferred raw data extensions down to the first one for
  # which a file actually exists next to the srf file (or in :mz_dir)
  opt[:mz_dir] ||= srf.resident_dir
  found_ext = opt[:raw_data].find do |raw_data|
    Dir[File.join(opt[:mz_dir], srf.base_name_noext + raw_data)].first
  end
  opt[:raw_data] = [found_ext] if found_ext

  scan_to_ret_time =
    if opt[:retention_times]
      raise NotImplementedError, "will implement shortly"
      #mz_file = Dir[File.join(opt[:mz_dir], srf.base_name_noext + opt[:raw_data].first)].first
      #if mz_file
      #  Mspire::Msrun.scans_to_times(mz_file)
      #else
      #  warn "turning retention_times off since no valid mz[X]ML file was found!!!"
      #  opt[:retention_times] = false
      #  nil
      #end
    end

  summary_xml_filename = srf.base_name_noext + '.xml'

  pepxml = Mspire::Ident::Pepxml.new do |msms_pipeline_analysis|
    msms_pipeline_analysis.merge!(:summary_xml => summary_xml_filename, :pepxml_version => opt[:pepxml_version]) do |msms_run_summary|
      # prep the sample enzyme and search_summary
      msms_run_summary.merge!(
        :base_name => File.join(opt[:mz_dir], srf.base_name_noext),
        :ms_manufacturer => opt[:ms_manufacturer],
        :ms_model => opt[:ms_model],
        :ms_ionization => opt[:ms_ionization],
        :ms_mass_analyzer => opt[:ms_mass_analyzer],
        :ms_detector => opt[:ms_detector],
        :raw_data => opt[:raw_data].first,
        :raw_data_type => opt[:raw_data].first,
      ) do |sample_enzyme, search_summary, spectrum_queries|
        enzyme_data = Hash[ [:offset, :cut, :no_cut].zip(params.enzyme_specificity) ]
        # if the offset is 1, it is C terminal, offset == 0 then it is N
        # terminal
        enzyme_data[:sense] =
          case enzyme_data.delete(:offset)
          when 1 ; 'C'
          when 0 ; 'N'
          else
            raise "pepxml cannot deal with enzymes that don't have an offset of 0 or 1"
          end

        enzyme_data[:name] = params.enzyme
        sample_enzyme.merge!( enzyme_data )
        sample_enzyme.name = opt[:enzyme] if opt[:enzyme]
        search_summary.merge!(
          :base_name=> srf.resident_dir + '/' + srf.base_name_noext,
          :search_engine => 'SEQUEST',
          :precursor_mass_type => params.precursor_mass_type,
          :fragment_mass_type => params.fragment_mass_type,
          :out_data_type => opt[:out_data_type],
          :out_data => opt[:out_data],
        ) do |search_database, enzymatic_search_constraint, modifications_ar, parameters_hash|
          search_database.merge!(
            :local_path => db_filename,
            :seq_type => opt[:db_seq_type],
            :database_name => opt[:db_name],
            :orig_database_url => opt[:db_orig_url],
            :database_release_date => opt[:db_release_date],
            :database_release_identifier => opt[:db_release_id]
          )

          # an Integer is used verbatim; true asks the database object to
          # compute the size itself; anything else leaves it unset
          case opt[:db_residue_size]
          when Integer
            search_database.size_of_residues = opt[:db_residue_size]
          when true
            search_database.set_size_of_residues!
          end

          enzymatic_search_constraint.merge!(
            :enzyme => opt[:enzyme] ? opt[:enzyme] : params.enzyme,
            :max_num_internal_cleavages => params.max_num_internal_cleavages,
            :min_number_termini => params.min_number_termini,
          )
          modifications_ar.replace(modifications_obj.modifications)
          parameters_hash.merge!(params.opts)
        end

        # one spectrum query per dta/out pair; i_ar holds the index entry
        # whose first three values are used as start scan, end scan, charge
        spec_queries = srf.dta_files.zip(srf.out_files, index).map do |dta_file,out_file,i_ar|
          precursor_neutral_mass = dta_file.mh - h_plus

          search_hits = out_file.hits[0,opt[:num_hits]].each_with_index.map do |pep,i|
            (prev_aa, pure_aaseq, next_aa) = Mspire::Ident::Peptide.prepare_sequence(pep.sequence)
            calc_neutral_pep_mass = pep.mh - h_plus
            Mspire::Ident::Pepxml::SearchHit.new(
              :hit_rank => i+1,
              :peptide => pure_aaseq,
              :peptide_prev_aa => prev_aa,
              :peptide_next_aa => next_aa,
              :protein => pep.proteins.first.reference.split(' ')[0],
              :num_tot_proteins => pep.proteins.size,
              :num_matched_ions => pep.ions_matched,
              :tot_num_ions => pep.ions_total,
              :calc_neutral_pep_mass => calc_neutral_pep_mass,
              :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
              :num_tol_term => sample_enzyme.num_tol_term(prev_aa, pure_aaseq, next_aa),
              :num_missed_cleavages => sample_enzyme.num_missed_cleavages(pure_aaseq),
              :modification_info => modifications_obj.modification_info(Mspire::Ident::Peptide.split_sequence(pep.sequence)[1])
            ) do |search_scores|
              if opt[:deltacn_orig]
                deltacn = pep.deltacn_orig
                deltacnstar = nil
              else
                deltacn = pep.deltacn
                # a deltacn of exactly 1.1 is reported as 1.0
                deltacn = 1.0 if deltacn == 1.1
                # '1' when no hit follows this one in the out file
                deltacnstar = out_file.hits[i+1].nil? ? '1' : '0'
              end
              search_scores.merge!( :xcorr => pep.xcorr, :deltacn => deltacn,
                                   :spscore => pep.sp, :sprank => pep.rsp)
              search_scores[:deltacnstar] = deltacnstar if deltacnstar
            end
          end

          sr = Mspire::Ident::Pepxml::SearchResult.new(:search_hits => search_hits)

          ret_time =
            if opt[:retention_times]
              (first_scan, last_scan) = i_ar[0,2]
              if first_scan==last_scan
                scan_to_ret_time[first_scan]
              else
                # average the known times across the scan range; nil when
                # none of the scans has a recorded time
                times = (first_scan..last_scan).map {|scan| scan_to_ret_time[scan] }.compact
                times.inject(&:+) / times.size.to_f unless times.empty?
              end
            end
          Mspire::Ident::Pepxml::SpectrumQuery.new(
            :spectrum => [srf.base_name_noext, *i_ar].join('.'), :start_scan => i_ar[0], :end_scan => i_ar[1],
            :precursor_neutral_mass => precursor_neutral_mass, :assumed_charge => i_ar[2],
            :retention_time_sec => ret_time,
            :search_results => [sr],
          )
        end
        spectrum_queries.replace(spec_queries)
      end
    end
  end
  pepxml
end # to_pepxml
268
+ end # Srf::Pepxml
269
+ include Pepxml
270
+ end # Srf
271
+
272
+
273
+ require 'trollop'
274
+
275
+ module Mspire::Sequest::Srf::Pepxml
276
# Command line entry point converting one or more .srf files to pepxml.
#
# argv::     the command line arguments (options followed by srf files)
# progname:: the program name shown in the usage banner
#
# Prints the usage and exits when no srf file is given. For every srf
# file, writes the pepxml into the matching --outdirs entry (or, by
# default, the directory holding the srf file).
def self.commandline(argv, progname=$0)
  opts = Trollop::Parser.new do
    banner %Q{
    usage: #{progname} [OPTIONS] <file>.srf ...
    output: <file>.xml ...
    }.lines.map(&:lstrip).join

    text ""
    text "major options:"
    opt :db_dir, "The dir holding the DB if different than in Srf. (pepxml requires a valid database path)", :type => :string
    opt :enzyme, "overide the enzyme name embedded in the params file", :type => :string
    opt :mz_dir, "directory holding mz[X]ML files (defaults to the folder holding the srf file)", :type => :string
    opt :retention_times, "include retention times (requires mz-dir)"
    opt :deltacn_orig, "use original deltacn values created by SEQUEST. By default, the top hit gets the next hit's original deltacn."
    opt :no_filter, "do not filter hits by peptide_mass_tolerance (per sequest params)"
    opt :num_hits, "include N top hits", :default => 1
    opt :outdirs, "list of output directories", :type => :strings
    opt :quiet, "do not print warnings, etc."

    text ""
    text "minor options:"
    opt :pepxml_version, 'schema version number to use', :default => Mspire::Ident::Pepxml::DEFAULT_PEPXML_VERSION
    opt :ms_model, 'mass spectrometer model', :type => :string
    opt :ms_ionization, 'type of ms ionization', :default => 'ESI'
    opt :ms_detector, 'ms detector', :default => 'UNKNOWN'
    opt :ms_mass_analyzer, 'ms mass analyzer', :type => :string
    opt :ms_manufacturer, 'ms manufacturer', :default => 'Thermo'
    opt :raw_data, 'preferred extension for raw data', :default => '.mzML'
    opt :db_seq_type, "'AA' or 'NA'", :default => 'AA'
    opt :db_residue_size, 'calculate the size of the fasta file'
    opt :db_name, 'the database name', :type => :string
    opt :db_orig_url, 'original database url', :type => :string
    opt :db_release_date, 'database release date', :type => :string
    opt :db_release_id, 'the database release identifier', :type => :string
  end

  opt = opts.parse argv
  if argv.empty?
    opts.educate
    exit
  end

  # die on the parser instance itself: it was built with Parser.new rather
  # than through Trollop.options
  if opt[:outdirs] && opt[:outdirs].size != argv.size
    opts.die :outdirs, "outdirs must be same size as number of input files"
  end

  # These two are consumed here (never handed to to_pepxml). They are read
  # once, before the loop, so that every input file sees the same values.
  filter = !opt.delete(:no_filter)
  outdirs = opt.delete(:outdirs) || []

  opt[:raw_data] = [opt[:raw_data]] if opt[:raw_data]
  opt[:verbose] = !opt[:quiet]

  argv.zip(outdirs) do |srf_file,outdir|
    outdir ||= File.dirname(srf_file)
    srf = Mspire::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => filter)
    pepxml = srf.to_pepxml(opt)
    outfile = pepxml.to_xml(outdir)
    puts "wrote file: #{outfile}" if opt[:verbose]
  end
end
329
+ end
330
+
331
+
332
+
333
+
@@ -0,0 +1,158 @@
1
+
2
+ require 'mspire/sequest/srf'
3
+ require 'mspire/mass'
4
+
5
+ # These are for outputting formats used in MS/MS Search engines
6
+
7
+ module Mspire
8
+ module Sequest
9
+ class Srf
10
+ module Search
11
+ # Writes an MGF file to given filename or base_name + '.mgf' if no
12
+ # filename given.
13
+ #
14
+ # This mimics the output of merge.pl from Mascot. The only difference is
15
+ # that this does not include the "\r\n" that is found after the peak
16
+ # lists, instead, it uses "\n" throughout the file (thinking that this
17
+ # is preferable to mixing newline styles!)
18
# Writes every dta spectrum of this srf as one "BEGIN IONS" .. "END IONS"
# record into an MGF file.
#
# filename:: the file to write; when nil (or false) base_name + '.mgf'
#
# The precursor m/z written as PEPMASS is derived from the dta's M+H
# mass and charge. The packed peak string is decoded as little-endian
# single precision floats and written two values per line.
def to_mgf(filename=nil)
  filename ||= base_name + '.mgf'
  proton_mass = Mspire::Mass::H_PLUS
  File.open(filename, 'wb') do |mgf|
    dta_files.zip(index) do |spectrum, scan_info|
      z = spectrum.charge
      mgf.print "BEGIN IONS\n"
      mgf.print "TITLE=#{[base_name, *scan_info, 'dta'].join('.')}\n"
      mgf.print "CHARGE=#{z}+\n"
      mgf.print "PEPMASS=#{(spectrum.mh+((z-1)*proton_mass))/z}\n"
      spectrum.peaks.unpack('e*').each_slice(2) do |peak|
        mgf.print( peak.join(' '), "\n")
      end
      mgf.print "END IONS\n"
      mgf.print "\n"
    end
  end
end
41
+
42
+ # not given an out_folder, will make one with the basename
43
+ # compress may be: :zip, :tgz, or nil (no compression)
44
+ # :zip requires gem rubyzip to be installed and is *very* bloated
45
+ # as it writes out all the files first!
46
+ # :tgz requires gem archive-tar-minitar to be installed
47
# Writes one .dta file per spectrum, optionally into an archive.
#
# out_folder:: the directory (or archive base name) to write to; when not
#              given, base_name is used
# compress::   :zip, :tgz, or nil (no compression)
#              :zip requires gem rubyzip to be installed and is *very*
#              bloated as it writes out all the files first!
#              :tgz requires gem archive-tar-minitar to be installed
#
# Each file is named <base_name>.<index entry joined by '.'>.dta. The
# archives are written to out_folder + '.tgz' / '.zip'. Aborts when the
# gem needed for the requested compression cannot be loaded.
def to_dta(out_folder=nil, compress=nil)
  outdir = out_folder || base_name

  case compress
  when :tgz
    begin
      require 'archive/tar/minitar'
    rescue LoadError
      abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
    end
    require 'archive/targz' # my own simplified interface!
    require 'zlib'
    names = index.map do |i_ar|
      [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
    end
    #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)

    tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))

    Archive::Tar::Minitar::Output.open(tgz) do |outp|
      dta_files.each_with_index do |dta_file, i|
        Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
      end
    end
  when :zip
    begin
      require 'zip/zipfilesystem'
    rescue LoadError
      abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
    end
    Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
      dta_files.zip(index) do |dta,i_ar|
        #zfs.mkdir(outdir)
        zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
          dta.write_dta_file(out)
          #zfs.commit
        end
      end
    end
  else # no compression
    # loaded here like the other optional libraries of this method, so the
    # branch does not depend on some other file having required fileutils
    require 'fileutils'
    FileUtils.mkpath(outdir)
    # build full paths instead of Dir.chdir: chdir would change the working
    # directory of the whole process while the files are being written
    dta_files.zip(index) do |dta,i_ar|
      dta_path = File.join(outdir, [base_name, *i_ar].join('.') << '.dta')
      File.open(dta_path, 'wb') do |out|
        dta.write_dta_file(out)
      end
    end
  end
end
101
+ end # Search
102
+
103
+ include Search
104
+
105
+ end # Srf
106
+ end # Sequest
107
+ end # Mspire
108
+
109
+
110
+ require 'optparse'
111
+ module Mspire::Sequest::Srf::Search
112
# Command line entry point converting .srf files to search input formats.
#
# argv::     the command line arguments; recognized options are removed
#            from it, the remaining entries are the srf files
# progname:: the program name shown (as a basename) in the usage banner
#
# Prints the usage and exits when no srf file is given. Raises
# ArgumentError for an unknown --format and a RuntimeError when the
# number of --outfiles differs from the number of input files.
def self.commandline(argv, progname=$0)
  known_formats = %w(mgf dta)
  opt = {
    :format => 'mgf'
  }
  opts = OptionParser.new do |op|
    op.banner = "usage: #{File.basename(progname)} <file>.srf ..."
    op.separator "outputs: <file>.mgf ..."
    op.on("-f", "--format <mgf|dta>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
    op.on("-o", "--outfiles <String,...>", Array, "comma list of output files or directories") {|v| opt[:outfiles] = v }
  end

  opts.parse!(argv)

  if argv.size == 0
    puts opts
    exit
  end

  format = opt[:format]

  # fail before any srf file is read rather than silently writing nothing
  unless known_formats.include?(format)
    raise ArgumentError, "unknown format '#{format}' (expected one of: #{known_formats.join(', ')})"
  end

  if opt[:outfiles] && (opt[:outfiles].size != argv.size)
    raise "if outfiles specified, needs the same number of files as input files"
  end

  argv.each_with_index do |srf_file,i|
    base = srf_file.chomp(File.extname(srf_file))
    newfile =
      if opt[:outfiles]
        opt[:outfiles][i]
      elsif format == 'dta'
        base                  # a directory named after the srf file
      else
        base + '.' + format   # <file>.mgf
      end
    # options just speed up reading since we don't need .out info anyway
    srf = Mspire::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
    case format
    when 'mgf'
      srf.to_mgf(newfile)
    when 'dta'
      srf.to_dta(newfile)
    end
  end
end
158
+ end