ms-sequest 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
data/History CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.0.14 / 2010-08-24
2
+
3
+ * Merged commandline programs into lib hierarchy for testing
4
+ * Wrote specs for commandline programs (shared spec with programmatic interface)
5
+
1
6
  == 0.0.13 / 2010-08-16
2
7
 
3
8
  * compatible with ruby 1.9
@@ -1,9 +1,16 @@
1
- = {ms-sequest}[http://jtprince.github.com/ms-template/rdoc/]
1
+ = ms-sequest
2
2
 
3
3
  An {mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
4
4
 
5
+ == {API}[http://yardoc.org/docs/jtprince-ms-sequest]
6
+
5
7
  == Examples
6
8
 
9
+ Provides two executables for extracting information from an Srf file (run without file for usage):
10
+
11
+ srf_to_sqt.rb file.srf # => file.sqt
12
+ srf_to_search.rb file.srf # => file.mgf (also can make .dta files)
13
+
7
14
  === Ms::Sequest::Srf
8
15
 
9
16
  Can read and convert Bioworks Sequest Results Files (SRF).
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ gemspec = Gem::Specification.new do |s|
10
10
  s.name = NAME
11
11
  s.authors = ["John T. Prince"]
12
12
  s.email = "jtprince@gmail.com"
13
- s.homepage = "http://jtprince.github.com/" + NAME
13
+ s.homepage = "http://github.com/jtprince/" + NAME
14
14
  s.summary = "An mspire library supporting SEQUEST, Bioworks, SQT, etc"
15
15
  s.description = "reads .SRF, .SQT and supports conversions"
16
16
  s.rubyforge_project = 'mspire'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.13
1
+ 0.0.14
@@ -1,41 +1,7 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
3
  require 'rubygems'
4
- require 'optparse'
5
4
  require 'ms/sequest/srf/search'
6
5
 
7
- opt = {
8
- :format => 'mgf'
9
- }
6
+ Ms::Sequest::Srf::Search.commandline(ARGV, File.basename(__FILE__)) # delegate to the library entry point
10
7
 
11
- opts = OptionParser.new do |op|
12
- op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
13
- op.separator "outputs: <file>.mgf"
14
- op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
15
- end
16
-
17
- if ARGV.size == 0
18
- puts opts
19
- exit
20
- end
21
-
22
- format = opt[:format]
23
-
24
- ARGV.each do |srf_file|
25
- base = srf_file.sub(/\.srf$/i, '')
26
- newfile =
27
- case format
28
- when 'dta'
29
- base
30
- when 'mgf'
31
- base << '.' << format
32
- end
33
- srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
34
- # options just speed up reading since we don't need .out info anyway
35
- case format
36
- when 'mgf'
37
- srf.to_mgf(newfile)
38
- when 'dta'
39
- srf.to_dta_files(newfile)
40
- end
41
- end
@@ -3,33 +3,6 @@
3
3
  require 'rubygems'
4
4
  require 'ms/sequest/srf/sqt'
5
5
 
6
- opt = {
7
- :filter => true
8
- }
9
- opts = OptionParser.new do |op|
10
- op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
11
- op.separator "output: <file>.sqt ..."
12
- op.separator ""
13
- op.separator "options:"
14
- op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
15
- op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
16
- op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
17
- op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included. Turns this off.") { opt[:filter] = false }
18
- op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
19
- end
20
- opts.parse!
21
-
22
- if ARGV.size == 0
23
- puts(opts) || exit
24
- end
25
-
26
- ARGV.each do |srf_file|
27
- base = srf_file.chomp(File.extname(srf_file))
28
- outfile = base + '.sqt'
29
-
30
- srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt.delete(:filter))
31
- srf.to_sqt(outfile, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
32
- end
33
-
6
+ Ms::Sequest::Srf::Sqt.commandline(ARGV, File.basename(__FILE__))
34
7
 
35
8
 
@@ -7,100 +7,150 @@ require 'ms/mass'
7
7
  module Ms
8
8
  module Sequest
9
9
  class Srf
10
-
11
- # Writes an MGF file to given filename or base_name + '.mgf' if no
12
- # filename given.
13
- #
14
- # This mimicks the output of merge.pl from mascot The only difference is
15
- # that this does not include the "\r\n" that is found after the peak
16
- # lists, instead, it uses "\n" throughout the file (thinking that this
17
- # is preferable to mixing newline styles!)
18
- def to_mgf(filename=nil)
19
- filename =
20
- if filename ; filename
21
- else
22
- base_name + '.mgf'
23
- end
24
- h_plus = Ms::Mass::MASCOT_H_PLUS
25
- File.open(filename, 'wb') do |out|
26
- dta_files.zip(index) do |dta, i_ar|
27
- chrg = dta.charge
28
- out.print "BEGIN IONS\n"
29
- out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
30
- out.print "CHARGE=#{chrg}+\n"
31
- out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
32
- peak_ar = dta.peaks.unpack('e*')
33
- (0...(peak_ar.size)).step(2) do |i|
34
- out.print( peak_ar[i,2].join(' '), "\n")
10
+ module Search
11
+ # Writes an MGF file to given filename or base_name + '.mgf' if no
12
+ # filename given.
13
+ #
14
+ # This mimics the output of merge.pl from mascot. The only difference is
15
+ # that this does not include the "\r\n" that is found after the peak
16
+ # lists, instead, it uses "\n" throughout the file (thinking that this
17
+ # is preferable to mixing newline styles!)
18
+ def to_mgf(filename=nil)
19
+ filename =
20
+ if filename ; filename
21
+ else
22
+ base_name + '.mgf'
23
+ end
24
+ h_plus = Ms::Mass::MASCOT_H_PLUS
25
+ File.open(filename, 'wb') do |out|
26
+ dta_files.zip(index) do |dta, i_ar|
27
+ chrg = dta.charge
28
+ out.print "BEGIN IONS\n"
29
+ out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
30
+ out.print "CHARGE=#{chrg}+\n"
31
+ out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
32
+ peak_ar = dta.peaks.unpack('e*')
33
+ (0...(peak_ar.size)).step(2) do |i|
34
+ out.print( peak_ar[i,2].join(' '), "\n")
35
+ end
36
+ out.print "END IONS\n"
37
+ out.print "\n"
35
38
  end
36
- out.print "END IONS\n"
37
- out.print "\n"
38
39
  end
39
40
  end
40
- end
41
41
 
42
- # not given an out_folder, will make one with the basename
43
- # compress may be: :zip, :tgz, or nil (no compression)
44
- # :zip requires gem rubyzip to be installed and is *very* bloated
45
- # as it writes out all the files first!
46
- # :tgz requires gem archive-tar-minitar to be installed
47
- def to_dta(out_folder=nil, compress=nil)
48
- outdir =
49
- if out_folder ; out_folder
50
- else base_name
51
- end
42
+ # not given an out_folder, will make one with the basename
43
+ # compress may be: :zip, :tgz, or nil (no compression)
44
+ # :zip requires gem rubyzip to be installed and is *very* bloated
45
+ # as it writes out all the files first!
46
+ # :tgz requires gem archive-tar-minitar to be installed
47
+ def to_dta(out_folder=nil, compress=nil)
48
+ outdir =
49
+ if out_folder ; out_folder
50
+ else base_name
51
+ end
52
52
 
53
- case compress
54
- when :tgz
55
- begin
56
- require 'archive/tar/minitar'
57
- rescue LoadError
58
- abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
59
- end
60
- require 'archive/targz' # my own simplified interface!
61
- require 'zlib'
62
- names = index.map do |i_ar|
63
- [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
64
- end
65
- #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
53
+ case compress
54
+ when :tgz
55
+ begin
56
+ require 'archive/tar/minitar'
57
+ rescue LoadError
58
+ abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
59
+ end
60
+ require 'archive/targz' # my own simplified interface!
61
+ require 'zlib'
62
+ names = index.map do |i_ar|
63
+ [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
64
+ end
65
+ #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
66
66
 
67
- tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
67
+ tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
68
68
 
69
- Archive::Tar::Minitar::Output.open(tgz) do |outp|
70
- dta_files.each_with_index do |dta_file, i|
71
- Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
69
+ Archive::Tar::Minitar::Output.open(tgz) do |outp|
70
+ dta_files.each_with_index do |dta_file, i|
71
+ Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
72
+ end
72
73
  end
73
- end
74
- when :zip
75
- begin
76
- require 'zip/zipfilesystem'
77
- rescue LoadError
78
- abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
79
- end
80
- #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
81
- Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
82
- dta_files.zip(index) do |dta,i_ar|
83
- #zfs.mkdir(outdir)
84
- zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
85
- dta.write_dta_file(out)
86
- #zfs.commit
74
+ when :zip
75
+ begin
76
+ require 'zip/zipfilesystem'
77
+ rescue LoadError
78
+ abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
79
+ end
80
+ #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
81
+ Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
82
+ dta_files.zip(index) do |dta,i_ar|
83
+ #zfs.mkdir(outdir)
84
+ zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
85
+ dta.write_dta_file(out)
86
+ #zfs.commit
87
+ end
87
88
  end
88
89
  end
89
- end
90
- else # no compression
91
- FileUtils.mkpath(outdir)
92
- Dir.chdir(outdir) do
93
- dta_files.zip(index) do |dta,i_ar|
94
- File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
95
- dta.write_dta_file(out)
90
+ else # no compression
91
+ FileUtils.mkpath(outdir)
92
+ Dir.chdir(outdir) do
93
+ dta_files.zip(index) do |dta,i_ar|
94
+ File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
95
+ dta.write_dta_file(out)
96
+ end
96
97
  end
97
98
  end
98
99
  end
99
100
  end
100
- end
101
+ end # Search
102
+
103
+ include Search
101
104
 
102
105
  end # Srf
103
106
  end # Sequest
104
107
  end # Ms
105
108
 
106
109
 
110
+ require 'optparse'
111
+ module Ms::Sequest::Srf::Search
112
+ def self.commandline(argv, progname=$0)
113
+ opt = {
114
+ :format => 'mgf'
115
+ }
116
+ opts = OptionParser.new do |op|
117
+ op.banner = "usage: #{File.basename(__FILE__)} <file>.srf ..."
118
+ op.separator "outputs: <file>.mgf ..."
119
+ op.on("-f", "--format <mgf|dta>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
120
+ op.on("-o", "--outfiles <String,...>", Array, "comma list of output files or directories") {|v| opt[:outfiles] = v }
121
+ end
122
+
123
+ opts.parse!(argv)
124
+
125
+ if argv.size == 0
126
+ puts(opts) || exit
127
+ end
128
+
129
+ format = opt[:format]
130
+
131
+ raise "if outfiles specified, needs the same number of files as input files" unless argv.size == opt[:outfiles].size
132
+
133
+ argv.each_with_index do |srf_file,i|
134
+ base = srf_file.sub(/\.srf$/i, '')
135
+ newfile =
136
+ if opt[:outfiles]
137
+ opt[:outfiles][i]
138
+ else
139
+ case format
140
+ when 'dta'
141
+ base
142
+ when 'mgf'
143
+ base << '.' << format
144
+ end
145
+ end
146
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
147
+ # options just speed up reading since we don't need .out info anyway
148
+ case format
149
+ when 'mgf'
150
+ srf.to_mgf(newfile)
151
+ when 'dta'
152
+ srf.to_dta(newfile)
153
+ end
154
+ end
155
+ end
156
+ end
@@ -1,168 +1,224 @@
1
- require 'tap/task'
1
+
2
2
  require 'ms/calc'
3
3
  require 'ms/sequest'
4
4
  require 'ms/sequest/srf'
5
5
  require 'ms/sequest/sqt'
6
6
 
7
+
7
8
  module Ms
8
9
  module Sequest
9
10
  class Srf
11
+
12
+ module Sqt
10
13
 
11
- # the out_filename will be the base_name + .sqt unless 'out_filename' is
12
- # defined
13
- # :round => round floating point numbers
14
- # etc...
15
- def to_sqt(out_filename=nil, opts={})
16
- # default rounding precision (Decimal Places)
17
- tic_dp = 2
18
- mh_dp = 7
19
- xcorr_dp = 5
20
- sp_dp = 2
21
- dcn_dp = 5
22
-
23
- defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
24
- opt = defaults.merge(opts)
25
-
26
- outfile =
27
- if out_filename
28
- out_filename
29
- else
30
- base_name + '.sqt'
31
- end
32
- invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
33
- fmt =
34
- if params.fragment_mass_type == 'average' ; 'AVG'
35
- else ; 'MONO'
36
- end
37
- pmt =
38
- if params.precursor_mass_type == 'average' ; 'AVG'
39
- else ; 'MONO'
40
- end
14
+ def self.commandline(argv)
15
+ require 'optparse'
16
+
41
17
 
42
- mass_index = params.mass_index
43
- static_mods = params.static_mods.map do |k,v|
44
- key = k.split(/_/)[1]
45
- if key.size == 1
46
- key + '=' + (mass_index[key] + v.to_f).to_s
47
- else
48
- key + '=' + v
49
- end
50
18
  end
51
19
 
52
- dynamic_mods = []
53
- header.modifications.scan(/\((.*?)\)/) do |match|
54
- dynamic_mods << match.first.sub(/ /,'=')
55
- end
56
- plural = {
57
- 'StaticMod' => static_mods,
58
- 'DynamicMod' => dynamic_mods, # example as diff mod
59
- 'Comment' => ['Created from Bioworks .srf file']
60
- }
61
-
62
- db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
63
- db_filename_in_sqt = db_filename
64
- if opt[:new_db_path]
65
- db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
66
- if opt[:update_db_path]
67
- db_filename_in_sqt = File.expand_path(db_filename)
68
- warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
20
+ # the out_filename will be the base_name + .sqt unless 'out_filename' is
21
+ # defined
22
+ # :round => round floating point numbers
23
+ # etc...
24
+ def to_sqt(out_filename=nil, opts={})
25
+ # default rounding precision (Decimal Places)
26
+ tic_dp = 2
27
+ mh_dp = 7
28
+ xcorr_dp = 5
29
+ sp_dp = 2
30
+ dcn_dp = 5
31
+
32
+ defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
33
+ opt = defaults.merge(opts)
34
+
35
+ outfile =
36
+ if out_filename
37
+ out_filename
38
+ else
39
+ base_name + '.sqt'
40
+ end
41
+ invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
42
+ fmt =
43
+ if params.fragment_mass_type == 'average' ; 'AVG'
44
+ else ; 'MONO'
45
+ end
46
+ pmt =
47
+ if params.precursor_mass_type == 'average' ; 'AVG'
48
+ else ; 'MONO'
49
+ end
50
+
51
+ mass_index = params.mass_index
52
+ static_mods = params.static_mods.map do |k,v|
53
+ key = k.split(/_/)[1]
54
+ if key.size == 1
55
+ key + '=' + (mass_index[key] + v.to_f).to_s
56
+ else
57
+ key + '=' + v
58
+ end
69
59
  end
70
- end
71
60
 
72
- apmu =
73
- case params.peptide_mass_units
74
- when '0' ; 'amu'
75
- when '1' ; 'mmu'
76
- when '2' ; 'ppm'
61
+ dynamic_mods = []
62
+ header.modifications.scan(/\((.*?)\)/) do |match|
63
+ dynamic_mods << match.first.sub(/ /,'=')
77
64
  end
65
+ plural = {
66
+ 'StaticMod' => static_mods,
67
+ 'DynamicMod' => dynamic_mods, # example as diff mod
68
+ 'Comment' => ['Created from Bioworks .srf file']
69
+ }
78
70
 
79
- hh = {
80
- 'SQTGenerator' => "mspire: ms-sequest",
81
- 'SQTGeneratorVersion' => Ms::Sequest::VERSION,
82
- 'Database' => db_filename_in_sqt,
83
- 'FragmentMasses' => fmt,
84
- 'PrecursorMasses' => pmt,
85
- 'StartTime' => '', # Bioworks 3.2 also leaves this blank...
86
- 'Alg-PreMassTol' => params.peptide_mass_tolerance,
87
- 'Alg-FragMassTol' => params.fragment_ion_tolerance,
88
- 'Alg-PreMassUnits' => apmu, ## mine
89
- 'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
90
- 'Alg-Enzyme' => header.enzyme.split(':').last,
91
- 'Alg-MSModel' => header.model,
92
- }
93
-
94
- if opt[:db_info]
95
- if File.exist?(db_filename)
96
- reply = Ms::Sequest::Sqt.db_info(db_filename)
97
- %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
98
- hh[label] = val
71
+ db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
72
+ db_filename_in_sqt = db_filename
73
+ if opt[:new_db_path]
74
+ db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
75
+ if opt[:update_db_path]
76
+ db_filename_in_sqt = File.expand_path(db_filename)
77
+ warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
99
78
  end
100
- else
101
- warn "file #{db_filename} does not exist, no extra db info in header!"
102
79
  end
103
- end
104
80
 
105
- has_hits = (self.out_files.size > 0)
106
- if has_hits
107
- # somewhat redundant with above, but we can get this without a db present!
108
- hh['DBLocusCount'] = self.out_files.first.db_locus_count
109
- end
81
+ apmu =
82
+ case params.peptide_mass_units
83
+ when '0' ; 'amu'
84
+ when '1' ; 'mmu'
85
+ when '2' ; 'ppm'
86
+ end
110
87
 
111
- File.open(outfile, 'w') do |out|
112
- # print the header:
113
- invariant_ordering.each do |iv|
114
- out.puts ['H', iv, hh.delete(iv)].join("\t")
115
- end
116
- hh.each do |k,v|
117
- out.puts ['H', k, v].join("\t")
118
- end
119
- plural.each do |k,vals|
120
- vals.each do |val|
121
- out.puts ['H', k, val].join("\t")
88
+ hh = {
89
+ 'SQTGenerator' => "mspire: ms-sequest",
90
+ 'SQTGeneratorVersion' => Ms::Sequest::VERSION,
91
+ 'Database' => db_filename_in_sqt,
92
+ 'FragmentMasses' => fmt,
93
+ 'PrecursorMasses' => pmt,
94
+ 'StartTime' => '', # Bioworks 3.2 also leaves this blank...
95
+ 'Alg-PreMassTol' => params.peptide_mass_tolerance,
96
+ 'Alg-FragMassTol' => params.fragment_ion_tolerance,
97
+ 'Alg-PreMassUnits' => apmu, ## mine
98
+ 'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
99
+ 'Alg-Enzyme' => header.enzyme.split(':').last,
100
+ 'Alg-MSModel' => header.model,
101
+ }
102
+
103
+ if opt[:db_info]
104
+ if File.exist?(db_filename)
105
+ reply = Ms::Sequest::Sqt.db_info(db_filename)
106
+ %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
107
+ hh[label] = val
108
+ end
109
+ else
110
+ warn "file #{db_filename} does not exist, no extra db info in header!"
122
111
  end
123
112
  end
124
113
 
125
- ##### SPECTRA
126
- time_to_process = '0.0'
127
- #########################################
128
- # NEED TO FIGURE OUT: (in spectra guy)
129
- # * Lowest Sp value for top 500 spectra
130
- # * Number of sequences matching this precursor ion
131
- #########################################
132
-
133
- manual_validation_status = 'U'
134
- self.out_files.zip(dta_files) do |out_file, dta_file|
135
- # don't have the time to process (using 0.0 like bioworks 3.2)
136
- dta_file_mh = dta_file.mh
137
- out_file_total_inten = out_file.total_inten
138
- out_file_lowest_sp = out_file.lowest_sp
139
- if opt[:round]
140
- dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
141
- out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
142
- out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
114
+ has_hits = (self.out_files.size > 0)
115
+ if has_hits
116
+ # somewhat redundant with above, but we can get this without a db present!
117
+ hh['DBLocusCount'] = self.out_files.first.db_locus_count
118
+ end
119
+
120
+ File.open(outfile, 'w') do |out|
121
+ # print the header:
122
+ invariant_ordering.each do |iv|
123
+ out.puts ['H', iv, hh.delete(iv)].join("\t")
124
+ end
125
+ hh.each do |k,v|
126
+ out.puts ['H', k, v].join("\t")
143
127
  end
128
+ plural.each do |k,vals|
129
+ vals.each do |val|
130
+ out.puts ['H', k, val].join("\t")
131
+ end
132
+ end
133
+
134
+ ##### SPECTRA
135
+ time_to_process = '0.0'
136
+ #########################################
137
+ # NEED TO FIGURE OUT: (in spectra guy)
138
+ # * Lowest Sp value for top 500 spectra
139
+ # * Number of sequences matching this precursor ion
140
+ #########################################
144
141
 
145
- out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
146
- out_file.hits.each_with_index do |hit,index|
147
- hit_mh = hit.mh
148
- hit_deltacn_orig_updated = hit.deltacn_orig_updated
149
- hit_xcorr = hit.xcorr
150
- hit_sp = hit.sp
142
+ manual_validation_status = 'U'
143
+ self.out_files.zip(dta_files) do |out_file, dta_file|
144
+ # don't have the time to process (using 0.0 like bioworks 3.2)
145
+ dta_file_mh = dta_file.mh
146
+ out_file_total_inten = out_file.total_inten
147
+ out_file_lowest_sp = out_file.lowest_sp
151
148
  if opt[:round]
152
- hit_mh = Ms::Calc.round(hit_mh, mh_dp)
153
- hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
154
- hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
155
- hit_sp = Ms::Calc.round(hit_sp, sp_dp)
149
+ dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
150
+ out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
151
+ out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
156
152
  end
157
- # note that the rank is determined by the order..
158
- out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
159
- hit.prots.each do |prot|
160
- out.puts ['L', prot.first_entry].join("\t")
153
+
154
+ out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
155
+ out_file.hits.each_with_index do |hit,index|
156
+ hit_mh = hit.mh
157
+ hit_deltacn_orig_updated = hit.deltacn_orig_updated
158
+ hit_xcorr = hit.xcorr
159
+ hit_sp = hit.sp
160
+ if opt[:round]
161
+ hit_mh = Ms::Calc.round(hit_mh, mh_dp)
162
+ hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
163
+ hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
164
+ hit_sp = Ms::Calc.round(hit_sp, sp_dp)
165
+ end
166
+ # note that the rank is determined by the order..
167
+ out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
168
+ hit.prots.each do |prot|
169
+ out.puts ['L', prot.first_entry].join("\t")
170
+ end
161
171
  end
162
172
  end
163
- end
164
- end # close the filehandle
165
- end # method
173
+ end # close the filehandle
174
+ end # method
175
+ end # Sqt
176
+ include Sqt
166
177
  end # Srf
167
178
  end # Sequest
168
179
  end # Ms
180
+
181
+
182
+ require 'optparse'
183
+
184
+ module Ms::Sequest::Srf::Sqt
185
+ def self.commandline(argv, progname=$0)
186
+ opt = {
187
+ :filter => true
188
+ }
189
+ opts = OptionParser.new do |op|
190
+ op.banner = "usage: #{progname} [OPTIONS] <file>.srf ..."
191
+ op.separator "output: <file>.sqt ..."
192
+ op.separator ""
193
+ op.separator "options:"
194
+ op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
195
+ op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
196
+ op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
197
+ op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included. Turns this off.") { opt[:filter] = false }
198
+ op.on("-o", "--outfiles <first,...>", Array, "Comma list of output filenames") {|v| opt[:outfiles] = v }
199
+ op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
200
+ end
201
+ opts.parse!(argv)
202
+
203
+ if argv.size == 0
204
+ puts(opts) || exit
205
+ end
206
+
207
+ raise "if outfiles specified, outfiles must be same size as number of input files" unless opt[:outfiles].size == argv.size
208
+
209
+ argv.each_with_index do |srf_file,i|
210
+ outfile =
211
+ if opt[:outfiles]
212
+ opt[:outfiles][i]
213
+ else
214
+ base = srf_file.chomp(File.extname(srf_file))
215
+ base + '.sqt'
216
+ end
217
+
218
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt.delete(:filter))
219
+ srf.to_sqt(outfile, :db_info => opt[:db_info], :new_db_path => opt[:new_db_path], :update_db_path => opt[:db_update], :round => opt[:round])
220
+ end
221
+ end
222
+ end
223
+
224
+
@@ -6,10 +6,10 @@ require 'fileutils'
6
6
  require 'ms/sequest/srf'
7
7
  require 'ms/sequest/srf/search'
8
8
 
9
- describe 'converting a large srf to an ms search format' do
10
-
11
- @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
12
- @srf = Ms::Sequest::Srf.new(@file)
9
+ Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
10
+ Mgf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
11
+ Dta_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
12
+ shared 'an srf to ms2 search converter' do
13
13
 
14
14
  def del(file)
15
15
  if File.exist?(file)
@@ -22,8 +22,8 @@ describe 'converting a large srf to an ms search format' do
22
22
  end
23
23
 
24
24
  it 'converts to mgf' do
25
- @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
26
- @srf.to_mgf(@output)
25
+ @output = Mgf_output
26
+ @convert_to_mgf.call
27
27
  ok File.exist?(@output)
28
28
  output = IO.read(@output)
29
29
  # tests are just frozen right now, not checked for accuracy
@@ -33,8 +33,8 @@ describe 'converting a large srf to an ms search format' do
33
33
  end
34
34
 
35
35
  it 'generates .dta files' do
36
- @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
37
- @srf.to_dta(@output)
36
+ @output = Dta_output
37
+ @convert_to_dta.call
38
38
  ok File.exist?(@output)
39
39
  ok File.directory?(@output)
40
40
  # frozen (not verified):
@@ -51,3 +51,24 @@ describe 'converting a large srf to an ms search format' do
51
51
 
52
52
  end
53
53
 
54
+
55
+ describe 'converting an srf to ms2 search format: programmatic' do
56
+ @srf = Ms::Sequest::Srf.new(Srf_file)
57
+
58
+ @convert_to_mgf = lambda { @srf.to_mgf(Mgf_output) }
59
+ @convert_to_dta = lambda { @srf.to_dta(Dta_output) }
60
+
61
+ behaves_like 'an srf to ms2 search converter'
62
+
63
+ end
64
+
65
+ describe 'converting an srf to ms2 search format: commandline' do
66
+
67
+ def commandline_lambda(string)
68
+ lambda { Ms::Sequest::Srf::Search.commandline(string.split(/\s+/)) }
69
+ end
70
+
71
+ @convert_to_mgf = commandline_lambda "#{Srf_file} -o #{Mgf_output}"
72
+ @convert_to_dta = commandline_lambda "#{Srf_file} -o #{Dta_output} -f dta"
73
+ behaves_like 'an srf to ms2 search converter'
74
+ end
@@ -26,7 +26,7 @@ MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
26
26
  SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
27
27
 
28
28
 
29
- SpecHelperOtherLines =<<END
29
+ SpecHelperOtherLines =<<END
30
30
  S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
31
31
  S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
32
32
  M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
@@ -39,12 +39,15 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
39
39
  L gi|90111124|ref|NP_414904.2|
40
40
  END
41
41
 
42
- describe 'converting a large srf to sqt' do
42
+ Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
43
+ Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
43
44
 
44
- @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
45
- @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
46
- @srf = Ms::Sequest::Srf.new(@file)
47
- @original_db_filename = @srf.header.db_filename
45
+ shared 'an srf to sqt converter' do
46
+
47
+ before do
48
+ @original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
49
+ @output = Srf_output
50
+ end
48
51
 
49
52
  def del(file)
50
53
  if File.exist?(file)
@@ -83,7 +86,7 @@ describe 'converting a large srf to sqt' do
83
86
  end
84
87
 
85
88
  it 'converts without bothering with the database' do
86
- @srf.to_sqt(@output)
89
+ @basic_conversion.call
87
90
  ok File.exist?(@output)
88
91
  lines = File.readlines(@output)
89
92
  lines.size.is 80910
@@ -96,25 +99,9 @@ describe 'converting a large srf to sqt' do
96
99
  del(@output)
97
100
  end
98
101
 
99
- it 'warns if the db path is incorrect and we want to update db info' do
100
- # requires some knowledge of how the database file is extracted
101
- # internally
102
- wacky_path = '/not/a/real/path/wacky.fasta'
103
- @srf.header.db_filename = wacky_path
104
- my_error_string = ''
105
- StringIO.open(my_error_string, 'w') do |strio|
106
- $stderr = strio
107
- @srf.to_sqt(@output, :db_info => true)
108
- end
109
- ok my_error_string.include?(wacky_path)
110
- @srf.header.db_filename = @original_db_filename
111
- $stderr = STDERR
112
- ok File.exists?(@output)
113
- IO.readlines(@output).size.is 80910
114
- del(@output)
115
- end
102
+
116
103
  it 'can get db info with correct path' do
117
- @srf.to_sqt(@output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33')
104
+ @with_new_db_path.call
118
105
  ok File.exist?(@output)
119
106
  lines = IO.readlines(@output)
120
107
  has_md5 = lines.any? do |line|
@@ -130,8 +117,9 @@ describe 'converting a large srf to sqt' do
130
117
  lines.size.is 80912
131
118
  del(@output)
132
119
  end
120
+
133
121
  it 'can update the Database' do
134
- @srf.to_sqt(@output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true)
122
+ @update_the_db_path.call
135
123
  regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
136
124
  updated_db = IO.readlines(@output).any? do |line|
137
125
  line =~ regexp
@@ -139,4 +127,53 @@ describe 'converting a large srf to sqt' do
139
127
  ok updated_db
140
128
  del(@output)
141
129
  end
130
+
131
+ end
132
+
133
+ describe "programmatic interface srf to sqt" do
134
+
135
+ @srf = Ms::Sequest::Srf.new(Srf_file)
136
+
137
+ @basic_conversion = lambda { @srf.to_sqt(Srf_output) }
138
+ @with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
139
+ @update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
140
+
141
+ before do
142
+ @output = Srf_output
143
+ end
144
+
145
+ behaves_like "an srf to sqt converter"
146
+
147
+ # this requires programmatic interface to manipulate the object for this
148
+ # test
149
+ it 'warns if the db path is incorrect and we want to update db info' do
150
+ # requires some knowledge of how the database file is extracted
151
+ # internally
152
+ wacky_path = '/not/a/real/path/wacky.fasta'
153
+ @srf.header.db_filename = wacky_path
154
+ my_error_string = ''
155
+ StringIO.open(my_error_string, 'w') do |strio|
156
+ $stderr = strio
157
+ @srf.to_sqt(@output, :db_info => true)
158
+ end
159
+ ok my_error_string.include?(wacky_path)
160
+ @srf.header.db_filename = @original_db_filename
161
+ $stderr = STDERR
162
+ ok File.exists?(@output)
163
+ IO.readlines(@output).size.is 80910
164
+ del(@output)
165
+ end
166
+ end
167
+
168
+ describe "command-line interface srf to sqt" do
169
+ def commandline_lambda(string)
170
+ lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
171
+ end
172
+
173
+ base_cmd = "#{Srf_file} -o #{Srf_output}"
174
+ @basic_conversion = commandline_lambda(base_cmd)
175
+ @with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
176
+ @update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
177
+
178
+ behaves_like "an srf to sqt converter"
142
179
  end
@@ -2,27 +2,9 @@
2
2
  require 'rubygems'
3
3
  require 'spec/more'
4
4
 
5
- # This is already defined in our module
6
- #TESTFILES = File.expand_path(File.dirname(__FILE__)) + '/testfiles'
7
-
8
5
  Bacon.summary_on_exit
9
6
 
10
- #module Bacon
11
- # class Context
12
- # def hash_match(hash, obj)
13
- # hash.each do |k,v|
14
- # if v.is_a?(Hash)
15
- # hash_match(v, obj.send(k.to_sym))
16
- # else
17
- # puts "#{k}: #{v} but was #{obj.send(k.to_sym)}" if obj.send(k.to_sym) != v
18
- # obj.send(k.to_sym).should.equal v
19
- # end
20
- # end
21
- # end
22
- # end
23
- #end
24
-
25
-
7
+ # is this already defined??
26
8
  TESTFILES = File.expand_path(File.dirname(__FILE__)) + "/testfiles"
27
9
 
28
10
  begin
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 13
9
- version: 0.0.13
8
+ - 14
9
+ version: 0.0.14
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-08-17 00:00:00 -06:00
17
+ date: 2010-08-24 00:00:00 -06:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -134,7 +134,7 @@ files:
134
134
  - spec/testfiles/small.sqt
135
135
  - spec/testfiles/small2.sqt
136
136
  has_rdoc: true
137
- homepage: http://jtprince.github.com/ms-sequest
137
+ homepage: http://github.com/jtprince/ms-sequest
138
138
  licenses: []
139
139
 
140
140
  post_install_message: