ms-sequest 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.0.14 / 2010-08-24
2
+
3
+ * Merged commandline programs into lib hierarchy for testing
4
+ * Wrote specs for commandline programs (shared spec with programmatic interface)
5
+
1
6
  == 0.0.13 / 2010-08-16
2
7
 
3
8
  * compatible with ruby 1.9
@@ -1,9 +1,16 @@
1
- = {ms-sequest}[http://jtprince.github.com/ms-template/rdoc/]
1
+ = ms-sequest
2
2
 
3
3
  An {mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
4
4
 
5
+ == {API}[http://yardoc.org/docs/jtprince-ms-sequest]
6
+
5
7
  == Examples
6
8
 
9
+ Provides two executables for extracting information from an Srf file (run without file for usage):
10
+
11
+ srf_to_sqt.rb file.srf # => file.sqt
12
+ srf_to_search.rb file.srf # => file.mgf (also can make .dta files)
13
+
7
14
  === Ms::Sequest::Srf
8
15
 
9
16
  Can read and convert Bioworks Sequest Results Files (SRF).
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ gemspec = Gem::Specification.new do |s|
10
10
  s.name = NAME
11
11
  s.authors = ["John T. Prince"]
12
12
  s.email = "jtprince@gmail.com"
13
- s.homepage = "http://jtprince.github.com/" + NAME
13
+ s.homepage = "http://github.com/jtprince/" + NAME
14
14
  s.summary = "An mspire library supporting SEQUEST, Bioworks, SQT, etc"
15
15
  s.description = "reads .SRF, .SQT and supports conversions"
16
16
  s.rubyforge_project = 'mspire'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.13
1
+ 0.0.14
@@ -1,41 +1,7 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
3
  require 'rubygems'
4
- require 'optparse'
5
4
  require 'ms/sequest/srf/search'
6
5
 
7
- opt = {
8
- :format => 'mgf'
9
- }
6
+ Ms::Sequest::Srf::Search.commandline(ARGV, File.basename(__FILE__)) # delegate to the library CLI entry point; script name is passed for the usage banner
10
7
 
11
- opts = OptionParser.new do |op|
12
- op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
13
- op.separator "outputs: <file>.mgf"
14
- op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
15
- end
16
-
17
- if ARGV.size == 0
18
- puts opts
19
- exit
20
- end
21
-
22
- format = opt[:format]
23
-
24
- ARGV.each do |srf_file|
25
- base = srf_file.sub(/\.srf$/i, '')
26
- newfile =
27
- case format
28
- when 'dta'
29
- base
30
- when 'mgf'
31
- base << '.' << format
32
- end
33
- srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
34
- # options just speed up reading since we don't need .out info anyway
35
- case format
36
- when 'mgf'
37
- srf.to_mgf(newfile)
38
- when 'dta'
39
- srf.to_dta_files(newfile)
40
- end
41
- end
@@ -3,33 +3,6 @@
3
3
  require 'rubygems'
4
4
  require 'ms/sequest/srf/sqt'
5
5
 
6
- opt = {
7
- :filter => true
8
- }
9
- opts = OptionParser.new do |op|
10
- op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
11
- op.separator "output: <file>.sqt ..."
12
- op.separator ""
13
- op.separator "options:"
14
- op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
15
- op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
16
- op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
17
- op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included. Turns this off.") { opt[:filter] = false }
18
- op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
19
- end
20
- opts.parse!
21
-
22
- if ARGV.size == 0
23
- puts(opts) || exit
24
- end
25
-
26
- ARGV.each do |srf_file|
27
- base = srf_file.chomp(File.extname(srf_file))
28
- outfile = base + '.sqt'
29
-
30
- srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt.delete(:filter))
31
- srf.to_sqt(outfile, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
32
- end
33
-
6
+ Ms::Sequest::Srf::Sqt.commandline(ARGV, File.basename(__FILE__))
34
7
 
35
8
 
@@ -7,100 +7,150 @@ require 'ms/mass'
7
7
  module Ms
8
8
  module Sequest
9
9
  class Srf
10
-
11
- # Writes an MGF file to given filename or base_name + '.mgf' if no
12
- # filename given.
13
- #
14
- # This mimicks the output of merge.pl from mascot The only difference is
15
- # that this does not include the "\r\n" that is found after the peak
16
- # lists, instead, it uses "\n" throughout the file (thinking that this
17
- # is preferable to mixing newline styles!)
18
- def to_mgf(filename=nil)
19
- filename =
20
- if filename ; filename
21
- else
22
- base_name + '.mgf'
23
- end
24
- h_plus = Ms::Mass::MASCOT_H_PLUS
25
- File.open(filename, 'wb') do |out|
26
- dta_files.zip(index) do |dta, i_ar|
27
- chrg = dta.charge
28
- out.print "BEGIN IONS\n"
29
- out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
30
- out.print "CHARGE=#{chrg}+\n"
31
- out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
32
- peak_ar = dta.peaks.unpack('e*')
33
- (0...(peak_ar.size)).step(2) do |i|
34
- out.print( peak_ar[i,2].join(' '), "\n")
10
+ module Search
11
+ # Writes an MGF file to given filename or base_name + '.mgf' if no
12
+ # filename given.
13
+ #
14
+ # This mimicks the output of merge.pl from mascot The only difference is
15
+ # that this does not include the "\r\n" that is found after the peak
16
+ # lists, instead, it uses "\n" throughout the file (thinking that this
17
+ # is preferable to mixing newline styles!)
18
+ def to_mgf(filename=nil)
19
+ filename =
20
+ if filename ; filename
21
+ else
22
+ base_name + '.mgf'
23
+ end
24
+ h_plus = Ms::Mass::MASCOT_H_PLUS
25
+ File.open(filename, 'wb') do |out|
26
+ dta_files.zip(index) do |dta, i_ar|
27
+ chrg = dta.charge
28
+ out.print "BEGIN IONS\n"
29
+ out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
30
+ out.print "CHARGE=#{chrg}+\n"
31
+ out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
32
+ peak_ar = dta.peaks.unpack('e*')
33
+ (0...(peak_ar.size)).step(2) do |i|
34
+ out.print( peak_ar[i,2].join(' '), "\n")
35
+ end
36
+ out.print "END IONS\n"
37
+ out.print "\n"
35
38
  end
36
- out.print "END IONS\n"
37
- out.print "\n"
38
39
  end
39
40
  end
40
- end
41
41
 
42
- # not given an out_folder, will make one with the basename
43
- # compress may be: :zip, :tgz, or nil (no compression)
44
- # :zip requires gem rubyzip to be installed and is *very* bloated
45
- # as it writes out all the files first!
46
- # :tgz requires gem archive-tar-minitar to be installed
47
- def to_dta(out_folder=nil, compress=nil)
48
- outdir =
49
- if out_folder ; out_folder
50
- else base_name
51
- end
42
+ # not given an out_folder, will make one with the basename
43
+ # compress may be: :zip, :tgz, or nil (no compression)
44
+ # :zip requires gem rubyzip to be installed and is *very* bloated
45
+ # as it writes out all the files first!
46
+ # :tgz requires gem archive-tar-minitar to be installed
47
+ def to_dta(out_folder=nil, compress=nil)
48
+ outdir =
49
+ if out_folder ; out_folder
50
+ else base_name
51
+ end
52
52
 
53
- case compress
54
- when :tgz
55
- begin
56
- require 'archive/tar/minitar'
57
- rescue LoadError
58
- abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
59
- end
60
- require 'archive/targz' # my own simplified interface!
61
- require 'zlib'
62
- names = index.map do |i_ar|
63
- [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
64
- end
65
- #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
53
+ case compress
54
+ when :tgz
55
+ begin
56
+ require 'archive/tar/minitar'
57
+ rescue LoadError
58
+ abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
59
+ end
60
+ require 'archive/targz' # my own simplified interface!
61
+ require 'zlib'
62
+ names = index.map do |i_ar|
63
+ [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
64
+ end
65
+ #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
66
66
 
67
- tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
67
+ tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
68
68
 
69
- Archive::Tar::Minitar::Output.open(tgz) do |outp|
70
- dta_files.each_with_index do |dta_file, i|
71
- Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
69
+ Archive::Tar::Minitar::Output.open(tgz) do |outp|
70
+ dta_files.each_with_index do |dta_file, i|
71
+ Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
72
+ end
72
73
  end
73
- end
74
- when :zip
75
- begin
76
- require 'zip/zipfilesystem'
77
- rescue LoadError
78
- abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
79
- end
80
- #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
81
- Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
82
- dta_files.zip(index) do |dta,i_ar|
83
- #zfs.mkdir(outdir)
84
- zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
85
- dta.write_dta_file(out)
86
- #zfs.commit
74
+ when :zip
75
+ begin
76
+ require 'zip/zipfilesystem'
77
+ rescue LoadError
78
+ abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
79
+ end
80
+ #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
81
+ Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
82
+ dta_files.zip(index) do |dta,i_ar|
83
+ #zfs.mkdir(outdir)
84
+ zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
85
+ dta.write_dta_file(out)
86
+ #zfs.commit
87
+ end
87
88
  end
88
89
  end
89
- end
90
- else # no compression
91
- FileUtils.mkpath(outdir)
92
- Dir.chdir(outdir) do
93
- dta_files.zip(index) do |dta,i_ar|
94
- File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
95
- dta.write_dta_file(out)
90
+ else # no compression
91
+ FileUtils.mkpath(outdir)
92
+ Dir.chdir(outdir) do
93
+ dta_files.zip(index) do |dta,i_ar|
94
+ File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
95
+ dta.write_dta_file(out)
96
+ end
96
97
  end
97
98
  end
98
99
  end
99
100
  end
100
- end
101
+ end # Search
102
+
103
+ include Search
101
104
 
102
105
  end # Srf
103
106
  end # Sequest
104
107
  end # Ms
105
108
 
106
109
 
110
+ require 'optparse'
111
+ module Ms::Sequest::Srf::Search
112
+ def self.commandline(argv, progname=$0)
113
+ opt = {
114
+ :format => 'mgf'
115
+ }
116
+ opts = OptionParser.new do |op|
117
+ op.banner = "usage: #{progname} <file>.srf ..." # show the caller-supplied program name, not this library file's name
118
+ op.separator "outputs: <file>.mgf ..."
119
+ op.on("-f", "--format <mgf|dta>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
120
+ op.on("-o", "--outfiles <String,...>", Array, "comma list of output files or directories") {|v| opt[:outfiles] = v }
121
+ end
122
+
123
+ opts.parse!(argv)
124
+
125
+ if argv.size == 0
126
+ puts(opts) || exit
127
+ end
128
+
129
+ format = opt[:format]
130
+
131
+ raise "if outfiles specified, needs the same number of files as input files" if opt[:outfiles] && argv.size != opt[:outfiles].size # -o is optional: only validate the count when it was given
132
+
133
+ argv.each_with_index do |srf_file,i|
134
+ base = srf_file.sub(/\.srf$/i, '')
135
+ newfile =
136
+ if opt[:outfiles]
137
+ opt[:outfiles][i]
138
+ else
139
+ case format
140
+ when 'dta'
141
+ base
142
+ when 'mgf'
143
+ base << '.' << format
144
+ end
145
+ end
146
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
147
+ # options just speed up reading since we don't need .out info anyway
148
+ case format
149
+ when 'mgf'
150
+ srf.to_mgf(newfile)
151
+ when 'dta'
152
+ srf.to_dta(newfile)
153
+ end
154
+ end
155
+ end
156
+ end
@@ -1,168 +1,224 @@
1
- require 'tap/task'
1
+
2
2
  require 'ms/calc'
3
3
  require 'ms/sequest'
4
4
  require 'ms/sequest/srf'
5
5
  require 'ms/sequest/sqt'
6
6
 
7
+
7
8
  module Ms
8
9
  module Sequest
9
10
  class Srf
11
+
12
+ module Sqt
10
13
 
11
- # the out_filename will be the base_name + .sqt unless 'out_filename' is
12
- # defined
13
- # :round => round floating point numbers
14
- # etc...
15
- def to_sqt(out_filename=nil, opts={})
16
- # default rounding precision (Decimal Places)
17
- tic_dp = 2
18
- mh_dp = 7
19
- xcorr_dp = 5
20
- sp_dp = 2
21
- dcn_dp = 5
22
-
23
- defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
24
- opt = defaults.merge(opts)
25
-
26
- outfile =
27
- if out_filename
28
- out_filename
29
- else
30
- base_name + '.sqt'
31
- end
32
- invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
33
- fmt =
34
- if params.fragment_mass_type == 'average' ; 'AVG'
35
- else ; 'MONO'
36
- end
37
- pmt =
38
- if params.precursor_mass_type == 'average' ; 'AVG'
39
- else ; 'MONO'
40
- end
14
+ def self.commandline(argv)
15
+ require 'optparse'
16
+
41
17
 
42
- mass_index = params.mass_index
43
- static_mods = params.static_mods.map do |k,v|
44
- key = k.split(/_/)[1]
45
- if key.size == 1
46
- key + '=' + (mass_index[key] + v.to_f).to_s
47
- else
48
- key + '=' + v
49
- end
50
18
  end
51
19
 
52
- dynamic_mods = []
53
- header.modifications.scan(/\((.*?)\)/) do |match|
54
- dynamic_mods << match.first.sub(/ /,'=')
55
- end
56
- plural = {
57
- 'StaticMod' => static_mods,
58
- 'DynamicMod' => dynamic_mods, # example as diff mod
59
- 'Comment' => ['Created from Bioworks .srf file']
60
- }
61
-
62
- db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
63
- db_filename_in_sqt = db_filename
64
- if opt[:new_db_path]
65
- db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
66
- if opt[:update_db_path]
67
- db_filename_in_sqt = File.expand_path(db_filename)
68
- warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
20
+ # the out_filename will be the base_name + .sqt unless 'out_filename' is
21
+ # defined
22
+ # :round => round floating point numbers
23
+ # etc...
24
+ def to_sqt(out_filename=nil, opts={})
25
+ # default rounding precision (Decimal Places)
26
+ tic_dp = 2
27
+ mh_dp = 7
28
+ xcorr_dp = 5
29
+ sp_dp = 2
30
+ dcn_dp = 5
31
+
32
+ defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
33
+ opt = defaults.merge(opts)
34
+
35
+ outfile =
36
+ if out_filename
37
+ out_filename
38
+ else
39
+ base_name + '.sqt'
40
+ end
41
+ invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
42
+ fmt =
43
+ if params.fragment_mass_type == 'average' ; 'AVG'
44
+ else ; 'MONO'
45
+ end
46
+ pmt =
47
+ if params.precursor_mass_type == 'average' ; 'AVG'
48
+ else ; 'MONO'
49
+ end
50
+
51
+ mass_index = params.mass_index
52
+ static_mods = params.static_mods.map do |k,v|
53
+ key = k.split(/_/)[1]
54
+ if key.size == 1
55
+ key + '=' + (mass_index[key] + v.to_f).to_s
56
+ else
57
+ key + '=' + v
58
+ end
69
59
  end
70
- end
71
60
 
72
- apmu =
73
- case params.peptide_mass_units
74
- when '0' ; 'amu'
75
- when '1' ; 'mmu'
76
- when '2' ; 'ppm'
61
+ dynamic_mods = []
62
+ header.modifications.scan(/\((.*?)\)/) do |match|
63
+ dynamic_mods << match.first.sub(/ /,'=')
77
64
  end
65
+ plural = {
66
+ 'StaticMod' => static_mods,
67
+ 'DynamicMod' => dynamic_mods, # example as diff mod
68
+ 'Comment' => ['Created from Bioworks .srf file']
69
+ }
78
70
 
79
- hh = {
80
- 'SQTGenerator' => "mspire: ms-sequest",
81
- 'SQTGeneratorVersion' => Ms::Sequest::VERSION,
82
- 'Database' => db_filename_in_sqt,
83
- 'FragmentMasses' => fmt,
84
- 'PrecursorMasses' => pmt,
85
- 'StartTime' => '', # Bioworks 3.2 also leaves this blank...
86
- 'Alg-PreMassTol' => params.peptide_mass_tolerance,
87
- 'Alg-FragMassTol' => params.fragment_ion_tolerance,
88
- 'Alg-PreMassUnits' => apmu, ## mine
89
- 'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
90
- 'Alg-Enzyme' => header.enzyme.split(':').last,
91
- 'Alg-MSModel' => header.model,
92
- }
93
-
94
- if opt[:db_info]
95
- if File.exist?(db_filename)
96
- reply = Ms::Sequest::Sqt.db_info(db_filename)
97
- %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
98
- hh[label] = val
71
+ db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
72
+ db_filename_in_sqt = db_filename
73
+ if opt[:new_db_path]
74
+ db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
75
+ if opt[:update_db_path]
76
+ db_filename_in_sqt = File.expand_path(db_filename)
77
+ warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
99
78
  end
100
- else
101
- warn "file #{db_filename} does not exist, no extra db info in header!"
102
79
  end
103
- end
104
80
 
105
- has_hits = (self.out_files.size > 0)
106
- if has_hits
107
- # somewhat redundant with above, but we can get this without a db present!
108
- hh['DBLocusCount'] = self.out_files.first.db_locus_count
109
- end
81
+ apmu =
82
+ case params.peptide_mass_units
83
+ when '0' ; 'amu'
84
+ when '1' ; 'mmu'
85
+ when '2' ; 'ppm'
86
+ end
110
87
 
111
- File.open(outfile, 'w') do |out|
112
- # print the header:
113
- invariant_ordering.each do |iv|
114
- out.puts ['H', iv, hh.delete(iv)].join("\t")
115
- end
116
- hh.each do |k,v|
117
- out.puts ['H', k, v].join("\t")
118
- end
119
- plural.each do |k,vals|
120
- vals.each do |val|
121
- out.puts ['H', k, val].join("\t")
88
+ hh = {
89
+ 'SQTGenerator' => "mspire: ms-sequest",
90
+ 'SQTGeneratorVersion' => Ms::Sequest::VERSION,
91
+ 'Database' => db_filename_in_sqt,
92
+ 'FragmentMasses' => fmt,
93
+ 'PrecursorMasses' => pmt,
94
+ 'StartTime' => '', # Bioworks 3.2 also leaves this blank...
95
+ 'Alg-PreMassTol' => params.peptide_mass_tolerance,
96
+ 'Alg-FragMassTol' => params.fragment_ion_tolerance,
97
+ 'Alg-PreMassUnits' => apmu, ## mine
98
+ 'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
99
+ 'Alg-Enzyme' => header.enzyme.split(':').last,
100
+ 'Alg-MSModel' => header.model,
101
+ }
102
+
103
+ if opt[:db_info]
104
+ if File.exist?(db_filename)
105
+ reply = Ms::Sequest::Sqt.db_info(db_filename)
106
+ %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
107
+ hh[label] = val
108
+ end
109
+ else
110
+ warn "file #{db_filename} does not exist, no extra db info in header!"
122
111
  end
123
112
  end
124
113
 
125
- ##### SPECTRA
126
- time_to_process = '0.0'
127
- #########################################
128
- # NEED TO FIGURE OUT: (in spectra guy)
129
- # * Lowest Sp value for top 500 spectra
130
- # * Number of sequences matching this precursor ion
131
- #########################################
132
-
133
- manual_validation_status = 'U'
134
- self.out_files.zip(dta_files) do |out_file, dta_file|
135
- # don't have the time to process (using 0.0 like bioworks 3.2)
136
- dta_file_mh = dta_file.mh
137
- out_file_total_inten = out_file.total_inten
138
- out_file_lowest_sp = out_file.lowest_sp
139
- if opt[:round]
140
- dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
141
- out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
142
- out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
114
+ has_hits = (self.out_files.size > 0)
115
+ if has_hits
116
+ # somewhat redundant with above, but we can get this without a db present!
117
+ hh['DBLocusCount'] = self.out_files.first.db_locus_count
118
+ end
119
+
120
+ File.open(outfile, 'w') do |out|
121
+ # print the header:
122
+ invariant_ordering.each do |iv|
123
+ out.puts ['H', iv, hh.delete(iv)].join("\t")
124
+ end
125
+ hh.each do |k,v|
126
+ out.puts ['H', k, v].join("\t")
143
127
  end
128
+ plural.each do |k,vals|
129
+ vals.each do |val|
130
+ out.puts ['H', k, val].join("\t")
131
+ end
132
+ end
133
+
134
+ ##### SPECTRA
135
+ time_to_process = '0.0'
136
+ #########################################
137
+ # NEED TO FIGURE OUT: (in spectra guy)
138
+ # * Lowest Sp value for top 500 spectra
139
+ # * Number of sequences matching this precursor ion
140
+ #########################################
144
141
 
145
- out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
146
- out_file.hits.each_with_index do |hit,index|
147
- hit_mh = hit.mh
148
- hit_deltacn_orig_updated = hit.deltacn_orig_updated
149
- hit_xcorr = hit.xcorr
150
- hit_sp = hit.sp
142
+ manual_validation_status = 'U'
143
+ self.out_files.zip(dta_files) do |out_file, dta_file|
144
+ # don't have the time to process (using 0.0 like bioworks 3.2)
145
+ dta_file_mh = dta_file.mh
146
+ out_file_total_inten = out_file.total_inten
147
+ out_file_lowest_sp = out_file.lowest_sp
151
148
  if opt[:round]
152
- hit_mh = Ms::Calc.round(hit_mh, mh_dp)
153
- hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
154
- hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
155
- hit_sp = Ms::Calc.round(hit_sp, sp_dp)
149
+ dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
150
+ out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
151
+ out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
156
152
  end
157
- # note that the rank is determined by the order..
158
- out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
159
- hit.prots.each do |prot|
160
- out.puts ['L', prot.first_entry].join("\t")
153
+
154
+ out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
155
+ out_file.hits.each_with_index do |hit,index|
156
+ hit_mh = hit.mh
157
+ hit_deltacn_orig_updated = hit.deltacn_orig_updated
158
+ hit_xcorr = hit.xcorr
159
+ hit_sp = hit.sp
160
+ if opt[:round]
161
+ hit_mh = Ms::Calc.round(hit_mh, mh_dp)
162
+ hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
163
+ hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
164
+ hit_sp = Ms::Calc.round(hit_sp, sp_dp)
165
+ end
166
+ # note that the rank is determined by the order..
167
+ out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
168
+ hit.prots.each do |prot|
169
+ out.puts ['L', prot.first_entry].join("\t")
170
+ end
161
171
  end
162
172
  end
163
- end
164
- end # close the filehandle
165
- end # method
173
+ end # close the filehandle
174
+ end # method
175
+ end # Sqt
176
+ include Sqt
166
177
  end # Srf
167
178
  end # Sequest
168
179
  end # Ms
180
+
181
+
182
+ require 'optparse'
183
+
184
+ module Ms::Sequest::Srf::Sqt
185
+ def self.commandline(argv, progname=$0)
186
+ opt = {
187
+ :filter => true
188
+ }
189
+ opts = OptionParser.new do |op|
190
+ op.banner = "usage: #{progname} [OPTIONS] <file>.srf ..."
191
+ op.separator "output: <file>.sqt ..."
192
+ op.separator ""
193
+ op.separator "options:"
194
+ op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
195
+ op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
196
+ op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
197
+ op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance", "(defined in sequest.params) to be included. Turns this off.") { opt[:filter] = false }
198
+ op.on("-o", "--outfiles <first,...>", Array, "Comma list of output filenames") {|v| opt[:outfiles] = v }
199
+ op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
200
+ end
201
+ opts.parse!(argv)
202
+
203
+ if argv.size == 0
204
+ puts(opts) || exit
205
+ end
206
+
207
+ raise "if outfiles specified, outfiles must be same size as number of input files" if opt[:outfiles] && opt[:outfiles].size != argv.size # -o is optional: only validate the count when it was given
208
+
209
+ argv.each_with_index do |srf_file,i|
210
+ outfile =
211
+ if opt[:outfiles]
212
+ opt[:outfiles][i]
213
+ else
214
+ base = srf_file.chomp(File.extname(srf_file))
215
+ base + '.sqt'
216
+ end
217
+
218
+ srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt[:filter]) # read (not delete) the flag so every input file gets the same filter setting
219
+ srf.to_sqt(outfile, :db_info => opt[:db_info], :new_db_path => opt[:new_db_path], :update_db_path => opt[:db_update], :round => opt[:round])
220
+ end
221
+ end
222
+ end
223
+
224
+
@@ -6,10 +6,10 @@ require 'fileutils'
6
6
  require 'ms/sequest/srf'
7
7
  require 'ms/sequest/srf/search'
8
8
 
9
- describe 'converting a large srf to an ms search format' do
10
-
11
- @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
12
- @srf = Ms::Sequest::Srf.new(@file)
9
+ Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
10
+ Mgf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
11
+ Dta_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
12
+ shared 'an srf to ms2 search converter' do
13
13
 
14
14
  def del(file)
15
15
  if File.exist?(file)
@@ -22,8 +22,8 @@ describe 'converting a large srf to an ms search format' do
22
22
  end
23
23
 
24
24
  it 'converts to mgf' do
25
- @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
26
- @srf.to_mgf(@output)
25
+ @output = Mgf_output
26
+ @convert_to_mgf.call
27
27
  ok File.exist?(@output)
28
28
  output = IO.read(@output)
29
29
  # tests are just frozen right now, not checked for accuracy
@@ -33,8 +33,8 @@ describe 'converting a large srf to an ms search format' do
33
33
  end
34
34
 
35
35
  it 'generates .dta files' do
36
- @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
37
- @srf.to_dta(@output)
36
+ @output = Dta_output
37
+ @convert_to_dta.call
38
38
  ok File.exist?(@output)
39
39
  ok File.directory?(@output)
40
40
  # frozen (not verified):
@@ -51,3 +51,24 @@ describe 'converting a large srf to an ms search format' do
51
51
 
52
52
  end
53
53
 
54
+
55
+ describe 'converting an srf to ms2 search format: programmatic' do
56
+ @srf = Ms::Sequest::Srf.new(Srf_file)
57
+
58
+ @convert_to_mgf = lambda { @srf.to_mgf(Mgf_output) }
59
+ @convert_to_dta = lambda { @srf.to_dta(Dta_output) }
60
+
61
+ behaves_like 'an srf to ms2 search converter'
62
+
63
+ end
64
+
65
+ describe 'converting an srf to ms2 search format: commandline' do
66
+
67
+ def commandline_lambda(string)
68
+ lambda { Ms::Sequest::Srf::Search.commandline(string.split(/\s+/)) }
69
+ end
70
+
71
+ @convert_to_mgf = commandline_lambda "#{Srf_file} -o #{Mgf_output}"
72
+ @convert_to_dta = commandline_lambda "#{Srf_file} -o #{Dta_output} -f dta"
73
+ behaves_like 'an srf to ms2 search converter'
74
+ end
@@ -26,7 +26,7 @@ MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
26
26
  SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
27
27
 
28
28
 
29
- SpecHelperOtherLines =<<END
29
+ SpecHelperOtherLines =<<END
30
30
  S 2 2 1 0.0 VELA 391.04541015625 3021.5419921875 0.0 0
31
31
  S 3 3 1 0.0 VELA 446.009033203125 1743.96911621094 0.0 122
32
32
  M 1 1 445.5769264522 0.0 0.245620265603065 16.6666660308838 1 6 R.SNSK.S U
@@ -39,12 +39,15 @@ M 10 17 1298.5350544522 0.235343858599663 0.823222815990448 151.717300415039 12
39
39
  L gi|90111124|ref|NP_414904.2|
40
40
  END
41
41
 
42
- describe 'converting a large srf to sqt' do
42
+ Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
43
+ Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
43
44
 
44
- @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
45
- @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
46
- @srf = Ms::Sequest::Srf.new(@file)
47
- @original_db_filename = @srf.header.db_filename
45
+ shared 'an srf to sqt converter' do
46
+
47
+ before do
48
+ @original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
49
+ @output = Srf_output
50
+ end
48
51
 
49
52
  def del(file)
50
53
  if File.exist?(file)
@@ -83,7 +86,7 @@ describe 'converting a large srf to sqt' do
83
86
  end
84
87
 
85
88
  it 'converts without bothering with the database' do
86
- @srf.to_sqt(@output)
89
+ @basic_conversion.call
87
90
  ok File.exist?(@output)
88
91
  lines = File.readlines(@output)
89
92
  lines.size.is 80910
@@ -96,25 +99,9 @@ describe 'converting a large srf to sqt' do
96
99
  del(@output)
97
100
  end
98
101
 
99
- it 'warns if the db path is incorrect and we want to update db info' do
100
- # requires some knowledge of how the database file is extracted
101
- # internally
102
- wacky_path = '/not/a/real/path/wacky.fasta'
103
- @srf.header.db_filename = wacky_path
104
- my_error_string = ''
105
- StringIO.open(my_error_string, 'w') do |strio|
106
- $stderr = strio
107
- @srf.to_sqt(@output, :db_info => true)
108
- end
109
- ok my_error_string.include?(wacky_path)
110
- @srf.header.db_filename = @original_db_filename
111
- $stderr = STDERR
112
- ok File.exists?(@output)
113
- IO.readlines(@output).size.is 80910
114
- del(@output)
115
- end
102
+
116
103
  it 'can get db info with correct path' do
117
- @srf.to_sqt(@output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33')
104
+ @with_new_db_path.call
118
105
  ok File.exist?(@output)
119
106
  lines = IO.readlines(@output)
120
107
  has_md5 = lines.any? do |line|
@@ -130,8 +117,9 @@ describe 'converting a large srf to sqt' do
130
117
  lines.size.is 80912
131
118
  del(@output)
132
119
  end
120
+
133
121
  it 'can update the Database' do
134
- @srf.to_sqt(@output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true)
122
+ @update_the_db_path.call
135
123
  regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
136
124
  updated_db = IO.readlines(@output).any? do |line|
137
125
  line =~ regexp
@@ -139,4 +127,53 @@ describe 'converting a large srf to sqt' do
139
127
  ok updated_db
140
128
  del(@output)
141
129
  end
130
+
131
+ end
132
+
133
+ describe "programmatic interface srf to sqt" do
134
+
135
+ @srf = Ms::Sequest::Srf.new(Srf_file)
136
+
137
+ @basic_conversion = lambda { @srf.to_sqt(Srf_output) }
138
+ @with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
139
+ @update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
140
+
141
+ before do
142
+ @output = Srf_output
143
+ end
144
+
145
+ behaves_like "an srf to sqt converter"
146
+
147
+ # this requires programmatic interface to manipulate the object for this
148
+ # test
149
+ it 'warns if the db path is incorrect and we want to update db info' do
150
+ # requires some knowledge of how the database file is extracted
151
+ # internally
152
+ wacky_path = '/not/a/real/path/wacky.fasta'
153
+ @srf.header.db_filename = wacky_path
154
+ my_error_string = ''
155
+ StringIO.open(my_error_string, 'w') do |strio|
156
+ $stderr = strio
157
+ @srf.to_sqt(@output, :db_info => true)
158
+ end
159
+ ok my_error_string.include?(wacky_path)
160
+ @srf.header.db_filename = @original_db_filename
161
+ $stderr = STDERR
162
+ ok File.exists?(@output)
163
+ IO.readlines(@output).size.is 80910
164
+ del(@output)
165
+ end
166
+ end
167
+
168
+ describe "command-line interface srf to sqt" do
169
+ def commandline_lambda(string)
170
+ lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
171
+ end
172
+
173
+ base_cmd = "#{Srf_file} -o #{Srf_output}"
174
+ @basic_conversion = commandline_lambda(base_cmd)
175
+ @with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
176
+ @update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
177
+
178
+ behaves_like "an srf to sqt converter"
142
179
  end
@@ -2,27 +2,9 @@
2
2
  require 'rubygems'
3
3
  require 'spec/more'
4
4
 
5
- # This is already defined in our module
6
- #TESTFILES = File.expand_path(File.dirname(__FILE__)) + '/testfiles'
7
-
8
5
  Bacon.summary_on_exit
9
6
 
10
- #module Bacon
11
- # class Context
12
- # def hash_match(hash, obj)
13
- # hash.each do |k,v|
14
- # if v.is_a?(Hash)
15
- # hash_match(v, obj.send(k.to_sym))
16
- # else
17
- # puts "#{k}: #{v} but was #{obj.send(k.to_sym)}" if obj.send(k.to_sym) != v
18
- # obj.send(k.to_sym).should.equal v
19
- # end
20
- # end
21
- # end
22
- # end
23
- #end
24
-
25
-
7
+ # is this already defined??
26
8
  TESTFILES = File.expand_path(File.dirname(__FILE__)) + "/testfiles"
27
9
 
28
10
  begin
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 13
9
- version: 0.0.13
8
+ - 14
9
+ version: 0.0.14
10
10
  platform: ruby
11
11
  authors:
12
12
  - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-08-17 00:00:00 -06:00
17
+ date: 2010-08-24 00:00:00 -06:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -134,7 +134,7 @@ files:
134
134
  - spec/testfiles/small.sqt
135
135
  - spec/testfiles/small2.sqt
136
136
  has_rdoc: true
137
- homepage: http://jtprince.github.com/ms-sequest
137
+ homepage: http://github.com/jtprince/ms-sequest
138
138
  licenses: []
139
139
 
140
140
  post_install_message: