RubyGems - ms-sequest - Versions diffs - 0.0.13 → 0.0.14 - Mend

ms-sequest 0.0.13 → 0.0.14

Files changed (12) hide show

data/History +5 -0
data/README.rdoc +8 -1
data/Rakefile +1 -1
data/VERSION +1 -1
data/bin/srf_to_search.rb +1 -35
data/bin/srf_to_sqt.rb +1 -28
data/lib/ms/sequest/srf/search.rb +127 -77
data/lib/ms/sequest/srf/sqt.rb +192 -136
data/spec/ms/sequest/srf/search_spec.rb +29 -8
data/spec/ms/sequest/srf/sqt_spec.rb +63 -26
data/spec/spec_helper.rb +1 -19
metadata +4 -4

data/History CHANGED

@@ -1,3 +1,8 @@
+== 0.0.14 / 2010-08-24
+* Merged commandline programs into lib hierarchy for testing
+* Wrote specs for commandline programs (shared spec with programmatic interface)
 == 0.0.13 / 2010-08-16
 * compatible with ruby 1.9

data/README.rdoc CHANGED

@@ -1,9 +1,16 @@
-= {ms-sequest}[http://jtprince.github.com/ms-template/rdoc/]
+= ms-sequest
 An {mspire}[http://mspire.rubyforge.org] library supporting SEQUEST, Bioworks, SQT and associated formats.
+== {API}[http://yardoc.org/docs/jtprince-ms-sequest]
 == Examples
+Provides two executables for extracting information from an Srf file (run without file for usage):
+    srf_to_sqt.rb file.srf     # => file.sqt
+    srf_to_search.rb file.srf  # => file.mgf  (also can make .dta files)
 === Ms::Sequest::Srf
 Can read and convert Bioworks Sequest Results Files (SRF).

data/Rakefile CHANGED

@@ -10,7 +10,7 @@ gemspec = Gem::Specification.new do |s|
   s.name = NAME
   s.authors = ["John T. Prince"]
   s.email = "jtprince@gmail.com"
-  s.homepage = "http://jtprince.github.com/" + NAME
+  s.homepage = "http://github.com/jtprince/" + NAME
   s.summary = "An mspire library supporting SEQUEST, Bioworks, SQT, etc"
   s.description = "reads .SRF, .SQT and supports conversions"
   s.rubyforge_project = 'mspire'

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.0.13
1	+ 0.0.14

data/bin/srf_to_search.rb CHANGED

@@ -1,41 +1,7 @@
 #!/usr/bin/ruby
 require 'rubygems'
-require 'optparse'
 require 'ms/sequest/srf/search'
-opt = {
-  :format => 'mgf'
-}
+# Hand off to the library's command-line driver, passing this script's name for the usage banner.
+Ms::Sequest::Srf::Search.commandline(ARGV, File.basename(__FILE__))
-opts = OptionParser.new do |op|
-  op.banner = "usage: #{File.basename(__FILE__)} <file>.srf"
-  op.separator "outputs: <file>.mgf"
-  op.on("-f", "--format <mgf|dat>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
-end
-if ARGV.size == 0
-  puts opts
-  exit
-end
-format = opt[:format]
-ARGV.each do |srf_file|
-  base = srf_file.sub(/\.srf$/i, '')
-  newfile =
-    case format
-    when 'dta'
-      base
-    when 'mgf'
-      base << '.' << format
-    end
-  srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
-  # options just speed up reading since we don't need .out info anyway
-  case format
-  when 'mgf'
-    srf.to_mgf(newfile)
-  when 'dta'
-    srf.to_dta_files(newfile)
-  end
-end

data/bin/srf_to_sqt.rb CHANGED

@@ -3,33 +3,6 @@
 require 'rubygems'
 require 'ms/sequest/srf/sqt'
-opt = {
-  :filter => true
-}
-opts = OptionParser.new do |op|
-  op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
-  op.separator "output: <file>.sqt ..."
-  op.separator ""
-  op.separator "options:"
-  op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
-  op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
-  op.on("-u", "--db-update", "update the sqt file to reflect --db_path") {|v| opt[:db_update] = v }
-  op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance",  "(defined in sequest.params) to be included.  Turns this off.") { opt[:filter] = false }
-  op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
-end
-opts.parse!
-if ARGV.size == 0
-  puts(opts) || exit
-end
-ARGV.each do |srf_file|
-  base = srf_file.chomp(File.extname(srf_file))
-  outfile = base + '.sqt'
-  srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => opt.delete(:filter))
-  srf.to_sqt(outfile, :db_info => db_info, :new_db_path => db_path, :update_db_path => db_update, :round => round)
-end
+Ms::Sequest::Srf::Sqt.commandline(ARGV, File.basename(__FILE__))

data/lib/ms/sequest/srf/search.rb CHANGED

@@ -7,100 +7,150 @@ require 'ms/mass'
 module Ms
   module Sequest
     class Srf
-      # Writes an MGF file to given filename or base_name + '.mgf' if no
-      # filename given.
-      #
-      # This mimicks the output of merge.pl from mascot The only difference is
-      # that this does not include the "\r\n" that is found after the peak
-      # lists, instead, it uses "\n" throughout the file (thinking that this
-      # is preferable to mixing newline styles!)
-      def to_mgf(filename=nil)
-        filename =
-          if filename ; filename
-          else
-            base_name + '.mgf'
-          end
-        h_plus = Ms::Mass::MASCOT_H_PLUS
-        File.open(filename, 'wb') do |out|
-          dta_files.zip(index) do |dta, i_ar|
-            chrg = dta.charge
-            out.print "BEGIN IONS\n"
-            out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
-            out.print "CHARGE=#{chrg}+\n"
-            out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
-            peak_ar = dta.peaks.unpack('e*')
-            (0...(peak_ar.size)).step(2) do |i|
-              out.print( peak_ar[i,2].join(' '), "\n")
+      module Search
+        # Writes an MGF file to given filename or base_name + '.mgf' if no
+        # filename given.
+        #
+        # This mimics the output of merge.pl from mascot. The only difference is
+        # that this does not include the "\r\n" that is found after the peak
+        # lists, instead, it uses "\n" throughout the file (thinking that this
+        # is preferable to mixing newline styles!)
+        def to_mgf(filename=nil)
+          filename =
+            if filename ; filename
+            else
+              base_name + '.mgf'
+            end
+          h_plus = Ms::Mass::MASCOT_H_PLUS
+          File.open(filename, 'wb') do |out|
+            dta_files.zip(index) do |dta, i_ar|
+              chrg = dta.charge
+              out.print "BEGIN IONS\n"
+              out.print "TITLE=#{[base_name, *i_ar].push('dta').join('.')}\n"
+              out.print "CHARGE=#{chrg}+\n"
+              out.print "PEPMASS=#{(dta.mh+((chrg-1)*h_plus))/chrg}\n"
+              peak_ar = dta.peaks.unpack('e*')
+              (0...(peak_ar.size)).step(2) do |i|
+                out.print( peak_ar[i,2].join(' '), "\n")
+              end
+              out.print "END IONS\n"
+              out.print "\n"
             end
-            out.print "END IONS\n"
-            out.print "\n"
           end
         end
-      end
-      # not given an out_folder, will make one with the basename
-      # compress may be: :zip, :tgz, or nil (no compression)
-      # :zip requires gem rubyzip to be installed and is *very* bloated
-      # as it writes out all the files first!
-      # :tgz requires gem archive-tar-minitar to be installed
-      def to_dta(out_folder=nil, compress=nil)
-        outdir =
-          if out_folder ; out_folder
-          else base_name
-          end
+        # not given an out_folder, will make one with the basename
+        # compress may be: :zip, :tgz, or nil (no compression)
+        # :zip requires gem rubyzip to be installed and is *very* bloated
+        # as it writes out all the files first!
+        # :tgz requires gem archive-tar-minitar to be installed
+        def to_dta(out_folder=nil, compress=nil)
+          outdir =
+            if out_folder ; out_folder
+            else base_name
+            end
-        case compress
-        when :tgz
-          begin
-            require 'archive/tar/minitar'
-          rescue LoadError
-            abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
-          end
-          require 'archive/targz'  # my own simplified interface!
-          require 'zlib'
-          names = index.map do |i_ar|
-            [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
-          end
-          #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
+          case compress
+          when :tgz
+            begin
+              require 'archive/tar/minitar'
+            rescue LoadError
+              abort "need gem 'archive-tar-minitar' installed' for tgz compression!\n#{$!}"
+            end
+            require 'archive/targz'  # my own simplified interface!
+            require 'zlib'
+            names = index.map do |i_ar|
+              [outdir, '/', [base_name, *i_ar].join('.'), '.dta'].join('')
+            end
+            #Archive::Targz.archive_as_files(outdir + '.tgz', names, dta_file_data)
-          tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
+            tgz = Zlib::GzipWriter.new(File.open(outdir + '.tgz', 'wb'))
-          Archive::Tar::Minitar::Output.open(tgz) do |outp|
-            dta_files.each_with_index do |dta_file, i|
-              Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
+            Archive::Tar::Minitar::Output.open(tgz) do |outp|
+              dta_files.each_with_index do |dta_file, i|
+                Archive::Tar::Minitar.pack_as_file(names[i], dta_file.to_dta_file_data, outp)
+              end
             end
-          end
-        when :zip
-          begin
-            require 'zip/zipfilesystem'
-          rescue LoadError
-            abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
-          end
-          #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
-          Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
-            dta_files.zip(index) do |dta,i_ar|
-              #zfs.mkdir(outdir)
-              zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
-                dta.write_dta_file(out)
-                #zfs.commit
+          when :zip
+            begin
+              require 'zip/zipfilesystem'
+            rescue LoadError
+              abort "need gem 'rubyzip' installed' for zip compression!\n#{$!}"
+            end
+            #begin ; require 'zip/zipfilesystem' ; rescue LoadError, "need gem 'rubyzip' installed' for zip compression!\n#{$!}" ; end
+            Zip::ZipFile.open(outdir + ".zip", Zip::ZipFile::CREATE) do |zfs|
+              dta_files.zip(index) do |dta,i_ar|
+                #zfs.mkdir(outdir)
+                zfs.get_output_stream(outdir + '/' + [base_name, *i_ar].join('.') + '.dta') do |out|
+                  dta.write_dta_file(out)
+                  #zfs.commit
+                end
               end
             end
-          end
-        else  # no compression
-          FileUtils.mkpath(outdir)
-          Dir.chdir(outdir) do
-            dta_files.zip(index) do |dta,i_ar|
-              File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
-                dta.write_dta_file(out)
+          else  # no compression
+            FileUtils.mkpath(outdir)
+            Dir.chdir(outdir) do
+              dta_files.zip(index) do |dta,i_ar|
+                File.open([base_name, *i_ar].join('.') << '.dta', 'wb') do |out|
+                  dta.write_dta_file(out)
+                end
               end
             end
           end
         end
-      end
+      end # Search
+      include Search
     end # Srf
   end # Sequest
 end # Ms
+require 'optparse'
+module Ms::Sequest::Srf::Search
+  # Command-line driver: converts each .srf file named in argv to the
+  # requested search format (mgf by default, or dta).
+  #
+  # argv::     option switches followed by one or more .srf paths; the
+  #            switches are removed from argv in place by OptionParser#parse!
+  # progname:: program name shown in the usage banner (defaults to $0)
+  #
+  # Prints the usage text and exits when no input files remain after option
+  # parsing.  Raises RuntimeError when --outfiles is given with a different
+  # number of entries than there are input files.
+  def self.commandline(argv, progname=$0)
+    opt = {
+      :format => 'mgf'
+    }
+    opts = OptionParser.new do |op|
+      # use the caller-supplied program name, not this library file's name
+      op.banner = "usage: #{progname} <file>.srf ..."
+      op.separator "outputs: <file>.mgf ..."
+      op.on("-f", "--format <mgf|dta>", "the output format (default: #{opt[:format]})") {|v| opt[:format] = v }
+      op.on("-o", "--outfiles <String,...>", Array, "comma list of output files or directories") {|v| opt[:outfiles] = v }
+    end
+    opts.parse!(argv)
+    if argv.empty?
+      puts opts
+      exit
+    end
+    format = opt[:format]
+    outfiles = opt[:outfiles]
+    # --outfiles is optional, so only check the count when it was supplied
+    if outfiles && outfiles.size != argv.size
+      raise "if outfiles specified, needs the same number of files as input files"
+    end
+    argv.each_with_index do |srf_file,i|
+      base = srf_file.sub(/\.srf$/i, '')
+      newfile =
+        if outfiles
+          outfiles[i]
+        else
+          case format
+          when 'dta'
+            base
+          when 'mgf'
+            base << '.' << format
+          end
+        end
+      # options just speed up reading since we don't need .out info anyway
+      srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => false, :read_pephits => false )
+      case format
+      when 'mgf'
+        srf.to_mgf(newfile)
+      when 'dta'
+        srf.to_dta(newfile)
+      end
+    end
+  end
+end

data/lib/ms/sequest/srf/sqt.rb CHANGED

@@ -1,168 +1,224 @@
-require 'tap/task'
 require 'ms/calc'
 require 'ms/sequest'
 require 'ms/sequest/srf'
 require 'ms/sequest/sqt'
 module Ms
   module Sequest
     class Srf
+      module Sqt
-      # the out_filename will be the base_name + .sqt unless 'out_filename' is
-      # defined
-      # :round => round floating point numbers
-      # etc...
-      def to_sqt(out_filename=nil, opts={})
-        # default rounding precision (Decimal Places)
-        tic_dp = 2
-        mh_dp = 7
-        xcorr_dp = 5
-        sp_dp = 2
-        dcn_dp = 5
-        defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
-        opt = defaults.merge(opts)
-        outfile =
-          if out_filename
-            out_filename
-          else
-            base_name + '.sqt'
-          end
-        invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
-        fmt =
-          if params.fragment_mass_type == 'average' ; 'AVG'
-          else ; 'MONO'
-          end
-        pmt =
-          if params.precursor_mass_type == 'average' ; 'AVG'
-          else ; 'MONO'
-          end
+        def self.commandline(argv)
+          require 'optparse'
-        mass_index = params.mass_index
-        static_mods = params.static_mods.map do |k,v|
-          key =  k.split(/_/)[1]
-          if key.size == 1
-            key + '=' + (mass_index[key] + v.to_f).to_s
-          else
-            key + '=' + v
-          end
         end
-        dynamic_mods = []
-        header.modifications.scan(/\((.*?)\)/) do |match|
-          dynamic_mods << match.first.sub(/ /,'=')
-        end
-        plural = {
-          'StaticMod' => static_mods,
-          'DynamicMod' => dynamic_mods,  # example as diff mod
-          'Comment' => ['Created from Bioworks .srf file']
-        }
-        db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
-        db_filename_in_sqt = db_filename
-        if opt[:new_db_path]
-          db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
-          if opt[:update_db_path]
-            db_filename_in_sqt = File.expand_path(db_filename)
-            warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
+        # the out_filename will be the base_name + .sqt unless 'out_filename' is
+        # defined
+        # :round => round floating point numbers
+        # etc...
+        def to_sqt(out_filename=nil, opts={})
+          # default rounding precision (Decimal Places)
+          tic_dp = 2
+          mh_dp = 7
+          xcorr_dp = 5
+          sp_dp = 2
+          dcn_dp = 5
+          defaults = {:db_info=>false, :new_db_path=>nil, :update_db_path=>false, :round=>false}
+          opt = defaults.merge(opts)
+          outfile =
+            if out_filename
+              out_filename
+            else
+              base_name + '.sqt'
+            end
+          invariant_ordering = %w(SQTGenerator SQTGeneratorVersion Database FragmentMasses PrecursorMasses StartTime) # just for readability and consistency
+          fmt =
+            if params.fragment_mass_type == 'average' ; 'AVG'
+            else ; 'MONO'
+            end
+          pmt =
+            if params.precursor_mass_type == 'average' ; 'AVG'
+            else ; 'MONO'
+            end
+          mass_index = params.mass_index
+          static_mods = params.static_mods.map do |k,v|
+            key =  k.split(/_/)[1]
+            if key.size == 1
+              key + '=' + (mass_index[key] + v.to_f).to_s
+            else
+              key + '=' + v
+            end
           end
-        end
-        apmu =
-          case params.peptide_mass_units
-          when '0' ; 'amu'
-          when '1' ; 'mmu'
-          when '2' ; 'ppm'
+          dynamic_mods = []
+          header.modifications.scan(/\((.*?)\)/) do |match|
+            dynamic_mods << match.first.sub(/ /,'=')
           end
+          plural = {
+            'StaticMod' => static_mods,
+            'DynamicMod' => dynamic_mods,  # example as diff mod
+            'Comment' => ['Created from Bioworks .srf file']
+          }
-        hh =  {
-          'SQTGenerator' => "mspire: ms-sequest",
-          'SQTGeneratorVersion' => Ms::Sequest::VERSION,
-          'Database' => db_filename_in_sqt,
-          'FragmentMasses' => fmt,
-          'PrecursorMasses' => pmt,
-          'StartTime' => '',  # Bioworks 3.2 also leaves this blank...
-          'Alg-PreMassTol' => params.peptide_mass_tolerance,
-          'Alg-FragMassTol' => params.fragment_ion_tolerance,
-          'Alg-PreMassUnits' => apmu, ## mine
-          'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
-          'Alg-Enzyme' => header.enzyme.split(':').last,
-          'Alg-MSModel' => header.model,
-        }
-        if opt[:db_info]
-          if File.exist?(db_filename)
-            reply = Ms::Sequest::Sqt.db_info(db_filename)
-            %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
-              hh[label] = val
+          db_filename = header.db_filename.sub(/\.hdr$/, '') # remove the .hdr postfix
+          db_filename_in_sqt = db_filename
+          if opt[:new_db_path]
+            db_filename = File.join(opt[:new_db_path], File.basename(db_filename.gsub('\\', '/')))
+            if opt[:update_db_path]
+              db_filename_in_sqt = File.expand_path(db_filename)
+              warn "writing Database #{db_filename} to sqt, but it does not exist on this file system" unless File.exist?(db_filename)
             end
-          else
-            warn "file #{db_filename} does not exist, no extra db info in header!"
           end
-        end
-        has_hits = (self.out_files.size > 0)
-        if has_hits
-          # somewhat redundant with above, but we can get this without a db present!
-          hh['DBLocusCount'] = self.out_files.first.db_locus_count
-        end
+          apmu =
+            case params.peptide_mass_units
+            when '0' ; 'amu'
+            when '1' ; 'mmu'
+            when '2' ; 'ppm'
+            end
-        File.open(outfile, 'w') do |out|
-          # print the header:
-          invariant_ordering.each do |iv|
-            out.puts ['H', iv, hh.delete(iv)].join("\t")
-          end
-          hh.each do |k,v|
-            out.puts ['H', k, v].join("\t")
-          end
-          plural.each do |k,vals|
-            vals.each do |val|
-              out.puts ['H', k, val].join("\t")
+          hh =  {
+            'SQTGenerator' => "mspire: ms-sequest",
+            'SQTGeneratorVersion' => Ms::Sequest::VERSION,
+            'Database' => db_filename_in_sqt,
+            'FragmentMasses' => fmt,
+            'PrecursorMasses' => pmt,
+            'StartTime' => '',  # Bioworks 3.2 also leaves this blank...
+            'Alg-PreMassTol' => params.peptide_mass_tolerance,
+            'Alg-FragMassTol' => params.fragment_ion_tolerance,
+            'Alg-PreMassUnits' => apmu, ## mine
+            'Alg-IonSeries' => header.ion_series.split(':').last.lstrip,
+            'Alg-Enzyme' => header.enzyme.split(':').last,
+            'Alg-MSModel' => header.model,
+          }
+          if opt[:db_info]
+            if File.exist?(db_filename)
+              reply = Ms::Sequest::Sqt.db_info(db_filename)
+              %w(DBSeqLength DBLocusCount DBMD5Sum).zip(reply) do |label,val|
+                hh[label] = val
+              end
+            else
+              warn "file #{db_filename} does not exist, no extra db info in header!"
             end
           end
-          ##### SPECTRA
-          time_to_process = '0.0'
-          #########################################
-          # NEED TO FIGURE OUT: (in spectra guy)
-          #    * Lowest Sp value for top 500 spectra
-          #    * Number of sequences matching this precursor ion
-          #########################################
-          manual_validation_status = 'U'
-          self.out_files.zip(dta_files) do |out_file, dta_file|
-            # don't have the time to process (using 0.0 like bioworks 3.2)
-            dta_file_mh = dta_file.mh
-            out_file_total_inten = out_file.total_inten
-            out_file_lowest_sp = out_file.lowest_sp
-            if opt[:round]
-              dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
-              out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
-              out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
+          has_hits = (self.out_files.size > 0)
+          if has_hits
+            # somewhat redundant with above, but we can get this without a db present!
+            hh['DBLocusCount'] = self.out_files.first.db_locus_count
+          end
+          File.open(outfile, 'w') do |out|
+            # print the header:
+            invariant_ordering.each do |iv|
+              out.puts ['H', iv, hh.delete(iv)].join("\t")
+            end
+            hh.each do |k,v|
+              out.puts ['H', k, v].join("\t")
             end
+            plural.each do |k,vals|
+              vals.each do |val|
+                out.puts ['H', k, val].join("\t")
+              end
+            end
+            ##### SPECTRA
+            time_to_process = '0.0'
+            #########################################
+            # NEED TO FIGURE OUT: (in spectra guy)
+            #    * Lowest Sp value for top 500 spectra
+            #    * Number of sequences matching this precursor ion
+            #########################################
-            out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
-            out_file.hits.each_with_index do |hit,index|
-              hit_mh = hit.mh
-              hit_deltacn_orig_updated = hit.deltacn_orig_updated
-              hit_xcorr = hit.xcorr
-              hit_sp = hit.sp
+            manual_validation_status = 'U'
+            self.out_files.zip(dta_files) do |out_file, dta_file|
+              # don't have the time to process (using 0.0 like bioworks 3.2)
+              dta_file_mh = dta_file.mh
+              out_file_total_inten = out_file.total_inten
+              out_file_lowest_sp = out_file.lowest_sp
               if opt[:round]
-                hit_mh = Ms::Calc.round(hit_mh, mh_dp)
-                hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
-                hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
-                hit_sp = Ms::Calc.round(hit_sp, sp_dp)
+                dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
+                out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
+                out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
               end
-              # note that the rank is determined by the order..
-              out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
-              hit.prots.each do |prot|
-                out.puts ['L', prot.first_entry].join("\t")
+              out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
+              out_file.hits.each_with_index do |hit,index|
+                hit_mh = hit.mh
+                hit_deltacn_orig_updated = hit.deltacn_orig_updated
+                hit_xcorr = hit.xcorr
+                hit_sp = hit.sp
+                if opt[:round]
+                  hit_mh = Ms::Calc.round(hit_mh, mh_dp)
+                  hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
+                  hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
+                  hit_sp = Ms::Calc.round(hit_sp, sp_dp)
+                end
+                # note that the rank is determined by the order..
+                out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")
+                hit.prots.each do |prot|
+                  out.puts ['L', prot.first_entry].join("\t")
+                end
               end
             end
-          end
-        end # close the filehandle
-      end # method
+          end # close the filehandle
+        end # method
+      end # Sqt
+      include Sqt
     end # Srf
   end # Sequest
 end # Ms
+require 'optparse'
+module Ms::Sequest::Srf::Sqt
+  # Command-line driver: converts each .srf file named in argv to an .sqt
+  # file.
+  #
+  # argv::     option switches followed by one or more .srf paths; the
+  #            switches are removed from argv in place by OptionParser#parse!
+  # progname:: program name shown in the usage banner (defaults to $0)
+  #
+  # Prints the usage text and exits when no input files remain after option
+  # parsing.  Raises RuntimeError when --outfiles is given with a different
+  # number of entries than there are input files.
+  def self.commandline(argv, progname=$0)
+    opt = {
+      :filter => true
+    }
+    opts = OptionParser.new do |op|
+      op.banner = "usage: #{progname} [OPTIONS] <file>.srf ..."
+      op.separator "output: <file>.sqt ..."
+      op.separator ""
+      op.separator "options:"
+      op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt[:db_info] = v }
+      op.on("-p", "--db-path <String>", "If you need to specify the database path") {|v| opt[:new_db_path] = v }
+      op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt[:db_update] = v }
+      op.on("-n", "--no-filter", "by default, pephit must be within peptide_mass_tolerance",  "(defined in sequest.params) to be included.  Turns this off.") { opt[:filter] = false }
+      op.on("-o", "--outfiles <first,...>", Array, "Comma list of output filenames") {|v| opt[:outfiles] = v }
+      op.on("-r", "--round", "round floating point values reasonably") {|v| opt[:round] = v }
+    end
+    opts.parse!(argv)
+    if argv.empty?
+      puts opts
+      exit
+    end
+    outfiles = opt[:outfiles]
+    # --outfiles is optional, so only check the count when it was supplied
+    if outfiles && outfiles.size != argv.size
+      raise "if outfiles specified, outfiles must be same size as number of input files"
+    end
+    # read the flag once so every input file gets the same filter setting
+    filter = opt[:filter]
+    argv.each_with_index do |srf_file,i|
+      outfile =
+        if outfiles
+          outfiles[i]
+        else
+          srf_file.chomp(File.extname(srf_file)) + '.sqt'
+        end
+      srf = Ms::Sequest::Srf.new(srf_file, :link_protein_hits => false, :filter_by_precursor_mass_tolerance => filter)
+      srf.to_sqt(outfile, :db_info => opt[:db_info], :new_db_path => opt[:new_db_path], :update_db_path => opt[:db_update], :round => opt[:round])
+    end
+  end
+end

data/spec/ms/sequest/srf/search_spec.rb CHANGED

@@ -6,10 +6,10 @@ require 'fileutils'
 require 'ms/sequest/srf'
 require 'ms/sequest/srf/search'
-describe 'converting a large srf to an ms search format' do
-  @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
-  @srf = Ms::Sequest::Srf.new(@file)
+Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
+Mgf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
+Dta_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
+shared 'an srf to ms2 search converter' do
   def del(file)
     if File.exist?(file)
@@ -22,8 +22,8 @@ describe 'converting a large srf to an ms search format' do
   end
   it 'converts to mgf' do
-    @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.mgf.tmp'
-    @srf.to_mgf(@output)
+    @output = Mgf_output
+    @convert_to_mgf.call
     ok File.exist?(@output)
     output = IO.read(@output)
     # tests are just frozen right now, not checked for accuracy
@@ -33,8 +33,8 @@ describe 'converting a large srf to an ms search format' do
   end
   it 'generates .dta files' do
-    @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.dta.tmp'
-    @srf.to_dta(@output)
+    @output = Dta_output
+    @convert_to_dta.call
     ok File.exist?(@output)
     ok File.directory?(@output)
     # frozen (not verified):
@@ -51,3 +51,24 @@ describe 'converting a large srf to an ms search format' do
 end
+describe 'converting an srf to ms2 search format: programmatic' do
+  @srf = Ms::Sequest::Srf.new(Srf_file)
+  @convert_to_mgf = lambda { @srf.to_mgf(Mgf_output) }
+  @convert_to_dta = lambda { @srf.to_dta(Dta_output) }
+  behaves_like 'an srf to ms2 search converter'
+end
+describe 'converting an srf to ms2 search format: commandline' do
+  def commandline_lambda(string)
+    lambda { Ms::Sequest::Srf::Search.commandline(string.split(/\s+/)) }
+  end
+  @convert_to_mgf = commandline_lambda "#{Srf_file} -o #{Mgf_output}"
+  @convert_to_dta = commandline_lambda "#{Srf_file} -o #{Dta_output} -f dta"
+  behaves_like 'an srf to ms2 search converter'
+end

data/spec/ms/sequest/srf/sqt_spec.rb CHANGED

@@ -26,7 +26,7 @@ MoleculesStaticMods = ["C=160.1942", "Cterm=10.1230", "E=161.44398"]
 SpecHelperHeaderHash['StaticMod'] = MoleculesStaticMods
-  SpecHelperOtherLines =<<END
+SpecHelperOtherLines =<<END
 S	2	2	1	0.0	VELA	391.04541015625	3021.5419921875	0.0	0
 S	3	3	1	0.0	VELA	446.009033203125	1743.96911621094	0.0	122
 M	1	1	445.5769264522	0.0	0.245620265603065	16.6666660308838	1	6	R.SNSK.S	U
@@ -39,12 +39,15 @@ M	10	17	1298.5350544522	0.235343858599663	0.823222815990448	151.717300415039	12
 L	gi|90111124|ref|NP_414904.2|
 END
-describe 'converting a large srf to sqt' do
+Srf_file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
+Srf_output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
-  @file = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.srf'
-  @output = Ms::TESTDATA + '/sequest/opd1_static_diff_mods/000.sqt.tmp'
-  @srf = Ms::Sequest::Srf.new(@file)
-  @original_db_filename = @srf.header.db_filename
+shared 'an srf to sqt converter' do
+  before do
+    @original_db_filename = "C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta"
+    @output = Srf_output
+  end
   def del(file)
     if File.exist?(file)
@@ -83,7 +86,7 @@ describe 'converting a large srf to sqt' do
   end
   it 'converts without bothering with the database' do
-    @srf.to_sqt(@output)
+    @basic_conversion.call
     ok File.exist?(@output)
     lines = File.readlines(@output)
     lines.size.is 80910
@@ -96,25 +99,9 @@ describe 'converting a large srf to sqt' do
     del(@output)
   end
-  it 'warns if the db path is incorrect and we want to update db info' do
-    # requires some knowledge of how the database file is extracted
-    # internally
-    wacky_path = '/not/a/real/path/wacky.fasta'
-    @srf.header.db_filename = wacky_path
-    my_error_string = ''
-    StringIO.open(my_error_string, 'w') do |strio|
-      $stderr = strio
-      @srf.to_sqt(@output, :db_info => true)
-    end
-    ok my_error_string.include?(wacky_path)
-    @srf.header.db_filename = @original_db_filename
-    $stderr = STDERR
-    ok File.exists?(@output)
-    IO.readlines(@output).size.is 80910
-    del(@output)
-  end
   it 'can get db info with correct path' do
-    @srf.to_sqt(@output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33')
+    @with_new_db_path.call
     ok File.exist?(@output)
     lines = IO.readlines(@output)
     has_md5 = lines.any? do |line|
@@ -130,8 +117,9 @@ describe 'converting a large srf to sqt' do
     lines.size.is 80912
     del(@output)
   end
   it 'can update the Database' do
-    @srf.to_sqt(@output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true)
+    @update_the_db_path.call
     regexp = Regexp.new("Database\t/.*/opd1_2runs_2mods/sequest33/ecoli_K12_ncbi_20060321.fasta")
     updated_db = IO.readlines(@output).any? do |line|
       line =~ regexp
@@ -139,4 +127,53 @@ describe 'converting a large srf to sqt' do
     ok updated_db
     del(@output)
   end
+end
+describe "programmatic interface srf to sqt" do
+  @srf = Ms::Sequest::Srf.new(Srf_file)
+  @basic_conversion = lambda { @srf.to_sqt(Srf_output) }
+  @with_new_db_path = lambda { @srf.to_sqt(Srf_output, :db_info => true, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33') }
+  @update_the_db_path = lambda { @srf.to_sqt(Srf_output, :new_db_path => Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33', :update_db_path => true) }
+  before do
+    @output = Srf_output
+  end
+  behaves_like "an srf to sqt converter"
+  # this requires programmatic interface to manipulate the object for this
+  # test
+  it 'warns if the db path is incorrect and we want to update db info' do
+    # requires some knowledge of how the database file is extracted
+    # internally
+    wacky_path = '/not/a/real/path/wacky.fasta'
+    @srf.header.db_filename = wacky_path
+    my_error_string = ''
+    StringIO.open(my_error_string, 'w') do |strio|
+      $stderr = strio
+      @srf.to_sqt(@output, :db_info => true)
+    end
+    ok my_error_string.include?(wacky_path)
+    @srf.header.db_filename = @original_db_filename
+    $stderr = STDERR
+    ok File.exists?(@output)
+    IO.readlines(@output).size.is 80910
+    del(@output)
+  end
+end
+describe "command-line interface srf to sqt" do
+  def commandline_lambda(string)
+    lambda { Ms::Sequest::Srf::Sqt.commandline( string.split(/\s+/) ) }
+  end
+  base_cmd = "#{Srf_file} -o #{Srf_output}"
+  @basic_conversion = commandline_lambda(base_cmd)
+  @with_new_db_path = commandline_lambda(base_cmd + " --db-info --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'}")
+  @update_the_db_path = commandline_lambda(base_cmd + " --db-path #{Ms::TESTDATA + '/sequest/opd1_2runs_2mods/sequest33'} --db-update" )
+  behaves_like "an srf to sqt converter"
 end

data/spec/spec_helper.rb CHANGED

@@ -2,27 +2,9 @@
 require 'rubygems'
 require 'spec/more'
-# This is already defined in our module
-#TESTFILES = File.expand_path(File.dirname(__FILE__)) + '/testfiles'
 Bacon.summary_on_exit
-#module Bacon
-#  class Context
-#    def hash_match(hash, obj)
-#      hash.each do |k,v|
-#        if v.is_a?(Hash)
-#          hash_match(v, obj.send(k.to_sym))
-#        else
-#          puts "#{k}: #{v} but was #{obj.send(k.to_sym)}" if obj.send(k.to_sym) != v
-#          obj.send(k.to_sym).should.equal v
-#        end
-#      end
-#    end
-#  end
-#end
+# TODO: check whether TESTFILES is already defined elsewhere (e.g. in our module)
 TESTFILES = File.expand_path(File.dirname(__FILE__)) + "/testfiles"
 begin

metadata CHANGED

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 0
-  - 13
-  version: 0.0.13
+  - 14
+  version: 0.0.14
 platform: ruby
 authors:
 - John T. Prince
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-08-17 00:00:00 -06:00
+date: 2010-08-24 00:00:00 -06:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -134,7 +134,7 @@ files:
 - spec/testfiles/small.sqt
 - spec/testfiles/small2.sqt
 has_rdoc: true
-homepage: http://jtprince.github.com/ms-sequest
+homepage: http://github.com/jtprince/ms-sequest
 licenses: []
 post_install_message: