RubyGems - ms-sequest - Versions diffs - 0.0.8 → 0.0.9 - Mend

ms-sequest 0.0.8 → 0.0.9

Files changed (6) hide show

data/History CHANGED Viewed

@@ -1,3 +1,8 @@
+== 0.0.9 / 2009-09-08
+* added capability to read srf files created by reading in .out/.dta folders (combined).
+  NOTE: please consider this functionality beta stage as it has not been extensively tested!
+* cleaned up the read_dta_files function since we don't need measured_mhs as we do that later
 == 0.0.8 / 2009-06-29

data/lib/ms/sequest.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module Ms
   module Sequest
-    VERSION = '0.0.8'
+    VERSION = '0.0.9'
   end
 end

data/lib/ms/sequest/params.rb CHANGED Viewed

@@ -75,7 +75,7 @@ class Ms::Sequest::Params
     hash = {}
     in_add_amino_acid_section = false
     add_section_re = /^\s*add_/
-    prev_pos = nil
+      prev_pos = nil
     while line = fh.gets
       if line =~ add_section_re
         in_add_amino_acid_section = true
@@ -94,11 +94,13 @@ class Ms::Sequest::Params
     hash
   end
-  # returns self
+  # returns self, or nil if no sequest params are found in the io
   def parse_io(fh)
     # seek to the SEQUEST file
     loop do
-      if fh.gets =~ @@sequest_line
+      line = fh.gets
+      return nil if line.nil?  # we return nil if we reach the end of the file without seeing sequest params
+      if line =~ @@sequest_line
         # double check that we are in a sequest params file:
         pos = fh.pos
         if fh.gets =~ /^first_database_name/
@@ -235,12 +237,12 @@ class Ms::Sequest::Params
             when :precursor : precursor_mass_type
             when :fragment : fragment_mass_type
             end
-   case reply
-   when 'average'
-     Ms::Mass::AA::AVG
-   when 'monoisotopic'
-     Ms::Mass::AA::MONO
-   end
+    case reply
+    when 'average'
+      Ms::Mass::AA::AVG
+    when 'monoisotopic'
+      Ms::Mass::AA::MONO
+    end
   end
   # at least in Bioworks 3.2, the First number after the enzyme

data/lib/ms/sequest/srf.rb CHANGED Viewed

@@ -57,8 +57,7 @@ class Ms::Sequest::Srf
         handle.seek(params_start_index)
         Ms::Sequest::Params.new.parse_io(handle)
       else
-        warn "#{filename} has no SEQUEST information, may be a truncated/corrupt file!"
-        nil
+        nil  # not found
       end
     end
   end
@@ -85,6 +84,9 @@ class Ms::Sequest::Srf
   #     # searches then you probably want to set this to false to avoid
   #     # recalculation.
   #
+  #     :read_pephits => true | false (default true)
+  #     # will attempt to read peptide hit information (equivalent to .out
+  #     # files), otherwise, just reads the dta information.
   def initialize(filename=nil, opts={})
     @peps = []
@@ -143,10 +145,25 @@ class Ms::Sequest::Srf
     self
   end
+  def read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0)
+    dta_files = Array.new(num_files)
+    out_files = Array.new(num_files)
+    start = dta_start_byte
+    fh.pos = start
+    num_files.times do |i|
+      dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
+      #p dta_files[i]
+      out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
+      #p out_files[i]
+    end
+    [dta_files, out_files]
+  end
   # returns self
   # opts are the same as for 'new'
   def from_file(filename, opts)
-    opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true}.merge(opts)
+    opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)
     params = Ms::Sequest::Srf.get_sequest_params(filename)
     dup_references = 0
     dup_refs_gt_0 = false
@@ -169,7 +186,7 @@ END
         dup_refs_gt_0 = true
       end
     else
-      warn "no params file found in srf, could be truncated file!"
+      warn "no params file found in srf, could be combined file or truncated/corrupt file!"
     end
     File.open(filename, 'rb') do |fh|
@@ -184,24 +201,44 @@ END
                   when '3.5'
                     true
                   end
-      @dta_files, measured_mhs = read_dta_files(fh,@header.num_dta_files, unpack_35)
-      @out_files = read_out_files(fh,@header.num_dta_files, measured_mhs, unpack_35, dup_refs_gt_0)
-      if fh.eof?
-        #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
-        @params = nil
-        @index = []
+      if @header.combined
+        @base_name = File.basename(filename, '.*')
+        # I'm not sure why this is the case, but the reported number is too
+        # big by one on the 2 files I've seen so far, so we will correct it here!
+        @header.dta_gen.num_dta_files = @header.dta_gen.num_dta_files - 1
+        if opts[:read_pephits] == false
+          raise NotImplementedError, "on combined files must read everything right now!"
+        end
+        (@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
       else
-        @params = Ms::Sequest::Params.new.parse_io(fh)
+        @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
+        @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
+        if opts[:read_pephits]
+          @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
+          if fh.eof?
+            #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
+            @params = nil
+            @index = []
+          end
+        end
+      end
+      start_pos_in_case = fh.pos
+      @params = Ms::Sequest::Params.new.parse_io(fh)
+      if @params.nil?
+        fh.pos = start_pos_in_case
+        # seek to the index
+        fh.scanf "\000\000\000\000"
+      else # we have a params file
         # This is very sensitive to the grab_params method in sequest params
         fh.read(12)  ## gap between last params entry and index
-        @index = read_scan_index(fh,@header.num_dta_files)
       end
+      @index = read_scan_index(fh,@header.num_dta_files)
+      #p @index
     end
     ### UPDATE SOME THINGS:
-    @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
     # give each hit a base_name, first_scan, last_scan
     @index.each_with_index do |ind,i|
       mass_measured = @dta_files[i][0]
@@ -244,24 +281,19 @@ END
   # returns an array of dta_files
   def read_dta_files(fh, num_files, unpack_35)
-    measured_mhs = Array.new(num_files) ## A parallel array to capture the actual mh
     dta_files = Array.new(num_files)
     start = dta_start_byte
-    unless fh.pos == start
-      fh.pos = start
-    end
+    fh.pos = start
     header.num_dta_files.times do |i|
-      dta_file = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
-      measured_mhs[i] = dta_file[0]
-      dta_files[i] = dta_file
+      dta_files[i] = Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
     end
-    [dta_files, measured_mhs]
+    dta_files
   end
   # filehandle (fh) must be at the start of the outfiles.  'read_dta_files'
   # will put the fh there.
-  def read_out_files(fh,number_files, measured_mhs, unpack_35, dup_refs_gt_0)
+  def read_out_files(fh,number_files, unpack_35, dup_refs_gt_0)
     out_files = Array.new(number_files)
     header.num_dta_files.times do |i|
       out_files[i] = Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
@@ -312,6 +344,14 @@ class Ms::Sequest::Srf::Header
   attr_accessor :params_filename
   attr_accessor :sequest_log_filename
+  # true if this is a combined file, false if it represents a single file
+  # this is set by examining the DTAGen object for signs of a single file
+  attr_reader :combined
+  __chars_re = Regexp.escape( "\r\0" )
+  NEWLINE_OR_NULL_RE = /[#{__chars_re}]/o
   def num_dta_files
     @dta_gen.num_dta_files
   end
@@ -321,6 +361,11 @@ class Ms::Sequest::Srf::Header
     st = fh.read(4)
     @version = '3.' + st.unpack('I').first.to_s
     @dta_gen = Ms::Sequest::Srf::DTAGen.new.from_io(fh)
+    # if start_mass, end_mass, start_scan and end_scan are all zero, it's a
+    # combined srf file:
+    @combined = [0.0, 0.0, 0, 0].zip(%w(start_mass end_mass start_scan end_scan)).all? do |one,two|
+      one == @dta_gen.send(two.to_sym)
+    end
     ## get the rest of the info
     byte_length = Byte_length.dup
@@ -328,19 +373,23 @@ class Ms::Sequest::Srf::Header
     fh.pos = Start_byte[:enzyme]
     [:enzyme, :ion_series, :model, :modifications, :raw_filename, :db_filename, :dta_log_filename, :params_filename, :sequest_log_filename].each do |param|
-      send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param]) )
+      send("#{param}=".to_sym, get_null_padded_string(fh, byte_length[param], @combined))
     end
     self
   end
   private
-  def get_null_padded_string(fh,bytes)
+  def get_null_padded_string(fh, bytes, combined=false)
     st = fh.read(bytes)
     # for empty declarations
     if st[0] == 0x000000
       return ''
     end
-    st.rstrip!
+    if combined
+      st = st[ 0, st.index(NEWLINE_OR_NULL_RE) ]
+    else
+      st.rstrip!
+    end
     st
   end
@@ -497,6 +546,7 @@ class Ms::Sequest::Srf::Out
       Ms::Sequest::Srf::Out::Pep.set_deltacn_from_deltacn_orig(ar)
     end
     self[6] = ar
+    self[4].chomp!
     self
   end

data/lib/ms/sequest/srf/sqt.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require 'tap/task'
+require 'ms/calc'
 require 'ms/sequest'
 require 'ms/sequest/srf'
 require 'ms/sequest/sqt'
@@ -136,9 +137,9 @@ module Ms
             out_file_total_inten = out_file.total_inten
             out_file_lowest_sp = out_file.lowest_sp
             if opt[:round]
-              dta_file_mh = round(dta_file_mh, mh_dp)
-              out_file_total_inten = round(out_file_total_inten, tic_dp)
-              out_file_lowest_sp = round(out_file_lowest_sp, sp_dp)
+              dta_file_mh = Ms::Calc.round(dta_file_mh, mh_dp)
+              out_file_total_inten = Ms::Calc.round(out_file_total_inten, tic_dp)
+              out_file_lowest_sp = Ms::Calc.round(out_file_lowest_sp, sp_dp)
             end
             out.puts ['S', out_file.first_scan, out_file.last_scan, out_file.charge, time_to_process, out_file.computer, dta_file_mh, out_file_total_inten, out_file_lowest_sp, out_file.num_matched_peptides].join("\t")
@@ -148,10 +149,10 @@ module Ms
               hit_xcorr = hit.xcorr
               hit_sp = hit.sp
               if opt[:round]
-                hit_mh = round(hit_mh, mh_dp)
-                hit_deltacn_orig_updated = round(hit_deltacn_orig_updated, dcn_dp)
-                hit_xcorr = round(hit_xcorr, xcorr_dp)
-                hit_sp = round(hit_sp, sp_dp)
+                hit_mh = Ms::Calc.round(hit_mh, mh_dp)
+                hit_deltacn_orig_updated = Ms::Calc.round(hit_deltacn_orig_updated, dcn_dp)
+                hit_xcorr = Ms::Calc.round(hit_xcorr, xcorr_dp)
+                hit_sp = Ms::Calc.round(hit_sp, sp_dp)
               end
               # note that the rank is determined by the order..
               out.puts ['M', index+1, hit.rsp, hit_mh, hit_deltacn_orig_updated, hit_xcorr, hit_sp, hit.ions_matched, hit.ions_total, hit.sequence, manual_validation_status].join("\t")

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ms-sequest
 version: !ruby/object:Gem::Version
-  version: 0.0.8
+  version: 0.0.9
 platform: ruby
 authors:
 - John Prince
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-06-29 00:00:00 -06:00
+date: 2009-09-08 00:00:00 -06:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -30,7 +30,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.0.1
+        version: 0.0.2
     version:
 - !ruby/object:Gem::Dependency
   name: tap
@@ -55,8 +55,8 @@ dependencies:
 description: reads .SRF, .SQT and supports conversions
 email: jtprince@gmail.com
 executables:
-- srf_to_sqt.rb
 - srf_to_search.rb
+- srf_to_sqt.rb
 extensions: []
 extra_rdoc_files:
@@ -64,12 +64,12 @@ extra_rdoc_files:
 - MIT-LICENSE
 - History
 files:
+- lib/ms/sequest.rb
+- lib/ms/sequest/sqt.rb
 - lib/ms/sequest/params.rb
-- lib/ms/sequest/srf/search.rb
 - lib/ms/sequest/srf/sqt.rb
+- lib/ms/sequest/srf/search.rb
 - lib/ms/sequest/srf.rb
-- lib/ms/sequest/sqt.rb
-- lib/ms/sequest.rb
 - README
 - MIT-LICENSE
 - History