RubyGems - mspire - Versions diffs - 0.3.9 → 0.4.2 - Mend

mspire 0.3.9 → 0.4.2

Files changed (87) hide show

data/INSTALL +24 -7
data/README +15 -13
data/README.rdoc +18 -0
data/Rakefile +50 -14
data/bin/aafreqs.rb +0 -0
data/bin/bioworks2excel.rb +0 -0
data/bin/bioworks_to_pepxml.rb +2 -1
data/bin/bioworks_to_pepxml_gui.rb +0 -0
data/bin/fasta_shaker.rb +0 -0
data/bin/filter_and_validate.rb +0 -0
data/bin/gi2annot.rb +0 -0
data/bin/id_class_anal.rb +0 -0
data/bin/id_precision.rb +0 -0
data/bin/ms_to_lmat.rb +0 -0
data/bin/pepproph_filter.rb +0 -0
data/bin/protein_summary.rb +0 -0
data/bin/protxml2prots_peps.rb +0 -0
data/bin/raw_to_mzXML.rb +3 -3
data/bin/run_percolator.rb +122 -0
data/bin/sqt_group.rb +0 -0
data/bin/srf_group.rb +0 -0
data/changelog.txt +29 -0
data/lib/ms/gradient_program.rb +0 -1
data/lib/ms/msrun.rb +62 -29
data/lib/ms/parser/mzdata/axml.rb +55 -0
data/lib/ms/parser/mzdata/dom.rb +51 -36
data/lib/ms/parser/mzdata.rb +8 -2
data/lib/ms/parser/mzxml/axml.rb +59 -0
data/lib/ms/parser/mzxml/dom.rb +80 -57
data/lib/ms/parser/mzxml/hpricot.rb +1 -1
data/lib/ms/parser/mzxml/libxml.rb +6 -2
data/lib/ms/parser/mzxml.rb +110 -3
data/lib/ms/parser.rb +4 -4
data/lib/ms/precursor.rb +19 -4
data/lib/ms/scan.rb +7 -7
data/lib/ms/spectrum.rb +249 -58
data/lib/mspire.rb +1 -1
data/lib/spec_id/bioworks.rb +2 -2
data/lib/spec_id/precision/filter/cmdline.rb +8 -1
data/lib/spec_id/precision/prob/cmdline.rb +2 -2
data/lib/spec_id/precision/prob.rb +1 -0
data/lib/spec_id/proph/pep_summary.rb +3 -4
data/lib/spec_id/proph/prot_summary.rb +3 -3
data/lib/spec_id/protein_summary.rb +1 -1
data/lib/spec_id/sequest/pepxml.rb +5 -5
data/lib/spec_id/sqt.rb +4 -4
data/lib/spec_id/srf.rb +49 -8
data/lib/spec_id.rb +5 -0
data/lib/xml_style_parser.rb +16 -2
data/script/compile_and_plot_smriti_final.rb +0 -0
data/script/create_little_pepxml.rb +0 -0
data/script/degenerate_peptides.rb +0 -0
data/script/estimate_fpr_by_cysteine.rb +0 -0
data/script/extract_gradient_programs.rb +1 -1
data/script/find_cysteine_background.rb +0 -0
data/script/genuine_tps_and_probs.rb +0 -0
data/script/get_apex_values_rexml.rb +0 -0
data/script/mascot_fix_pepxml.rb +123 -0
data/script/msvis.rb +0 -0
data/script/mzXML2timeIndex.rb +0 -0
data/script/peps_per_bin.rb +0 -0
data/script/prep_dir.rb +0 -0
data/script/simple_protein_digestion.rb +0 -0
data/script/smriti_final_analysis.rb +0 -0
data/script/sqt_to_meta.rb +0 -0
data/script/top_hit_per_scan.rb +0 -0
data/script/toppred_to_yaml.rb +0 -0
data/script/tpp_installer.rb +0 -0
data/specs/bin/prob_validate_spec.rb +5 -2
data/specs/bin/protein_summary_spec.rb +5 -1
data/specs/ms/msrun_spec.rb +176 -133
data/specs/ms/parser_spec.rb +3 -3
data/specs/ms/spectrum_spec.rb +0 -2
data/specs/spec_id/precision/filter_spec.rb +4 -1
data/specs/spec_id/precision/prob_spec.rb +2 -2
data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
data/specs/spec_id/sqt_spec.rb +5 -5
data/specs/spec_id/srf_spec.rb +56 -93
data/specs/spec_id/srf_spec_helper.rb +121 -284
data/specs/spec_id_spec.rb +3 -0
data/specs/transmem/toppred_spec.rb +1 -0
data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
metadata +247 -229

data/specs/spec_id/srf_spec.rb CHANGED Viewed

@@ -3,6 +3,8 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
 require File.expand_path( File.dirname(__FILE__) + '/srf_spec_helper' )
 require 'spec_id/srf'
+require 'fileutils'
 include SRFHelper
 #tfiles = File.dirname(__FILE__) + '/tfiles/'
@@ -60,11 +62,23 @@ describe 'an srf reader', :shared => true do
     @dta_files_last.object_match(@srf_obj.dta_files.last).should be_true
   end
+  # given an array of out_file objects, returns the first set of hits
+  def get_first_peps(out_files)
+    out_files.each do |outf|
+      if outf.num_hits > 0
+        return outf.hits
+      end
+    end
+    return nil
+  end
   it 'retrieves correct out files' do
     @out_files_first.object_match(@srf_obj.out_files.first).should be_true
     @out_files_last.object_match(@srf_obj.out_files.last).should be_true
-    @out_files_first_last_pep.object_match(@srf_obj.out_files.first.hits.last).should be_true
-    @out_files_last_last_pep.object_match(@srf_obj.out_files.last.hits.last).should be_true
+    # first available peptide hit
+    @out_files_first_pep.object_match(get_first_peps(@srf_obj.out_files).first).should be_true
+    # last available peptide hit
+    @out_files_last_pep.object_match(get_first_peps(@srf_obj.out_files.reverse).last).should be_true
   end
   xit 'retrieves correct params' do
@@ -75,35 +89,30 @@ describe 'an srf reader', :shared => true do
 end
+Expected_hash_keys = %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_pep out_files_last_pep params)
-describe klass, " reading a version 3.2 .srf file" do
-  spec_large do
-    before(:all) do
-      @file = Tfiles_l + '/sash7/sequest/7MIX_STD_110802_1.srf'
-      %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
-        instance_variable_set("@#{c}", File_32[c.to_sym])
+to_run = {
+  '3.2' => {:hash => File_32, :file => '/opd1_2runs_2mods/sequest32/020.srf'},
+  '3.3' => {:hash => File_33, :file => '/opd1_2runs_2mods/sequest33/020.srf'},
+  '3.3.1' => {:hash => File_331, :file => '/opd1_2runs_2mods/sequest331/020.srf'},
+}
+to_run.each do |version,info|
+  describe klass, " reading a version #{version} .srf file" do
+    spec_large do
+      before(:all) do
+        @file = Tfiles_l + info[:file]
+        Expected_hash_keys.each do |c|
+          instance_variable_set("@#{c}", info[:hash][c.to_sym])
+        end
       end
+      it_should_behave_like "an srf reader"
     end
-    it_should_behave_like "an srf reader"
   end
 end
-describe klass, " reading a version 3.3 .srf file" do
-  it_should 'reading a version 3.3 .srf file'
-end
-describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
-  spec_large do
-    before(:all) do
-      @file = Tfiles_l + '/sash7/sequest/bioworks331/7MIX_STD_110802_1.srf'
-      %w(header dta_gen dta_files_first dta_files_last out_files_first out_files_last out_files_first_last_pep out_files_last_last_pep params).each do |c|
-        instance_variable_set("@#{c}", File_35[c.to_sym])
-      end
-    end
-    it_should_behave_like "an srf reader"
-  end
+describe klass, " reading a corrupted file" do
   it 'should read a null file from an aborted run w/o failing (but gives error msg)' do
     file = Tfiles + '/corrupted_900.srf'
     error_msg = Tfiles + '/error_msg.tmp'
@@ -123,78 +132,9 @@ describe klass, " reading a version 3.5 (bioworks 3.3.1) .srf file" do
     IO.read(error_msg).should =~ /corrupted_900\.srf/
     File.unlink error_msg
   end
-end
-describe klass, 'reading an srf file' do
-  spec_large do
-    before(:all) do
-      start = Time.now
-      tf_srf = Tfiles_l + "/sash7/sequest/older/7MIX_STD_110802_1.srf"
-      @srf = klass.new(tf_srf)
-      puts "- read in #{Time.now - start} seconds"
-    end
-    #def initialize(arg)
-    #  super(arg)
-    #  @tfiles = File.dirname(__FILE__) + '/tfiles/'
-    #  @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
-    #  @srg_file = @tfiles + "tmp_bioworks.srg"
-    #  @srf = $srf
-    #  @group = $group
-    #end
-    it 'reads' do
-    end
-    it 'reads an srf file (w/o probs) and extracts all basic information' do
-      ## Verify that we have everything and it is as we expect (not exhaustive)
-      head = @srf.header
-      dtgen = head.dta_gen
-      ## HEADER
-      hash_match(Header, head)
-      hash_match(Dta_gen, dtgen)
-      ## DTA_FILES
-      hash_match(Dta_files_first, @srf.dta_files.first)
-      hash_match(Dta_files_last, @srf.dta_files.last)
-      ## OUT_FILES
-      hash_match(Out_files_first, @srf.out_files.first)
-      hit = @srf.out_files.first.hits.first
-      hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
-      hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
-      hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
-      ## SEQUEST_PARAMS
-      hash_match(Sequest_params, @srf.params)
-      ## INDEX
-      @srf.index.last.should == [7161, 7161, 3]
-      @srf.index.first.should == [2, 2, 1]
-      @srf.dta_files.size.should == @srf.index.size
-      @srf.dta_files.size.should == @srf.out_files.size
-    end
-    it_should 'give accurate peptides' do
-    end
-  end
-  ## treats reference special
-  def hash_match(hash, srf)
-    hash.each do |k,v|
-      if v.is_a? Float
-        delta = v/100000
-        srf.send(k.to_sym).should be_close(v, delta)
-      elsif k == :reference
-        srf.prots.first.reference.should == v[0,38]
-      else
-        srf.send(k.to_sym).should == v
-      end
-    end
-  end
 end
 describe SRFGroup, 'creating an srg file' do
   it 'creates one given some non-existing, relative filenames' do
     ## TEST SRG GROUPING:
     filenames = %w(my/lucky/filename /another/filename)
@@ -205,5 +145,28 @@ describe SRFGroup, 'creating an srg file' do
     File.exist?(srg_file).should be_true
     File.unlink(srg_file)
   end
+end
+# @TODO: this test needs to be created for a small mock dataset!!
+describe SRF, 'creating dta files' do
+  spec_large do
+    before(:all) do
+      file = Tfiles_l + '/opd1_2runs_2mods/sequest33/020.srf'
+      @srf = SRF.new(file)
+    end
+    it 'creates dta files' do
+      @srf.to_dta_files
+      File.exist?('020').should be_true
+      File.directory?('020').should be_true
+      File.exist?('020/020.3366.3366.2.dta').should be_true
+      lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
+      lines.first.should == "1113.10649290125 2\r\n"
+      lines[1].should == "164.56591796875 4817.0\r\n"
+      FileUtils.rm_rf '020'
+    end
+  end
 end

data/specs/spec_id/srf_spec_helper.rb CHANGED Viewed

@@ -3,300 +3,137 @@ module SRFHelper
   File_32 = {
     :header =>
     {
-      :params_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\sashimi7.params",
-      :model => "LCQ Deca XP",
-      :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_dta.log",
-      :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
-      :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
-      :modifications => "",
-      :enzyme => "Enzyme:Trypsin(KR/P) (2)",
-      :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\sashimi7\\7MIX_STD_110802_1_sequest.log",
-      :version => "3.2",
-      :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW"
+      :params_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\ecoli.params",
+      :raw_filename=>"C:\\Xcalibur\\data\\john\\opd00001\\020.RAW",
+      :modifications=>"(M* +15.99940) (STY# +79.97990)",
+      :sequest_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_sequest.log",
+      :ion_series=>"ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
+      :db_filename=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta",
+      :enzyme=>"Enzyme:Trypsin(KR/P) (2)",
+      :version=>"3.2",
+      :model=>"LCQ Deca XP",
+      :dta_log_filename=>"C:\\Xcalibur\\sequest\\john\\opd1_2runs_2mods\\020_dta.log"
     },
-       :dta_gen => {
-        :min_group_count => 1,
-        :start_time => 1.39999997615814,
-        :start_mass => 400.0,
-        :end_scan => 7161,
-        :group_scan => 1,
-        :start_scan => 1,
-        :num_dta_files => 6952,
-        :min_ion_threshold => 15,
-        :end_mass => 4500.0,
-      },
-      :dta_files_first => {
-        :mh => 1221.88989257812,
-        :dta_tic => 7703132.0,
-        :num_peaks => 74,
-        :charge => 1,
-        :ms_level => 2,
-        :total_num_possible_charge_states => 0,
-        :peaks => 592,
-      },
-      :dta_files_last => {
-       :mh => 2604.8360326775,
-       :dta_tic => 31977.0,
-       :num_peaks => 17,
-       :charge => 3,
-       :ms_level => 2,
-       :total_num_possible_charge_states => 0,
-       :peaks => 136,
-      },
-      :out_files_first => {
-        :num_hits => 10,
-        :computer => 'VELA',
-        :date_time => '05/12/2006, 10:58 AM,',
-        :hits => 10
-      },
-      :out_files_last => {
-         :num_hits => 10,
-         :computer => 'VELA',
-         :date_time => '05/12/2006, 11:11 AM,',
-         :hits => 10
-      },
-      :out_files_first_last_pep => {
-        :aaseq => 'QFSLSKSSLPK',
-        :sequence => 'K.QFSLSKSSLPK.S',
-        :mh => 1222.4156904522,
-        :deltacn => 1.1,
-        :sp => 57.4083709716797,
-        :xcorr => 0.802009999752045,
-        :id => 19977,
-        :rsp => 60,
-        :ions_matched => 7,
-        :ions_total => 20,
-        :prots => 1,
-        :deltamass => 0.525797874074897,
-        :ppm => 430.315265940608,
-        :base_name => '7MIX_STD_110802_1',
-        :first_scan => 2,
-        :last_scan => 2,
-        :charge => 1
-      },
-        :out_files_last_last_pep =>
-      {
-        :aaseq => 'EAFLVNSDLTLRAQLTEFRDHK',
-        :sequence => 'R.EAFLVNSDLTLRAQLTEFRDHK.L',
-        :mh => 2604.9025174522,
-        :deltacn => 1.1,
-        :sp => 26.1511478424072,
-        :xcorr => 0.634012818336487,
-        :id => 8105,
-        :rsp => 165,
-        :ions_matched => 6,
-        :ions_total => 84,
-        :prots => 1,
-        :deltamass => 0.0664847746993473,
-        :ppm => 25.523592988311,
-        :base_name => '7MIX_STD_110802_1',
-        :first_scan => 7161,
-        :last_scan => 7161,
-        :charge => 3,
-      },
+    :dta_gen => {
+      :min_group_count => 1,
+      :start_time => 1.5,
+      :start_mass => 300.0,
+      :end_scan => 3620,
+      :group_scan => 1,
+      :start_scan => 1,
+      :num_dta_files => 3747,
+      :min_ion_threshold => 15,
+      :end_mass => 4500.0,
+    },
+    :dta_files_first => {
-      :params => {
-        "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.4000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"},
+      :mh=>390.92919921875,
+      :dta_tic=>9041311.0,
+      :num_peaks=>48,
+      :charge=>1,
+      :ms_level=>2,
+      :total_num_possible_charge_states=>0,
+    },
+    :dta_files_last => {
+      :dta_tic=>842424.0,
+      :mh=>357.041198730469,
+      :num_peaks=>78,
+      :ms_level=>2,
+      :charge=>1,
+      :total_num_possible_charge_states=>0,
+    },
+    :out_files_first => {
+      :num_hits => 0,
+      :computer => 'VELA',
+      :date_time => '05/06/2008, 02:08 PM,',
+      :hits => 0,
+    },
+    :out_files_last => {
+      :num_hits => 0,
+      :computer => 'VELA',
+      :date_time => '05/06/2008, 02:11 PM,',
+      :hits => 0,
+    },
+    :out_files_first_pep => {
+      :aaseq=>"YRLGGSTK",
+      :sequence=>"R.Y#RLGGS#T#K.K",
+      :mh=>1121.9390244522,
+      :deltacn_orig=>0.0,
+      :sp=>29.8529319763184,
+      :xcorr=>0.123464643955231,
+      :id=>2104,
+      :rsp=>1,
+      :ions_matched=>5,
+      :ions_total=>35,
+      :prots=>1,
+      :deltamass=>-0.00579976654989878,
+      :ppm=>5.16938660859491,
+      :base_name=>"020",
+      :first_scan=>3,
+      :last_scan=>3,
+      :charge=>1,
+      :deltacn=>0.795928299427032,
+      :base_name=>"020",
+    },
+    :out_files_last_pep =>
+    {
+      :aaseq=>"LLPGTARTMRR",
+      :sequence=>"R.LLPGTARTMRR.M",
+      :mh=>1272.5493424522,
+      :deltacn_orig=>0.835508584976196,
+      :deltacn=>1.1,
+      :sp=>57.9885787963867,
+      :xcorr=>0.109200321137905,
+      :id=>1361,
+      :rsp=>11,
+      :ions_matched=>6,
+      :ions_total=>40,
+      :prots=>1,
+      :deltamass=>0.00243330985608736,
+      :ppm=>1.91215729542523,
+      :base_name=>"020",
+      :first_scan=>3619,
+      :last_scan=>3619,
+      :charge=>3,
+      :deltacn=>1.1,
+      :base_name=>"020",
+    },
+    :params => {
+        "add_O_Ornithine"=>"0.0000", "add_F_Phenylalanine"=>"0.0000", "add_A_Alanine"=>"0.0000", "add_C_Cysteine"=>"0.0000", "add_Y_Tyrosine"=>"0.0000", "add_X_LorI"=>"0.0000", "add_J_user_amino_acid"=>"0.0000", "add_Cterm_peptide"=>"0.0000", "add_S_Serine"=>"0.0000", "add_Nterm_protein"=>"0.0000", "add_D_Aspartic_Acid"=>"0.0000", "add_Q_Glutamine"=>"0.0000", "add_K_Lysine"=>"0.0000", "add_R_Arginine"=>"0.0000", "add_W_Tryptophan"=>"0.0000", "add_Nterm_peptide"=>"0.0000", "add_H_Histidine"=>"0.0000", "add_L_Leucine"=>"0.0000", "add_I_Isoleucine"=>"0.0000", "add_N_Asparagine"=>"0.0000", "add_B_avg_NandD"=>"0.0000", "add_Z_avg_QandE"=>"0.0000", "add_E_Glutamic_Acid"=>"0.0000", "add_G_Glycine"=>"0.0000", "add_P_Proline"=>"0.0000", "add_M_Methionine"=>"0.0000", "add_Cterm_protein"=>"0.0000", "add_V_Valine"=>"0.0000", "add_T_Threonine"=>"0.0000", "add_U_user_amino_acid"=>"0.0000", "match_peak_tolerance"=>"1.0000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"15.999400 M 79.979900 STY 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\ecoli_K12_ncbi_20060321.fasta", "peptide_mass_tolerance"=>"25.0000", "digest_mass_range"=>"600.0 3500.0", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.0000", "peptide_mass_units"=>"2", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"3", "ion_cutoff_percentage"=>"0.0000", "mass_type_fragment"=>"0"
+    }
   }
-  File_35 = {}
-  File_32.each {|k,v| File_35[k] = v.dup }
-  File_35[:header].merge!( {
-    :sequest_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_sequest.log",
-    :raw_filename => "C:\\Xcalibur\\data\\john\\sash7\\7MIX_STD_110802_1.RAW",
-    :params_filename => "C:\\Xcalibur\\sequest\\john\\bioworks331\\sashimi7.params",
-    :dta_log_filename => "C:\\Xcalibur\\sequest\\7MIX_STD_110802_1_dta.log",
-    :version=>"3.5"
-  } )
-  File_35[:params].merge!( {
-  "add_O_Ornithine"=>"0.00000", "add_F_Phenylalanine"=>"0.00000", "add_A_Alanine"=>"0.00000", "add_C_Cysteine"=>"0.00000", "add_Y_Tyrosine"=>"0.00000", "add_X_LorI"=>"0.00000", "add_J_user_amino_acid"=>"0.00000", "add_Cterm_peptide"=>"0.00000", "add_S_Serine"=>"0.00000", "add_Nterm_protein"=>"0.00000", "add_D_Aspartic_Acid"=>"0.00000", "add_Q_Glutamine"=>"0.00000", "add_K_Lysine"=>"0.00000", "add_R_Arginine"=>"0.00000", "add_W_Tryptophan"=>"0.00000", "add_Nterm_peptide"=>"0.00000", "add_H_Histidine"=>"0.00000", "add_L_Leucine"=>"0.00000", "add_I_Isoleucine"=>"0.00000", "add_N_Asparagine"=>"0.00000", "add_B_avg_NandD"=>"0.00000", "add_Z_avg_QandE"=>"0.00000", "add_E_Glutamic_Acid"=>"0.00000", "add_G_Glycine"=>"0.00000", "add_P_Proline"=>"0.00000", "add_M_Methionine"=>"0.00000", "add_Cterm_protein"=>"0.00000", "add_V_Valine"=>"0.00000", "add_T_Threonine"=>"0.00000", "add_U_user_amino_acid"=>"0.00000", "match_peak_tolerance"=>"1.00000", "match_peak_allowed_error"=>"1", "normalize_xcorr"=>"0", "nucleotide_reading_frame"=>"0", "num_results"=>"250", "sequence_header_filter"=>"", "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y", "partial_sequence"=>"", "max_num_internal_cleavage_sites"=>"2", "search_engine"=>"SEQUEST", "print_duplicate_references"=>"40", "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0", "remove_precursor_peak"=>"0", "num_output_lines"=>"10", "second_database_name"=>"", "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta", "peptide_mass_tolerance"=>"1.40000", "digest_mass_range"=>"400.0000 4500.0000", "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P", "show_fragment_ions"=>"0", "protein_mass_filter"=>"0 0", "term_diff_search_options"=>"0.000000 0.000000", "num_description_lines"=>"5", "fragment_ion_tolerance"=>"1.00000", "peptide_mass_units"=>"0", "mass_type_parent"=>"0", "match_peak_count"=>"0", "max_num_differential_per_peptide"=>"1", "fragment_ion_units"=>"0", "ion_cutoff_percentage"=>"0.00000", "mass_type_fragment"=>"0"}
-  )
+  File_33 = {}
+  File_32.each do |k,v|
+    File_33[k] = v.dup
+  end
-  File_35[:out_files_first].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:11 PM,'} )
-  File_35[:out_files_last].merge!( {:computer=>'TESLA', :date_time=>'09/17/2007, 03:15 PM,'} )
-  # I'm assuming this difference is due to higher precision mass...? (not a
-  # parsing error)
-  File_35[:out_files_first_last_pep][:rsp] = 56
-  File_35[:out_files_last_last_pep][:rsp] = 125
+  ## Bioworks 3.3 (srf version 3.3)
+  File_33[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\021112-EcoliSol37-1\\020.RAW"
+  File_33[:header][:version] = "3.3"
-  Header = {
-    :db_filename => "C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
-    :ion_series => "ion series nABY ABCDVWXYZ: 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
-    :sequest_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_sequest.log",
-    :raw_filename => "C:\\Xcalibur\\data\\john\\sashimi7\\7MIX_STD_110802_1.RAW",
-    :enzyme => "Enzyme:Trypsin(KR/P) (2)",
-    :params_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\sash7.params",
-    :modifications => "",
-    :version => "3.2",
-    :dta_log_filename => "C:\\Xcalibur\\sequest\\john\\db_quest\\alone_new\\sash7\\7MIX_STD_110802_1_dta.log",
-    :model => "LCQ Deca XP",
-  }
-  ## DTA Gen
-  Dta_gen = {
-    :group_scan => 1,
-    :start_time => 1.39999997615814,
-    :start_scan => 1,
-    :num_dta_files => 6952,
-    :min_ion_threshold => 15,
-    :end_mass => 4500.0,
-    :min_group_count => 1,
-    :start_mass => 400.0,
-    :end_scan => 7161,
-  }
-  Dta_files_first = {
-    :mh => 1221.88989257812,
-    :dta_tic => 7703132.0,
-    :num_peaks => 74,
-    :charge => 1,
-    :ms_level => 2,
-    :total_num_possible_charge_states => 0,
-    :peaks => "\346\214\271C\000p|F\340\016\335C\000D\fG\022l\335C\000\3604F\020\205\337C\000D~F\260\256\340C\000\020\347E\220\023\343C\000\220&F\020R\352C\000\244\313F\246\237\353C\000\360\032E\206\223\004D\000\204\030F\260\177\005D\000\346\220F \316\005D\000`\222F<\001\006D\000\356\217Fd\213\010D\000\336\tGr\314\vD\000\034}F\262\006\rD\000\026\221F\f\202!D\000\340\274E\302u#D\000\030\036Fl\275#D\000U\035G\254~&D\200\370\022H\364\315&D\000\346bGT\365&D\000\000\000@\300s5D\000`\307ET\2008D\000\3175G\310{:D\200\307\251G\230\311:D\000`5F\000\214<D\000\000\270E\254\301<D\000\340\024FX\264=D\000\270\021F\204\204?D\000\226\006H\356\256?D\000\000\000@\300\023@D\000\005\002Gb~BD\200\256\350G\032\312BD\000zAG\034\316CD\000\350\254E8\314DD\000\270\310E\316\020ED\000\010\254E\026\005QD\000\240\267E\250tSD\000tEFB\200VD\200\342\235G\374\247VD\000$\023F\000\206XD\200K\245G\242\303XD\000\343xG\270\201YD\000\214\325F\304\365ZD\0008\225FZF[D\000\230RF\232~[D@\r\201Hl\307[D\000L\031Hv\001\\D\000\3540Fx\201^D`\222\275H\f\305^D\000wZG\006\023oD\000\360\217E\354\205oD\200\335-H\350zrD\000\224,GFXtD\000\364\223F\222\201tD\200\221\341H\024\304tD\000)\034H\314\354tD\000\000\200@\022}wD\200\001\205I\274\274wD\000\t\210H\260\344wD\000\000pA\004\370yD\000@\203Eh\272\205D\2006\214Gh\336\205D\000\026\235Fb,\210D\200\177 H\\@\210D\240,\355Il`\210D\200\022\026I\320\202\210D\000 \336Fx\227\210D\000\000\200?\334{\212D\000<\252F4>\222D\000\264\213F\302\321\223D\000H\354Ed\275\230D\000-\fHv\332\230D@\313\tH\374\367\230D\000?\aG",
-  }
-  Dta_files_last = {
-    :mh => 2604.8360326775,
-    :dta_tic => 31977.0,
-    :num_peaks => 17,
-    :charge => 3,
-    :ms_level => 2,
-    :total_num_possible_charge_states => 0,
-    :peaks => "4\n\216C\000`\305D\254\205\303C\000@;D\354\321\nD\000 \275D\232\243'D\000\020iE\350\2302D\000`\245D\f\3164D\000p@E\314JID\000\300\213D\264\002PD\000\260\016E\252\213[D\0000\eE\340NoD\000@\177D0\371xD\000@:Dd\f\205D\000\000yD\200\261\215D\000@\371D\210N\221D\000`\274D\034N\256D\000\020\032EN\372\266D\000\000\aD\356\223\322D\000\250\227E"
-  }
+  File_33[:out_files_first][:computer] = 'TESLA'
+  File_33[:out_files_first][:date_time] = '04/24/2007, 10:41 AM,'
+  File_33[:out_files_last][:computer] = 'TESLA'
+  File_33[:out_files_last][:date_time] = '04/24/2007, 10:42 AM,'
-  Out_files_first = {
-    :num_hits => 10,
-    :computer => "VELA",
-    :date_time => "11/17/2006, 04:13 PM,",
-  }
+  File_33[:out_files_first_pep][:sp] = 29.8535556793213
+  File_33[:out_files_last_pep][:sp] = 57.987476348877
+  File_33[:out_files_last_pep][:rsp] = 10
+  File_33[:out_files_last_pep][:deltacn_orig] = 0.835624694824219
-  Out_files_first_hit = {
-    :mh => 1220.5128044522,
-    :deltacn => 0.071944423019886, ## this is the modified version
-    :sp => 96.5815887451172,
-    :xcorr => 1.08377742767334,
-    :id => 224,
-    :rsp => 13,
-    :ions_matched => 8,
-    :ions_total => 20,
-    :sequence => "K.LCPHLTLLPGR.F",
-    :aaseq => "LCPHLTLLPGR",
-    :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
-    :first_scan => 2,
-    :last_scan => 2,
-    :base_name => '7MIX_STD_110802_1',
-    :charge => 1,
-  }
-  Out_files_last = {
-    :num_hits => 10,
-    :computer => "VELA",
-    :date_time => "11/17/2006, 04:25 PM," ,
-  }
-  Out_files_last_first_hit = {
-    :mh => 2605.9368784522,
-    :deltacn => 0.03921128064394,
-    :sp => 76.7447052001953,
-    :xcorr => 0.915680646896362,
-    :id => 13562,
-    :rsp => 4,
-    :ions_matched => 10,
-    :ions_total => 84,
-    :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
-    :aaseq => "HLEINPNHPIVETLRQKAETHK",
-    :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
-    :first_scan => 7161,
-    :last_scan => 7161,
-    :base_name => '7MIX_STD_110802_1',
-    :deltamass => 2605.9368784522 - 2604.8360326775,
-    :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
-    :charge => 3,
-  }
-  Out_files_last_last_hit = {
-    :mh => 2604.9025174522,
-    :deltacn => 1.1,
-    :sp => 26.1511478424072,
-    :xcorr => 0.634012818336487,
-    :id => 8105,
-    :rsp => 165,
-    :ions_matched => 6,
-    :ions_total => 84,
-    :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
-    :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
-    :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
-    :first_scan => 7161,
-    :last_scan => 7161,
-    :base_name => '7MIX_STD_110802_1',
-    :deltamass =>  2604.9025174522 - 2604.8360326775,
-    :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
-    :charge => 3,
-  }
-  Sequest_params = {
-     "add_F_Phenylalanine"=>"0.0000",
-     "add_O_Ornithine"=>"0.0000",
-     "add_Y_Tyrosine"=>"0.0000",
-     "add_C_Cysteine"=>"0.0000",
-     "add_A_Alanine"=>"0.0000",
-     "add_J_user_amino_acid"=>"0.0000",
-     "add_X_LorI"=>"0.0000",
-     "add_S_Serine"=>"0.0000",
-     "add_Cterm_peptide"=>"0.0000",
-     "add_Q_Glutamine"=>"0.0000",
-     "add_D_Aspartic_Acid"=>"0.0000",
-     "add_Nterm_protein"=>"0.0000",
-     "add_W_Tryptophan"=>"0.0000",
-     "add_R_Arginine"=>"0.0000",
-     "add_K_Lysine"=>"0.0000",
-     "add_H_Histidine"=>"0.0000",
-     "add_Nterm_peptide"=>"0.0000",
-     "add_E_Glutamic_Acid"=>"0.0000",
-     "add_Z_avg_QandE"=>"0.0000",
-     "add_B_avg_NandD"=>"0.0000",
-     "add_N_Asparagine"=>"0.0000",
-     "add_I_Isoleucine"=>"0.0000",
-     "add_L_Leucine"=>"0.0000",
-     "add_M_Methionine"=>"0.0000",
-     "add_P_Proline"=>"0.0000",
-     "add_G_Glycine"=>"0.0000",
-     "add_U_user_amino_acid"=>"0.0000",
-     "add_T_Threonine"=>"0.0000",
-     "add_V_Valine"=>"0.0000",
-     "add_Cterm_protein"=>"0.0000",
-     "match_peak_tolerance"=>"1.0000",
-     "match_peak_allowed_error"=>"1",
-     "normalize_xcorr"=>"0",
-     "nucleotide_reading_frame"=>"0",
-     "num_results"=>"250",
-     "sequence_header_filter"=>"",
-     "diff_search_options"=>"0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y",
-     "partial_sequence"=>"",
-     "max_num_internal_cleavage_sites"=>"2",
-     "search_engine"=>"SEQUEST",
-     "print_duplicate_references"=>"40",
-     "ion_series"=>"0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0",
-     "remove_precursor_peak"=>"0",
-     "num_output_lines"=>"10",
-     "second_database_name"=>"",
-     "first_database_name"=>"C:\\Xcalibur\\database\\mixed_db_human_ecoli_7prot_unique.fasta",
-     "peptide_mass_tolerance"=>"1.4000",
-     "digest_mass_range"=>"600.0 3500.0",
-     "enzyme_info"=>"Trypsin(KR/P) 1 1 KR P",
-     "show_fragment_ions"=>"0",
-     "protein_mass_filter"=>"0 0",
-     "term_diff_search_options"=>"0.000000 0.000000",
-     "num_description_lines"=>"5",
-     "fragment_ion_tolerance"=>"1.0000",
-     "peptide_mass_units"=>"0",
-     "mass_type_parent"=>"0",
-     "match_peak_count"=>"0",
-     "max_num_differential_per_peptide"=>"1",
-     "ion_cutoff_percentage"=>"0.0000",
-     "mass_type_fragment"=>"0"
-  }
+  ## Bioworks 3.3.1 (srf version 3.5)
+  File_331 = {}
+  File_33.each do |k,v|
+    File_331[k] = v.dup
+  end
+  File_331[:header][:raw_filename] = "C:\\Xcalibur\\data\\john\\opd1_2runs_2mods\\020.RAW"
+  File_331[:header][:version] = "3.5"
+  File_331[:out_files_first][:date_time] = '05/06/2008, 03:31 PM,'
+  File_331[:out_files_last][:date_time] = '05/06/2008, 03:32 PM,'
 end

data/specs/spec_id_spec.rb CHANGED Viewed

@@ -85,6 +85,7 @@ describe 'creating a list of proteins from peptides', :shared => true do
 end
 describe SpecID, 'with generic proteins' do
+  include SpecID
   before(:all) do
     @prots = (0..7).map do |n|
       SpecID::GenericProt.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
@@ -95,6 +96,7 @@ describe SpecID, 'with generic proteins' do
 end
 describe SpecID, 'with array based proteins' do
+  include SpecID
   before(:all) do
     @prots = (0..7).map do |n|
       SRF::OUT::Prot.new.set_from_hash({:reference => "prot_"+n.to_s, :peps => []})
@@ -109,6 +111,7 @@ class TrueClass ; include Boolean end
 class FalseClass; include Boolean end
 describe SpecID, 'being created' do
+  include SpecID
   it 'can be from small bioworks.xml' do
     sp = SpecID.new(Tfiles + '/bioworks_small.xml')
     sp.prots.size.should == 106

data/specs/transmem/toppred_spec.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
 require File.expand_path( File.dirname(__FILE__) + '/../transmem_spec_shared' )
 require 'transmem/toppred'
+require 'yaml'
 describe TopPred::Index do
   before(:all) do