RubyGems - mspire - Versions diffs - 0.3.1 → 0.3.9 - Mend

mspire 0.3.1 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

data/Rakefile +2 -2
data/bin/bioworks_to_pepxml.rb +15 -3
data/bin/ms_to_lmat.rb +2 -1
data/bin/sqt_group.rb +26 -0
data/changelog.txt +36 -0
data/lib/ms/msrun.rb +3 -1
data/lib/ms/parser/mzdata/dom.rb +14 -14
data/lib/ms/scan.rb +3 -3
data/lib/mspire.rb +1 -1
data/lib/sample_enzyme.rb +39 -0
data/lib/spec_id.rb +18 -0
data/lib/spec_id/aa_freqs.rb +6 -9
data/lib/spec_id/digestor.rb +16 -17
data/lib/spec_id/mass.rb +63 -1
data/lib/spec_id/parser/proph.rb +101 -2
data/lib/spec_id/precision/filter.rb +3 -2
data/lib/spec_id/precision/filter/cmdline.rb +3 -1
data/lib/spec_id/precision/filter/output.rb +1 -0
data/lib/spec_id/precision/prob.rb +88 -21
data/lib/spec_id/precision/prob/cmdline.rb +28 -16
data/lib/spec_id/precision/prob/output.rb +8 -2
data/lib/spec_id/proph/pep_summary.rb +25 -12
data/lib/spec_id/sequest.rb +28 -0
data/lib/spec_id/sequest/pepxml.rb +142 -197
data/lib/spec_id/sqt.rb +349 -0
data/lib/spec_id/srf.rb +33 -23
data/lib/validator.rb +40 -57
data/lib/validator/aa.rb +3 -90
data/lib/validator/aa_est.rb +112 -0
data/lib/validator/cmdline.rb +163 -31
data/lib/validator/decoy.rb +15 -7
data/lib/validator/digestion_based.rb +5 -4
data/lib/validator/q_value.rb +32 -0
data/script/peps_per_bin.rb +67 -0
data/script/sqt_to_meta.rb +24 -0
data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
data/specs/bin/fasta_shaker_spec.rb +2 -2
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
data/specs/bin/filter_and_validate_spec.rb +25 -6
data/specs/bin/ms_to_lmat_spec.rb +2 -2
data/specs/bin/prob_validate_spec.rb +5 -3
data/specs/sample_enzyme_spec.rb +86 -1
data/specs/spec_helper.rb +11 -9
data/specs/spec_id/bioworks_spec.rb +2 -1
data/specs/spec_id/precision/filter_spec.rb +5 -5
data/specs/spec_id/precision/prob_spec.rb +0 -67
data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
data/specs/spec_id/protein_summary_spec.rb +4 -4
data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
data/specs/spec_id/sequest_spec.rb +38 -0
data/specs/spec_id/sqt_spec.rb +111 -3
data/specs/spec_id_spec.rb +2 -0
data/specs/transmem/phobius_spec.rb +3 -1
data/specs/transmem/toppred_spec.rb +1 -1
data/specs/validator/aa_est_spec.rb +66 -0
data/specs/validator/aa_spec.rb +1 -68
data/specs/validator/background_spec.rb +2 -0
data/specs/validator/bias_spec.rb +3 -27
data/specs/validator/decoy_spec.rb +2 -2
data/specs/validator/transmem_spec.rb +2 -1
data/test_files/small.sqt +87 -0
metadata +312 -293

data/specs/spec_helper.rb CHANGED Viewed

@@ -3,12 +3,14 @@ gem 'rspec'
 # a global flag that lets me know what format we're dealing with for output
 $specdoc = false
-ObjectSpace.each_object do |obj|
-  case obj
-  when Spec::Runner::Formatter::SpecdocFormatter
-    $specdoc = true
-  end
-end
+## something changed between version 1.0.6?? and 1.1.1 in rspec so that
+#Spec::Runner is no longer an object being created...
+#ObjectSpace.each_object do |obj|
+#  case obj
+#  when Spec::Runner::Formatter::SpecdocFormatter
+#    $specdoc = true
+#  end
+#end
 # Set up some global testing variables:
 #silent {
@@ -109,9 +111,9 @@ require SPEC_DIR + '/load_bin_path'
 class String
   #alias_method :exist?, exist_as_a_file?
   #alias_method exist_as_a_file?, exist?
-  def exist?
-    File.exist? self
-  end
+  #def exist?
+  #  File.exist? self
+  #end
   def exist_as_a_file?
     File.exist? self
   end

data/specs/spec_id/bioworks_spec.rb CHANGED Viewed

@@ -37,7 +37,8 @@ describe Bioworks, 'set from an xml file' do
     tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
     bio = Bioworks.new(tf_bioworks_to_excel)
     bio.to_excel(tmpfile)
-    File.should exist(tmpfile)
+    tmpfile.exist_as_a_file?.should be_true
+    #File.should exist_as_a_file(tmpfile)
     exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
     act = _arr_of_arrs(tmpfile)
     exp.each_index do |i|

data/specs/spec_id/precision/filter_spec.rb CHANGED Viewed

@@ -103,7 +103,7 @@ describe 'filtering on small bioworks file with inverse prots' do
     @regexp = /^INV_/o
     @file = Tfiles + '/bioworks_with_INV_small.xml'
     @spec_id = SpecID.new(@file)
-    vals = [Validator::Decoy.new(@regexp)]
+    vals = [Validator::Decoy.new(:constraint => @regexp)]
     @opts = {:sequest => {:xcorr1 => 1.0, :xcorr2 => 1.0, :xcorr3 => 1.0, :deltacn => 0.1, :ppm => 1000.0, :include_deltacnstar=> false}, :validators => vals}
   end
@@ -121,7 +121,7 @@ describe 'filtering on small bioworks file with inverse prots' do
     # this does a minimal test to see if this functions properly
     # (not for accuracy, which is done in validator_spec)
     ## WITH FASTA FILE:
-    val1 = Validator::AA.new('C').set_frequency(Fasta.new(Tfiles + '/small.fasta'))
+    val1 = Validator::AAEst.new('C').set_frequency(Fasta.new(Tfiles + '/small.fasta').prots)
     @opts[:validators] << val1   # obviously this guy is not his
     ans1 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
     peps = ans1[:pephits]
@@ -131,7 +131,7 @@ describe 'filtering on small bioworks file with inverse prots' do
     ## WITH A CYSTEINE BACKGROUND:
     background_cys = 0.0172
-    val3 = Validator::AA.new('C', :background => background_cys).set_frequency(Fasta.new(Tfiles + '/small.fasta'))
+    val3 = Validator::AAEst.new('C', :background => background_cys).set_frequency(Fasta.new(Tfiles + '/small.fasta').prots)
     @opts[:validators][1] = val3
     ans3 = SpecID::Precision::Filter.new.filter_and_validate(@spec_id, @opts)
     peps = ans3[:pephits]
@@ -156,8 +156,8 @@ describe 'filtering on a real srf file' do
       regexp = /FAKINGIT_OUT/
       opts[:decoy] = regexp
-      decoy_val = Validator::Decoy.new(regexp) # this is not real, just to test
-      cys_val = Validator::AA.new('C').set_frequency(Fasta.new(fasta_file))
+      decoy_val = Validator::Decoy.new(:constraint => regexp) # this is not real, just to test
+      cys_val = Validator::AAEst.new('C').set_frequency(Fasta.new(fasta_file).prots)
       tmm_val = Validator::Transmem::Protein.new(tmm_file, :min_num_tms => 1, :soluble_fraction => true, :correct_wins => true, :no_include_tm_peps => false, :background => 0.0).set_false_to_total_ratio( Digestor.digest( Fasta.new(fasta_file), Sequest::Params.new(sequest_file) ) )
       opts[:validators] = [decoy_val, cys_val, tmm_val]
       ans = SpecID::Precision::Filter.new.filter_and_validate(spec_id, opts)

data/specs/spec_id/precision/prob_spec.rb CHANGED Viewed

@@ -41,71 +41,4 @@ describe 'finding precision Proph::Prot::Pep objects' do
 end
-=begin
-  it 'gets precision with all validators (including probability and decoy)' do
-    ## create some decoy peptides!
-    @spec_id.peps.sort_by {|pep| pep.probability }[100..-1].each_with_index do |pep,i|
-      if i % 3 == 0
-        pep.prots.each {|prot| prot.protein_name = 'DECOY_' + prot.protein_name }
-      end
-    end
-    # check which ones are ACTUALLY normal and decoy
-    (decoy, normal) = @spec_id.peps.partition do |pep|
-      pep.prots.all? {|prot| prot.protein_name =~ /^DECOY_/}
-    end
-    num_decoy = decoy.size
-    num_normal = normal.size
-  end
-prob_spec_helper = File.expand_path( File.dirname(__FILE__) + '/prob_spec_helper' )
-# this does a minimal test to see if this functions properly
-# (not for accuracy, which is done in validator_spec)
-## WITH FASTA FILE:
-base_dir = Tfiles_l + '/opd1_2runs_2mods/sequest'
-fasta_file = base_dir + '/ecoli_K12_ncbi_20060321.fasta'
-params_file = base_dir + '/ecoli.params'
-bias_file = base_dir + '/ecoli_K12_ncbi_20060321.bias.fasta'
-toppred_file = base_dir + '/ecoli_K12_ncbi_20060321.toppred.xml'
-fasta_file.should exist
-prob = Validator::Probability.new
-badaa_freq = Validator::AA.new('C', :frequency => 0.0115866200193321)
-badaa_dig = Validator::AA.new('C')
-bias = Validator::Bias.new(Fasta.new(bias_file))
-transmem = Validator::Transmem::Protein.new(toppred_file)
-decoy = Validator::Decoy.new(/^DECOY_/)
-turn_on_digestion = true
-if turn_on_digestion
-  # digestion based validators need this set!
-  digested_peps = Digestor.digest(Fasta.new(fasta_file), Sequest::Params.new(params_file))
-  [badaa_dig, bias, transmem].each do |val|
-    val.set_false_to_total_ratio(digested_peps)
-  end
-end
-transmem.transmem_status_hash = transmem.create_transmem_status_hash(@spec_id.peps)
-val_list = [decoy, badaa_freq, badaa_dig, prob, bias, transmem]
-opts = { :validators => val_list }
-hash = SpecID::Precision::Prob.new.precision_vs_num_hits(@spec_id, opts)
-#puts "OUTPUT: "
-#puts hash.to_yaml
-# frozen
-e_hash = ProbMSHelper::Answer2
-# hash[:pephits_precision].size.should == e_hash[:pephits_precision].size
-# other data types are tested above, just testing validators
-hash[:pephits_precision].zip( e_hash[:pephits_precision] ) do |val_hash, val_hash_e|
-  val_hash[:values].size.should == num_normal
-  #val_hash[:validator].should == val_hash_e[:validator]
-  val_hash[:values].zip(val_hash_e[:values]) {|v,e| v.should be_close(e, 0.000000001)}
-end
-    end
-end
-=end

data/specs/spec_id/proph/pep_summary_spec.rb CHANGED Viewed

@@ -13,7 +13,7 @@ ToCheck = {
 }
-describe Proph::PepSummary, "reading a .xml file" do
+describe Proph::PepSummary, "reading a small .xml file" do
   before(:each) do
     file = Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml'
     @obj = Proph::PepSummary.new(file)
@@ -23,15 +23,25 @@ describe Proph::PepSummary, "reading a .xml file" do
     lambda { Proph::PepSummary.new(Tfiles + '/opd1/000.tpp_2.9.2.first10.xml')}.should raise_error(ArgumentError)
   end
-  it 'has spectrum queries' do
-    @obj.spectrum_queries.size.should == 18
+  it 'has msms_run_summary objects with spectrum_queries' do
+    @obj.msms_run_summaries.size.should == 1
+    sqs = @obj.msms_run_summaries.first.spectrum_queries
+    sqs.size.should == 18
     [:first, :last].each do |mth|
       ToCheck[:spectrum_query][mth].each do |k,v|
-        @obj.spectrum_queries.send(mth).send(k).should == v
+        if v.is_a? Float
+          sqs.send(mth).send(k).should be_close(v, 0.0000000001)
+        else
+          sqs.send(mth).send(k).should == v
+        end
       end
       ToCheck[:search_hit][mth].each do |k,v|
-        @obj.spectrum_queries.send(mth).search_results.first.search_hits.first.send(k).should == v
+        if v.is_a? Float
+          sqs.send(mth).search_results.first.search_hits.first.send(k).should be_close(v, 0.0000000001)
+        else
+          sqs.send(mth).search_results.first.search_hits.first.send(k).should == v
+        end
       end
     end
   end
@@ -44,7 +54,11 @@ describe Proph::PepSummary, "reading a .xml file" do
     [:first, :last].each do |mth|
       ToCheck[:search_hit][mth].each do |k,v|
-        @obj.peps.send(mth).send(k).should == v
+        if v.is_a? Float
+          @obj.peps.send(mth).send(k).should be_close(v, 0.0000000001)
+        else
+          @obj.peps.send(mth).send(k).should == v
+        end
       end
     end
@@ -52,92 +66,33 @@ describe Proph::PepSummary, "reading a .xml file" do
 end
-####################################################
-# OTHER TESTS NOT IMPLEMENTED (do we need these??)
-####################################################
-=begin
-require 'test/unit'
-require 'spec_id'
-require 'ms/scan'
-class ProphTest < Test::Unit::TestCase
-  def initialize(arg)
-    super(arg)
-    @tfiles = File.dirname(__FILE__) + '/tfiles/'
-    @pepproph_xml = @tfiles + 'pepproph_small.xml'
-  end
-  def Xtest_filter_by_min_pep_prob
-    obj = Proph::Pep::Parser.new
-    new_file = "tfiles/tmp.xml"
-    assert_match(/peptideprophet_result probability="0.[0-5]/, IO.read(@pepproph_xml))
-    obj.filter_by_min_pep_prob(@pepproph_xml, new_file, 0.50)
-    assert_no_match(/peptideprophet_result probability="0.[0-5]/, IO.read(new_file))
-    assert_match(/<peptideprophet_result[^>]*probability="0.[6-9][^>]*>/, IO.read(new_file))
-    File.unlink new_file
-  end
-  def Xtest_uniq_by_seqcharge
-    cls = Proph::Pep
-    p1 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
-    p2 = cls.new({ :charge => '3', :sequence => 'PEPTIDE' })
-    p3 = cls.new({ :charge => '2', :sequence => 'PEPTIDE' })
-    p4 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
-    p5 = cls.new({ :charge => '2', :sequence => 'APEPTIDE' })
-    un_peps = cls.uniq_by_seqcharge([p1,p2,p3,p4,p5])
-    ## WHY ISn't that working? below!
-    ##assert_equal([p1,p2,p4].to_set, un_peps.to_set)
-    assert(equal_sets([p1,p2,p4], un_peps))
-  end
-  def Xequal_sets(arr1, arr2)
-    c1 = arr1.dup
-    c2 = arr2.dup
-    arr1.each do |c|
-      arr2.each do |d|
-        if c == d
-          c1.delete c
-          c2.delete d
-        end
-      end
+describe Proph::PepSummary, 'reading a large .xml file' do
+  spec_large do
+    before(:all) do
+      file = Tfiles_l + '/opd1_2runs_2mods/prophet/interact-opd1_mods.xml'
+      @obj = Proph::PepSummary.new(file)
     end
-    if (c1.size == c2.size) && (c1.size == 0)
-      true
-    else
-      false
+    it 'has peps of class Proph::PepSummary::Pep' do
+      @obj.peps.first.class.to_s.should == 'Proph::PepSummary::Pep'
+      @obj.peps.size.should == 1643
     end
-  end
-  def Xtest_arithmetic_avg_scan_by_parent_time
-    i1 = 100015.0
-    i2 = 30000.0
-    i3 = 100.0
-    t1 = 0.13
-    t2 = 0.23
-    t3 = 0.33
-    p1 = MS::Scan.new(1,1, t1)
-    p2 = MS::Scan.new(2,1, t2)
-    p3 = MS::Scan.new(3,1, t3)
-    s1 = MS::Scan.new(1,2,0.10, 300.2, i1, p1)
-    s2 = MS::Scan.new(2,2,0.20, 301.1, i2, p2)
-    s3 = MS::Scan.new(3,2,0.30, 302.0, i3, p3)
-    scan = Proph::Pep.new({:scans => [s1,s2,s3]}).arithmetic_avg_scan_by_parent_time
-    tot_inten = i1 + i2 + i3
-    tm = ( t1 * (i1/tot_inten) + t2 * (i2/tot_inten) + t3 * (i3/tot_inten) )
-    {:ms_level => 2, :prec_inten => 130115.0/3, :num => nil, :prec_mz => 301.1.to_f, :time => tm }.each do |k,v|
-      if k == :prec_mz  # not sure why this is bugging out, but..
-        assert_equal(v.to_s, scan.send(k).to_s)
-      else
-        assert_equal(v, scan.send(k))
-      end
+    it 'contains peps that respond_to :aaseq' do
+      @obj.peps.first.should respond_to(:aaseq)
     end
+    it 'has prots (also callable from peps)' do
+      (@obj.prots.size > 0).should be_true
+      @obj.peps.all? {|v| v.prots.size > 0 }.should be_true
+      peps_with_prots = @obj.peps.select {|v| v.prots.size > 1 }
+      # frozen:
+      peps_with_prots.first.prots.size.should == 3
+      peps_with_prots.first.prots.first.name.should == "gi|16128676|ref|NP_415229.1|"
+      peps_with_prots.first.prots.first.protein_descr.should == "RhsC protein in RhsC element [Escherichia coli K12]"
+      peps_with_prots.first.prots.first.reference.should == "gi|16128676|ref|NP_415229.1| RhsC protein in RhsC element [Escherichia coli K12]"
+      peps_with_prots.first.prots.last.protein_descr.should == "RhsA protein in RhsA element [Escherichia coli K12]"
+    end
   end
 end
-=end

data/specs/spec_id/protein_summary_spec.rb CHANGED Viewed

@@ -18,7 +18,7 @@ xdescribe ProteinSummary do
   spec_large do
     it 'does basic summary on prophet file' do
       runit "-c 5.0 #{@tf_proph}"
-      @tf_summary.should exist
+      @tf_summary.exist_as_a_file?.should be_true
       string = IO.read(@tf_summary)
       string.should =~ /gi\|16132176\|ref\|NP_418775\.1\|/
       string.should =~ /16132176/
@@ -28,7 +28,7 @@ xdescribe ProteinSummary do
   it 'does basic summary on bioworks.xml file' do
     runit "#{@tf_bioworks_small}"
-    @tf_bioworks_small_summary_html.should exist
+    @tf_bioworks_small_summary_html.exist_as_a_file?.should be_true
     File.unlink @tf_bioworks_small_summary_html unless @no_delete
     # @TODO: need to freeze the output here
   end
@@ -40,7 +40,7 @@ xdescribe ProteinSummary do
     runit "#{@tf_bioworks_small} --precision"
     IO.read(@tf_bioworks_small_summary_html).should =~ /# hits.*106/m
     # should add more tests here...
-    @tf_bioworks_small_summary_html.should exist
+    @tf_bioworks_small_summary_html.exist_as_a_file?.should be_true
     File.unlink @tf_bioworks_small_summary_html unless @no_delete
   end
@@ -59,7 +59,7 @@ xdescribe ProteinSummary do
   spec_large do
     it 'gives correct peptide counts' do
       runit "-c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}"
-      @tf_peptide_count.should exist
+      @tf_peptide_count.exist_as_a_file?.should be_true
       file = IO.read(@tf_peptide_count)
       file.should include("gi|16132176|ref|NP_418775.1|\t2")
       file.should include("gi|16131996|ref|NP_418595.1|\t1")

data/specs/spec_id/sequest/pepxml_spec.rb CHANGED Viewed

@@ -8,84 +8,6 @@ require 'spec_id/sequest/pepxml'
 NODELETE = false
-describe Sequest::PepXML::SearchHit, 'making enzyme calculations on sequences' do
-  before(:each) do
-    @tf_params_fullKRP = Tfiles + "/bioworks32.params"
-    # The enzyme is: 1 KR P
-    @tf_params_justKR = Tfiles + "/bioworks33.params"
-  end
-  it 'calculates the number of tolerant termini' do
-    exp = [{
-      # full KR/P
-      'K.EPTIDR.E' => 2,
-      'K.PEPTIDR.E' => 1,
-      'F.EEPTIDR.E' => 1,
-      'F.PEPTIDW.R' => 0,
-    },
-    {
-      # just KR
-      'K.EPTIDR.E' => 2,
-      'K.PEPTIDR.E' => 2,
-      'F.EEPTIDR.E' => 1,
-      'F.PEPTIDW.R' => 0,
-    }
-    ]
-    scall = Sequest::PepXML::SearchHit
-    sym = :calc_num_tol_term
-    params_ar = [Sequest::Params.new(@tf_params_fullKRP), Sequest::Params.new(@tf_params_justKR)]
-    params_ar.zip(exp) do |params,hash|
-      hash.each do |seq, val|
-        scall.send(sym, params, seq).should == val
-      end
-    end
-  end
-  it 'calculates number of missed cleavages' do
-    exp = [{
-    "K.EPTIDR.E" => 0,
-    "K.PEPTIDR.E" => 0,
-    "F.EEPTIDR.E" => 0,
-    "F.PEPTIDW.R" => 0,
-    "F.PERPTIDW.R" => 0,
-    "F.PEPKPTIDW.R" => 0,
-    "F.PEPKTIDW.R" => 1,
-    "K.RTTIDR.E" => 1,
-    "K.RTTIKK.E" => 2,
-    "F.PKEPRTIDW.R" => 2,
-    "F.PKEPRTIDKP.R" => 2,
-    "F.PKEPRAALKPEERPTIDKW.R" => 3,
-    },
-    {
-    "K.EPTIDR.E" => 0,
-    "K.PEPTIDR.E" => 0,
-    "F.EEPTIDR.E" => 0,
-    "F.PEPTIDW.R" => 0,
-    "F.PERPTIDW.R" => 1,
-    "F.PEPKPTIDW.R" => 1,
-    "F.PEPKTIDW.R" => 1,
-    "K.RTTIDR.E" => 1,
-    "K.RTTIKK.E" => 2,
-    "F.PKEPRTIDW.R" => 2,
-    "F.PKEPRTIDKP.R" => 3,
-    "F.PKEPRAALKPEERPTIDKW.R" => 5,
-    }
-    ]
-    params_ar = [Sequest::Params.new(@tf_params_fullKRP), Sequest::Params.new(@tf_params_justKR)]
-    scall = Sequest::PepXML::SearchHit
-    sym = :calc_num_missed_cleavages
-    #params_ar[1] = params_ar[0]
-    params_ar.zip(exp) do |params, hash|
-      hash.each do |seq, val|
-        scall.send(sym, params, seq).should == val
-      end
-    end
-  end
-end
 describe Sequest::PepXML, " created from small bioworks.xml" do
   spec_large do
@@ -423,7 +345,7 @@ describe 'bioworks file with modifications transformed into pepxml' do
     it 'gets modifications right in real run' do
       @out_files.each do |fn|
-        fn.should exist
+        fn.exist_as_a_file?.should be_true
         beginning = IO.read(fn)
         lines = beginning.split("\n")
         [