RubyGems - mspire - Versions diffs - 0.4.4 → 0.4.5 - Mend

mspire 0.4.4 → 0.4.5

Files changed (9) hide show

data/changelog.txt +5 -0
data/lib/spec_id/precision/filter/cmdline.rb +0 -1
data/lib/spec_id/precision/filter.rb +12 -10
data/lib/spec_id/precision/prob.rb +16 -5
data/lib/validator/cmdline.rb +4 -7
data/lib/validator/decoy.rb +10 -6
data/lib/validator.rb +3 -3
data/specs/spec_id/srf_spec.rb +2 -2
metadata +2 -2

data/changelog.txt CHANGED Viewed

@@ -208,3 +208,8 @@ probabilities into q-values
 6. filter_validate.rb implements a p value method using xcorr values, however,
 this is not very effective since xcorr values underrepresent the
 difference between good hits and bad hits
+## version 0.4.5
+1. using pi_zero instead of decoy_to_target_ratio.  While all tests are
+passing, this release should be considered experimental with the use of any
+target-decoy validation.

data/lib/spec_id/precision/filter/cmdline.rb CHANGED Viewed

@@ -128,7 +128,6 @@ module SpecID
             op.separator ""
             op.val_opt(:decoy, opts)
-            op.exact_opt(opts, :decoy_pi_zero)
             op.val_opt(:digestion, opts)
             op.val_opt(:bias, opts)
             op.val_opt(:bad_aa, opts)

data/lib/spec_id/precision/filter.rb CHANGED Viewed

@@ -310,16 +310,18 @@ class SpecID::Precision::Filter
         [peps]  # no decoy
       end
-    if opts[:decoy_pi_zero]
-      if pep_sets.size < 2
-        raise ArgumentError, "must have a decoy validator for pi zero calculation!"
-      end
-      require 'pi_zero'
-      (_target, _decoy) = pep_sets
-      pvals = PiZero.p_values_for_sequest(*pep_sets).sort
-      pi_zero = PiZero.pi_zero(pvals)
-      opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
-    end
+    # This method doesn't seem to do so well, but a person can use a different
+    # one and enter in their own custom pi_0 value!
+    #if opts[:decoy_pi_zero]
+    #  if pep_sets.size < 2
+    #    raise ArgumentError, "must have a decoy validator for pi zero calculation!"
+    #  end
+    #  require 'pi_zero'
+    #  (_target, _decoy) = pep_sets
+    #  pvals = PiZero.p_values_for_sequest(*pep_sets).sort
+    #  pi_zero = PiZero.pi_zero(pvals)
+    #  opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
+    #end
     if opts[:proteins]
       protein_validator = Validator::ProtFromPep.new

data/lib/spec_id/precision/prob.rb CHANGED Viewed

@@ -31,12 +31,22 @@ class SpecID::Precision::Prob
     end
   end
+  # this is the way I was doing it:
+  #       adjusted = (1+R)*prec / (R*precision +1)
+  #       # where R is the decoy_to_target ratio
   # opts may include:
   #   :proteins => true|*false
   #   :validators => array of Validator objects
-  #   adjusts the precision in the *probability* validators ajdusted =
-  #   (1+R)*prec / (R*precision +1) where R is the decoy_to_target ratio
-  #   used in the decoy validator (R = 0.0 if no decoy validator)
+  #
+  #   This method will adjust the precision in the *probability* validators
+  #   using the pi_0 of the decoy validator (both terms with pi_0 in the
+  #   denominator go to zero if there is no decoy validator, so the precision
+  #   is not adjusted)
+  #
+  #       adjusted = (1+(1/pi_0))*prec / ((precision/pi_0) +1)
+  #       # where pi_0 is the ratio of incorrect target hits to total decoy hits
+  #
   #   NOTE: if you have decoy data, you MUST pass in a decoy validator for the
   #   decoy pephits to be removed from other validator analyses!
   #
@@ -82,7 +92,7 @@ class SpecID::Precision::Prob
     else
       decoy_val = decoy_vals.first
       if decoy_val
-        decoy_to_target_ratio = decoy_val.decoy_to_target_ratio
+        pi_zero = decoy_val.pi_zero
       end
     end
@@ -167,7 +177,8 @@ class SpecID::Precision::Prob
         val_hash[decoy_val].push(decoy_precision) if decoy_val
         probability_validators.zip(last_prob_values) do |val,prec|
           if decoy_val
-            val_hash[val].push( ((decoy_to_target_ratio+1.0)*prec) / ((decoy_to_target_ratio*prec) + 1.0) )
+            raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
+            val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
           else
             val_hash[val] << prec
           end

data/lib/validator/cmdline.rb CHANGED Viewed

@@ -41,7 +41,7 @@ class Validator::Cmdline
     {
       :hits_together => true,
       :decoy_on_match => true,
-      :decoy_to_target_ratio => 1.0,
+      :pi_zero => 1.0,
     },
     :bad_aa =>
     {
@@ -61,7 +61,7 @@ class Validator::Cmdline
     :ties => true,
   }
   COMMAND_LINE = {
-    :decoy => ["--decoy /REGEXP/|FILENAME[,DTR,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
+    :decoy => ["--decoy /REGEXP/|FILENAME[,PI0,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
                                                 "FILENAME of separate search on decoys.",
                                                 "All regular expressions must be surrounded by '/'",
                                                 "(no extended options [trailing modifiers]).",
@@ -72,11 +72,8 @@ class Validator::Cmdline
                                                 "    --decoy '/^\\s*REVERSE/'",
                                                 "If decoys proteins were searched in a separate file,",
                                                 "then give the FILENAME (e.g., --decoy decoy.srg)",
-                                                "DTR = Decoy to Target Ratio (default: #{DEFAULTS[:decoy][:decoy_to_target_ratio]})",
+                                                "PI0 = Incorrect Targets to Decoy Ratio (default: #{DEFAULTS[:decoy][:pi_zero]})",
                                                 "DOM = *true/false, decoy on match",],
-      :decoy_pi_zero => ["--decoy_pi_zero", "uses sequest Xcorrs to estimate the",
-                                            "percentage of incorrect target hits.",
-                                            "This over-rides any given DTR (above)"],
         :tps => ["--tps <fasta>", "for a completely defined sample, this is the",
                                   "fasta file containing the true protein hits"],
          # may require digestion:
@@ -159,7 +156,7 @@ class Validator::Cmdline
             raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
             first_arg
           end
-        val_opts[:decoy_to_target_ratio] = (ar[1] || DEFAULTS[:decoy][:decoy_to_target_ratio]).to_f
+        val_opts[:pi_zero] = (ar[1] || DEFAULTS[:decoy][:pi_zero]).to_f
         val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
         myargs.push(val_opts)
         opts[:validators].push(myargs)

data/lib/validator/decoy.rb CHANGED Viewed

@@ -8,7 +8,11 @@ class Validator::Decoy < Validator
   attr_accessor :decoy_on_match
   attr_accessor :correct_wins
-  attr_accessor :decoy_to_target_ratio
+  # This is the number of incorrect target hits over the total decoy hits
+  # The very rough, conservative ballpark estimate is the ratio of target hits
+  # to decoy hits.  This can be refined by removing the number of true target
+  # hits from the targets used to calculate it.
+  attr_accessor :pi_zero
   attr_accessor :last_pep_was_decoy
@@ -21,12 +25,12 @@ class Validator::Decoy < Validator
   DEFAULTS = {
     :decoy_on_match => true,
     :correct_wins => true,
-    :decoy_to_target_ratio => 1.0,
+    :pi_zero => 1.0,
   }
   def initialize(opts={})
     merged = DEFAULTS.merge(opts)
-    @constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
+    @constraint, @decoy_on_match, @correct_wins, @pi_zero = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :pi_zero)
   end
   # returns [normal, decoy] (?? I think ??)
@@ -82,15 +86,15 @@ class Validator::Decoy < Validator
     @normal_peps_just_submitted = normal
     @increment_normal += normal.size
     @increment_decoy += decoy.size
-    calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
+    calc_precision(@increment_normal, @increment_decoy, @pi_zero)
   end
   def pephit_precision(peps, separate_peps=nil)
     if separate_peps
-      calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
+      calc_precision(peps.size, separate_peps.size, @pi_zero)
     else
       (norm, decoy) = partition(peps)
-      calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
+      calc_precision(norm.size, decoy.size, @pi_zero)
     end
   end

data/lib/validator.rb CHANGED Viewed

@@ -121,7 +121,7 @@ class Validator
           hash[cat.to_sym] = val.send(cat.to_sym)
         end
       when Validator::Decoy
-        %w(decoy_to_target_ratio correct_wins decoy_on_match).each do |cat|
+        %w(pi_zero correct_wins decoy_on_match).each do |cat|
           hash[cat.to_sym] = val.send(cat.to_sym)
         end
         hash[:constraint] = val.constraint.inspect if val.constraint
@@ -167,11 +167,11 @@ end
 # normal hits (which may be true or false) and the second are decoy hits.
 # edge case:  if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
 module Precision::Calculator::Decoy
-  def calc_precision(num_normal, num_decoy, decoy_to_target_ratio=1.0)
+  def calc_precision(num_normal, num_decoy, pi_zero=1.0)
     # will calculate as floats in case fractional amounts passed in for
     # whatever reason
     num_normal_f = num_normal.to_f
-    num_true_pos = num_normal_f - (num_decoy.to_f / decoy_to_target_ratio)
+    num_true_pos = num_normal_f - (num_decoy.to_f * pi_zero)
     precision =
       if num_normal_f == 0.0
         if num_decoy.to_f > 0.0

data/specs/spec_id/srf_spec.rb CHANGED Viewed

@@ -162,8 +162,8 @@ describe SRF, 'creating dta files' do
       File.directory?('020').should be_true
       File.exist?('020/020.3366.3366.2.dta').should be_true
       lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
-      lines.first.should == "1113.10649290125 2\r\n"
-      lines[1].should == "164.56591796875 4817.0\r\n"
+      lines.first.should == "1113.106493 2\r\n"
+      lines[1].should == "164.5659 4817\r\n"
       FileUtils.rm_rf '020'
     end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mspire
 version: !ruby/object:Gem::Version
-  version: 0.4.4
+  version: 0.4.5
 platform: ruby
 authors:
 - John Prince
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2008-09-24 00:00:00 -06:00
+date: 2008-09-25 00:00:00 -06:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency