mspire 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
data/changelog.txt CHANGED
@@ -208,3 +208,8 @@ probabilities into q-values
208
208
  6. filter_validate.rb implements a p value method using xcorr values, however,
209
209
  this is not very effective since xcorr values underrepresent the
210
210
  difference between good hits and bad hits
211
+
212
+ ## version 0.4.5
213
+ 1. using pi_zero instead of decoy_to_target_ratio. While all tests are
214
+ passing, this release should be considered experimental with the use of any
215
+ target-decoy validation.
@@ -128,7 +128,6 @@ module SpecID
128
128
  op.separator ""
129
129
 
130
130
  op.val_opt(:decoy, opts)
131
- op.exact_opt(opts, :decoy_pi_zero)
132
131
  op.val_opt(:digestion, opts)
133
132
  op.val_opt(:bias, opts)
134
133
  op.val_opt(:bad_aa, opts)
@@ -310,16 +310,18 @@ class SpecID::Precision::Filter
310
310
  [peps] # no decoy
311
311
  end
312
312
 
313
- if opts[:decoy_pi_zero]
314
- if pep_sets.size < 2
315
- raise ArgumentError, "must have a decoy validator for pi zero calculation!"
316
- end
317
- require 'pi_zero'
318
- (_target, _decoy) = pep_sets
319
- pvals = PiZero.p_values_for_sequest(*pep_sets).sort
320
- pi_zero = PiZero.pi_zero(pvals)
321
- opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
322
- end
313
+ # This method doesn't seem to do so well, but a person can use a different
314
+ # one and enter in their own custom pi_0 value!
315
+ #if opts[:decoy_pi_zero]
316
+ # if pep_sets.size < 2
317
+ # raise ArgumentError, "must have a decoy validator for pi zero calculation!"
318
+ # end
319
+ # require 'pi_zero'
320
+ # (_target, _decoy) = pep_sets
321
+ # pvals = PiZero.p_values_for_sequest(*pep_sets).sort
322
+ # pi_zero = PiZero.pi_zero(pvals)
323
+ # opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
324
+ #end
323
325
 
324
326
  if opts[:proteins]
325
327
  protein_validator = Validator::ProtFromPep.new
@@ -31,12 +31,22 @@ class SpecID::Precision::Prob
31
31
  end
32
32
  end
33
33
 
34
+ # this is the way I was doing it:
35
+ # adjusted = (1+R)*prec / (R*prec +1)
36
+ # # where R is the decoy_to_target ratio
37
+
34
38
  # opts may include:
35
39
  # :proteins => true|*false
36
40
  # :validators => array of Validator objects
37
- # adjusts the precision in the *probability* validators ajdusted =
38
- # (1+R)*prec / (R*precision +1) where R is the decoy_to_target ratio
39
- # used in the decoy validator (R = 0.0 if no decoy validator)
41
+ #
42
+ # This method will adjust the precision in the *probability* validators
43
+ # used in the decoy validator (both terms with pi_0 in the denominator go
44
+ # to zero if there is no decoy validator and the precision is not
45
+ # adjusted)
46
+ #
47
+ # adjusted = (1+(1/pi_0))*prec / ((prec/pi_0) +1)
48
+ # # where pi_0 is the ratio of incorrect target hits to total decoy hits
49
+ #
40
50
  # NOTE: if you have decoy data, you MUST pass in a decoy validator for the
41
51
  # decoy pephits to be removed from other validator analyses!
42
52
  #
@@ -82,7 +92,7 @@ class SpecID::Precision::Prob
82
92
  else
83
93
  decoy_val = decoy_vals.first
84
94
  if decoy_val
85
- decoy_to_target_ratio = decoy_val.decoy_to_target_ratio
95
+ pi_zero = decoy_val.pi_zero
86
96
  end
87
97
  end
88
98
 
@@ -167,7 +177,8 @@ class SpecID::Precision::Prob
167
177
  val_hash[decoy_val].push(decoy_precision) if decoy_val
168
178
  probability_validators.zip(last_prob_values) do |val,prec|
169
179
  if decoy_val
170
- val_hash[val].push( ((decoy_to_target_ratio+1.0)*prec) / ((decoy_to_target_ratio*prec) + 1.0) )
180
+ raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
181
+ val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
171
182
  else
172
183
  val_hash[val] << prec
173
184
  end
@@ -41,7 +41,7 @@ class Validator::Cmdline
41
41
  {
42
42
  :hits_together => true,
43
43
  :decoy_on_match => true,
44
- :decoy_to_target_ratio => 1.0,
44
+ :pi_zero => 1.0,
45
45
  },
46
46
  :bad_aa =>
47
47
  {
@@ -61,7 +61,7 @@ class Validator::Cmdline
61
61
  :ties => true,
62
62
  }
63
63
  COMMAND_LINE = {
64
- :decoy => ["--decoy /REGEXP/|FILENAME[,DTR,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
64
+ :decoy => ["--decoy /REGEXP/|FILENAME[,PI0,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
65
65
  "FILENAME of separate search on decoys.",
66
66
  "All regular expressions must be surrounded by '/'",
67
67
  "(no extended options [trailing modifiers]).",
@@ -72,11 +72,8 @@ class Validator::Cmdline
72
72
  " --decoy '/^\\s*REVERSE/'",
73
73
  "If decoys proteins were searched in a separate file,",
74
74
  "then give the FILENAME (e.g., --decoy decoy.srg)",
75
- "DTR = Decoy to Target Ratio (default: #{DEFAULTS[:decoy][:decoy_to_target_ratio]})",
75
+ "PI0 = Incorrect Targets to Decoy Ratio (default: #{DEFAULTS[:decoy][:pi_zero]})",
76
76
  "DOM = *true/false, decoy on match",],
77
- :decoy_pi_zero => ["--decoy_pi_zero", "uses sequest Xcorrs to estimate the",
78
- "percentage of incorrect target hits.",
79
- "This over-rides any given DTR (above)"],
80
77
  :tps => ["--tps <fasta>", "for a completely defined sample, this is the",
81
78
  "fasta file containing the true protein hits"],
82
79
  # may require digestion:
@@ -159,7 +156,7 @@ class Validator::Cmdline
159
156
  raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
160
157
  first_arg
161
158
  end
162
- val_opts[:decoy_to_target_ratio] = (ar[1] || DEFAULTS[:decoy][:decoy_to_target_ratio]).to_f
159
+ val_opts[:pi_zero] = (ar[1] || DEFAULTS[:decoy][:pi_zero]).to_f
163
160
  val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
164
161
  myargs.push(val_opts)
165
162
  opts[:validators].push(myargs)
@@ -8,7 +8,11 @@ class Validator::Decoy < Validator
8
8
 
9
9
  attr_accessor :decoy_on_match
10
10
  attr_accessor :correct_wins
11
- attr_accessor :decoy_to_target_ratio
11
+ # This is the number of incorrect target hits over the total decoy hits.
12
+ # The very rough, conservative ballpark estimate is the ratio of target hits
13
+ # to decoy hits. This can be refined by removing the number of true target
14
+ # hits from the targets used to calculate it.
15
+ attr_accessor :pi_zero
12
16
 
13
17
  attr_accessor :last_pep_was_decoy
14
18
 
@@ -21,12 +25,12 @@ class Validator::Decoy < Validator
21
25
  DEFAULTS = {
22
26
  :decoy_on_match => true,
23
27
  :correct_wins => true,
24
- :decoy_to_target_ratio => 1.0,
28
+ :pi_zero => 1.0,
25
29
  }
26
30
 
27
31
  def initialize(opts={})
28
32
  merged = DEFAULTS.merge(opts)
29
- @constraint, @decoy_on_match, @correct_wins, @decoy_to_target_ratio = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :decoy_to_target_ratio)
33
+ @constraint, @decoy_on_match, @correct_wins, @pi_zero = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :pi_zero)
30
34
  end
31
35
 
32
36
  # returns [normal, decoy] (?? I think ??)
@@ -82,15 +86,15 @@ class Validator::Decoy < Validator
82
86
  @normal_peps_just_submitted = normal
83
87
  @increment_normal += normal.size
84
88
  @increment_decoy += decoy.size
85
- calc_precision(@increment_normal, @increment_decoy, @decoy_to_target_ratio)
89
+ calc_precision(@increment_normal, @increment_decoy, @pi_zero)
86
90
  end
87
91
 
88
92
  def pephit_precision(peps, separate_peps=nil)
89
93
  if separate_peps
90
- calc_precision(peps.size, separate_peps.size, @decoy_to_target_ratio)
94
+ calc_precision(peps.size, separate_peps.size, @pi_zero)
91
95
  else
92
96
  (norm, decoy) = partition(peps)
93
- calc_precision(norm.size, decoy.size, @decoy_to_target_ratio)
97
+ calc_precision(norm.size, decoy.size, @pi_zero)
94
98
  end
95
99
  end
96
100
 
data/lib/validator.rb CHANGED
@@ -121,7 +121,7 @@ class Validator
121
121
  hash[cat.to_sym] = val.send(cat.to_sym)
122
122
  end
123
123
  when Validator::Decoy
124
- %w(decoy_to_target_ratio correct_wins decoy_on_match).each do |cat|
124
+ %w(pi_zero correct_wins decoy_on_match).each do |cat|
125
125
  hash[cat.to_sym] = val.send(cat.to_sym)
126
126
  end
127
127
  hash[:constraint] = val.constraint.inspect if val.constraint
@@ -167,11 +167,11 @@ end
167
167
  # normal hits (which may be true or false) and the second are decoy hits.
168
168
  # edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
169
169
  module Precision::Calculator::Decoy
170
- def calc_precision(num_normal, num_decoy, decoy_to_target_ratio=1.0)
170
+ def calc_precision(num_normal, num_decoy, pi_zero=1.0)
171
171
  # will calculate as floats in case fractional amounts passed in for
172
172
  # whatever reason
173
173
  num_normal_f = num_normal.to_f
174
- num_true_pos = num_normal_f - (num_decoy.to_f / decoy_to_target_ratio)
174
+ num_true_pos = num_normal_f - (num_decoy.to_f * pi_zero)
175
175
  precision =
176
176
  if num_normal_f == 0.0
177
177
  if num_decoy.to_f > 0.0
@@ -162,8 +162,8 @@ describe SRF, 'creating dta files' do
162
162
  File.directory?('020').should be_true
163
163
  File.exist?('020/020.3366.3366.2.dta').should be_true
164
164
  lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
165
- lines.first.should == "1113.10649290125 2\r\n"
166
- lines[1].should == "164.56591796875 4817.0\r\n"
165
+ lines.first.should == "1113.106493 2\r\n"
166
+ lines[1].should == "164.5659 4817\r\n"
167
167
 
168
168
  FileUtils.rm_rf '020'
169
169
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Prince
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-09-24 00:00:00 -06:00
12
+ date: 2008-09-25 00:00:00 -06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency