mspire 0.4.4 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- data/changelog.txt +5 -0
- data/lib/spec_id/precision/filter/cmdline.rb +0 -1
- data/lib/spec_id/precision/filter.rb +12 -10
- data/lib/spec_id/precision/prob.rb +16 -5
- data/lib/validator/cmdline.rb +4 -7
- data/lib/validator/decoy.rb +10 -6
- data/lib/validator.rb +3 -3
- data/specs/spec_id/srf_spec.rb +2 -2
- metadata +2 -2
data/changelog.txt
CHANGED
@@ -208,3 +208,8 @@ probabilities into q-values
|
|
208
208
|
6. filter_validate.rb implements a p value method using xcorr values, however,
|
209
209
|
this is not very effective since xcorr values underrepresent the
|
210
210
|
difference between good hits and bad hits
|
211
|
+
|
212
|
+
## version 0.4.5
|
213
|
+
1. using pi_zero instead of decoy_to_target_ratio. While all tests are
|
214
|
+
passing, this release should be considered experimental with the use of any
|
215
|
+
target-decoy validation.
|
@@ -310,16 +310,18 @@ class SpecID::Precision::Filter
|
|
310
310
|
[peps] # no decoy
|
311
311
|
end
|
312
312
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
313
|
+
# This method doesn't seem to do so well, but a person can use a different
|
314
|
+
# one and enter in their own custom pi_0 value!
|
315
|
+
#if opts[:decoy_pi_zero]
|
316
|
+
# if pep_sets.size < 2
|
317
|
+
# raise ArgumentError, "must have a decoy validator for pi zero calculation!"
|
318
|
+
# end
|
319
|
+
# require 'pi_zero'
|
320
|
+
# (_target, _decoy) = pep_sets
|
321
|
+
# pvals = PiZero.p_values_for_sequest(*pep_sets).sort
|
322
|
+
# pi_zero = PiZero.pi_zero(pvals)
|
323
|
+
# opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
|
324
|
+
#end
|
323
325
|
|
324
326
|
if opts[:proteins]
|
325
327
|
protein_validator = Validator::ProtFromPep.new
|
@@ -31,12 +31,22 @@ class SpecID::Precision::Prob
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# this is the way I was doing it:
|
35
|
+
# adjusted = (1+R)*precision / (R*precision + 1)
|
36
|
+
# # where R is the decoy_to_target ratio
|
37
|
+
|
34
38
|
# opts may include:
|
35
39
|
# :proteins => true|*false
|
36
40
|
# :validators => array of Validator objects
|
37
|
-
#
|
38
|
-
#
|
39
|
-
# used in the decoy validator (
|
41
|
+
#
|
42
|
+
# This method will adjust the precision in the *probability* validators
|
43
|
+
# used in the decoy validator (both terms with pi_0 in the denominator go
|
44
|
+
# to zero if there is no decoy validator and the precision is not
|
45
|
+
# adjusted)
|
46
|
+
#
|
47
|
+
# adjusted = (1+(1/pi_0))*precision / ((precision/pi_0) + 1)
|
48
|
+
# # where pi_0 is the ratio of incorrect target hits to total decoy hits
|
49
|
+
#
|
40
50
|
# NOTE: if you have decoy data, you MUST pass in a decoy validator for the
|
41
51
|
# decoy pephits to be removed from other validator analyses!
|
42
52
|
#
|
@@ -82,7 +92,7 @@ class SpecID::Precision::Prob
|
|
82
92
|
else
|
83
93
|
decoy_val = decoy_vals.first
|
84
94
|
if decoy_val
|
85
|
-
|
95
|
+
pi_zero = decoy_val.pi_zero
|
86
96
|
end
|
87
97
|
end
|
88
98
|
|
@@ -167,7 +177,8 @@ class SpecID::Precision::Prob
|
|
167
177
|
val_hash[decoy_val].push(decoy_precision) if decoy_val
|
168
178
|
probability_validators.zip(last_prob_values) do |val,prec|
|
169
179
|
if decoy_val
|
170
|
-
|
180
|
+
raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
|
181
|
+
val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
|
171
182
|
else
|
172
183
|
val_hash[val] << prec
|
173
184
|
end
|
data/lib/validator/cmdline.rb
CHANGED
@@ -41,7 +41,7 @@ class Validator::Cmdline
|
|
41
41
|
{
|
42
42
|
:hits_together => true,
|
43
43
|
:decoy_on_match => true,
|
44
|
-
:
|
44
|
+
:pi_zero => 1.0,
|
45
45
|
},
|
46
46
|
:bad_aa =>
|
47
47
|
{
|
@@ -61,7 +61,7 @@ class Validator::Cmdline
|
|
61
61
|
:ties => true,
|
62
62
|
}
|
63
63
|
COMMAND_LINE = {
|
64
|
-
:decoy => ["--decoy /REGEXP/|FILENAME[,
|
64
|
+
:decoy => ["--decoy /REGEXP/|FILENAME[,PI0,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
|
65
65
|
"FILENAME of separate search on decoys.",
|
66
66
|
"All regular expressions must be surrounded by '/'",
|
67
67
|
"(no extended options [trailing modifiers]).",
|
@@ -72,11 +72,8 @@ class Validator::Cmdline
|
|
72
72
|
" --decoy '/^\\s*REVERSE/'",
|
73
73
|
"If decoys proteins were searched in a separate file,",
|
74
74
|
"then give the FILENAME (e.g., --decoy decoy.srg)",
|
75
|
-
"
|
75
|
+
"PI0 = Incorrect Targets to Decoy Ratio (default: #{DEFAULTS[:decoy][:pi_zero]})",
|
76
76
|
"DOM = *true/false, decoy on match",],
|
77
|
-
:decoy_pi_zero => ["--decoy_pi_zero", "uses sequest Xcorrs to estimate the",
|
78
|
-
"percentage of incorrect target hits.",
|
79
|
-
"This over-rides any given DTR (above)"],
|
80
77
|
:tps => ["--tps <fasta>", "for a completely defined sample, this is the",
|
81
78
|
"fasta file containing the true protein hits"],
|
82
79
|
# may require digestion:
|
@@ -159,7 +156,7 @@ class Validator::Cmdline
|
|
159
156
|
raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
|
160
157
|
first_arg
|
161
158
|
end
|
162
|
-
val_opts[:
|
159
|
+
val_opts[:pi_zero] = (ar[1] || DEFAULTS[:decoy][:pi_zero]).to_f
|
163
160
|
val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
|
164
161
|
myargs.push(val_opts)
|
165
162
|
opts[:validators].push(myargs)
|
data/lib/validator/decoy.rb
CHANGED
@@ -8,7 +8,11 @@ class Validator::Decoy < Validator
|
|
8
8
|
|
9
9
|
attr_accessor :decoy_on_match
|
10
10
|
attr_accessor :correct_wins
|
11
|
-
|
11
|
+
# This is the number of incorrect target hits over the total decoy hits.
|
12
|
+
# The very rough, conservative ballpark estimate is the ratio of target hits
|
13
|
+
# to decoy hits. This can be refined by removing the number of true target
|
14
|
+
# hits from the targets used to calculate it.
|
15
|
+
attr_accessor :pi_zero
|
12
16
|
|
13
17
|
attr_accessor :last_pep_was_decoy
|
14
18
|
|
@@ -21,12 +25,12 @@ class Validator::Decoy < Validator
|
|
21
25
|
DEFAULTS = {
|
22
26
|
:decoy_on_match => true,
|
23
27
|
:correct_wins => true,
|
24
|
-
:
|
28
|
+
:pi_zero => 1.0,
|
25
29
|
}
|
26
30
|
|
27
31
|
def initialize(opts={})
|
28
32
|
merged = DEFAULTS.merge(opts)
|
29
|
-
@constraint, @decoy_on_match, @correct_wins, @
|
33
|
+
@constraint, @decoy_on_match, @correct_wins, @pi_zero = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :pi_zero)
|
30
34
|
end
|
31
35
|
|
32
36
|
# returns [normal, decoy] (?? I think ??)
|
@@ -82,15 +86,15 @@ class Validator::Decoy < Validator
|
|
82
86
|
@normal_peps_just_submitted = normal
|
83
87
|
@increment_normal += normal.size
|
84
88
|
@increment_decoy += decoy.size
|
85
|
-
calc_precision(@increment_normal, @increment_decoy, @
|
89
|
+
calc_precision(@increment_normal, @increment_decoy, @pi_zero)
|
86
90
|
end
|
87
91
|
|
88
92
|
def pephit_precision(peps, separate_peps=nil)
|
89
93
|
if separate_peps
|
90
|
-
calc_precision(peps.size, separate_peps.size, @
|
94
|
+
calc_precision(peps.size, separate_peps.size, @pi_zero)
|
91
95
|
else
|
92
96
|
(norm, decoy) = partition(peps)
|
93
|
-
calc_precision(norm.size, decoy.size, @
|
97
|
+
calc_precision(norm.size, decoy.size, @pi_zero)
|
94
98
|
end
|
95
99
|
end
|
96
100
|
|
data/lib/validator.rb
CHANGED
@@ -121,7 +121,7 @@ class Validator
|
|
121
121
|
hash[cat.to_sym] = val.send(cat.to_sym)
|
122
122
|
end
|
123
123
|
when Validator::Decoy
|
124
|
-
%w(
|
124
|
+
%w(pi_zero correct_wins decoy_on_match).each do |cat|
|
125
125
|
hash[cat.to_sym] = val.send(cat.to_sym)
|
126
126
|
end
|
127
127
|
hash[:constraint] = val.constraint.inspect if val.constraint
|
@@ -167,11 +167,11 @@ end
|
|
167
167
|
# normal hits (which may be true or false) and the second are decoy hits.
|
168
168
|
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
|
169
169
|
module Precision::Calculator::Decoy
|
170
|
-
def calc_precision(num_normal, num_decoy,
|
170
|
+
def calc_precision(num_normal, num_decoy, pi_zero=1.0)
|
171
171
|
# will calculate as floats in case fractional amounts passed in for
|
172
172
|
# whatever reason
|
173
173
|
num_normal_f = num_normal.to_f
|
174
|
-
num_true_pos = num_normal_f - (num_decoy.to_f
|
174
|
+
num_true_pos = num_normal_f - (num_decoy.to_f * pi_zero)
|
175
175
|
precision =
|
176
176
|
if num_normal_f == 0.0
|
177
177
|
if num_decoy.to_f > 0.0
|
data/specs/spec_id/srf_spec.rb
CHANGED
@@ -162,8 +162,8 @@ describe SRF, 'creating dta files' do
|
|
162
162
|
File.directory?('020').should be_true
|
163
163
|
File.exist?('020/020.3366.3366.2.dta').should be_true
|
164
164
|
lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
|
165
|
-
lines.first.should == "1113.
|
166
|
-
lines[1].should == "164.
|
165
|
+
lines.first.should == "1113.106493 2\r\n"
|
166
|
+
lines[1].should == "164.5659 4817\r\n"
|
167
167
|
|
168
168
|
FileUtils.rm_rf '020'
|
169
169
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-09-
|
12
|
+
date: 2008-09-25 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|