mspire 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/changelog.txt +5 -0
- data/lib/spec_id/precision/filter/cmdline.rb +0 -1
- data/lib/spec_id/precision/filter.rb +12 -10
- data/lib/spec_id/precision/prob.rb +16 -5
- data/lib/validator/cmdline.rb +4 -7
- data/lib/validator/decoy.rb +10 -6
- data/lib/validator.rb +3 -3
- data/specs/spec_id/srf_spec.rb +2 -2
- metadata +2 -2
data/changelog.txt
CHANGED
@@ -208,3 +208,8 @@ probabilities into q-values
|
|
208
208
|
6. filter_validate.rb implements a p value method using xcorr values, however,
|
209
209
|
this is not very effective since xcorr values underrepresent the
|
210
210
|
difference between good hits and bad hits
|
211
|
+
|
212
|
+
## version 0.4.5
|
213
|
+
1. using pi_zero instead of decoy_to_target_ratio. While all tests are
|
214
|
+
passing, this release should be considered experimental with the use of any
|
215
|
+
target-decoy validation.
|
@@ -310,16 +310,18 @@ class SpecID::Precision::Filter
|
|
310
310
|
[peps] # no decoy
|
311
311
|
end
|
312
312
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
313
|
+
# This method doesn't seem to do so well, but a person can use a different
|
314
|
+
# one and enter in their own custom pi_0 value!
|
315
|
+
#if opts[:decoy_pi_zero]
|
316
|
+
# if pep_sets.size < 2
|
317
|
+
# raise ArgumentError, "must have a decoy validator for pi zero calculation!"
|
318
|
+
# end
|
319
|
+
# require 'pi_zero'
|
320
|
+
# (_target, _decoy) = pep_sets
|
321
|
+
# pvals = PiZero.p_values_for_sequest(*pep_sets).sort
|
322
|
+
# pi_zero = PiZero.pi_zero(pvals)
|
323
|
+
# opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
|
324
|
+
#end
|
323
325
|
|
324
326
|
if opts[:proteins]
|
325
327
|
protein_validator = Validator::ProtFromPep.new
|
@@ -31,12 +31,22 @@ class SpecID::Precision::Prob
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
# this is the way I was doing it:
|
35
|
+
# adjusted = (1+R)*prec / (R*precision +1)
|
36
|
+
# # where R is the decoy_to_target ratio
|
37
|
+
|
34
38
|
# opts may include:
|
35
39
|
# :proteins => true|*false
|
36
40
|
# :validators => array of Validator objects
|
37
|
-
#
|
38
|
-
#
|
39
|
-
# used in the decoy validator (
|
41
|
+
#
|
42
|
+
# This method will adjust the precision in the *probability* validators
|
43
|
+
# used in the decoy validator (both terms with pi_0 in the denominator go
|
44
|
+
# to zero if there is no decoy validator and the precision is not
|
45
|
+
# adjusted)
|
46
|
+
#
|
47
|
+
# adjusted = (1+(1/pi_0))*prec / ((prec/pi_0) +1)
|
48
|
+
# # where pi_0 is the ratio of incorrect target hits to total decoy hits
|
49
|
+
#
|
40
50
|
# NOTE: if you have decoy data, you MUST pass in a decoy validator for the
|
41
51
|
# decoy pephits to be removed from other validator analyses!
|
42
52
|
#
|
@@ -82,7 +92,7 @@ class SpecID::Precision::Prob
|
|
82
92
|
else
|
83
93
|
decoy_val = decoy_vals.first
|
84
94
|
if decoy_val
|
85
|
-
|
95
|
+
pi_zero = decoy_val.pi_zero
|
86
96
|
end
|
87
97
|
end
|
88
98
|
|
@@ -167,7 +177,8 @@ class SpecID::Precision::Prob
|
|
167
177
|
val_hash[decoy_val].push(decoy_precision) if decoy_val
|
168
178
|
probability_validators.zip(last_prob_values) do |val,prec|
|
169
179
|
if decoy_val
|
170
|
-
|
180
|
+
raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
|
181
|
+
val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
|
171
182
|
else
|
172
183
|
val_hash[val] << prec
|
173
184
|
end
|
data/lib/validator/cmdline.rb
CHANGED
@@ -41,7 +41,7 @@ class Validator::Cmdline
|
|
41
41
|
{
|
42
42
|
:hits_together => true,
|
43
43
|
:decoy_on_match => true,
|
44
|
-
:
|
44
|
+
:pi_zero => 1.0,
|
45
45
|
},
|
46
46
|
:bad_aa =>
|
47
47
|
{
|
@@ -61,7 +61,7 @@ class Validator::Cmdline
|
|
61
61
|
:ties => true,
|
62
62
|
}
|
63
63
|
COMMAND_LINE = {
|
64
|
-
:decoy => ["--decoy /REGEXP/|FILENAME[,
|
64
|
+
:decoy => ["--decoy /REGEXP/|FILENAME[,PI0,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
|
65
65
|
"FILENAME of separate search on decoys.",
|
66
66
|
"All regular expressions must be surrounded by '/'",
|
67
67
|
"(no extended options [trailing modifiers]).",
|
@@ -72,11 +72,8 @@ class Validator::Cmdline
|
|
72
72
|
" --decoy '/^\\s*REVERSE/'",
|
73
73
|
"If decoys proteins were searched in a separate file,",
|
74
74
|
"then give the FILENAME (e.g., --decoy decoy.srg)",
|
75
|
-
"
|
75
|
+
"PI0 = Incorrect Targets to Decoy Ratio (default: #{DEFAULTS[:decoy][:pi_zero]})",
|
76
76
|
"DOM = *true/false, decoy on match",],
|
77
|
-
:decoy_pi_zero => ["--decoy_pi_zero", "uses sequest Xcorrs to estimate the",
|
78
|
-
"percentage of incorrect target hits.",
|
79
|
-
"This over-rides any given DTR (above)"],
|
80
77
|
:tps => ["--tps <fasta>", "for a completely defined sample, this is the",
|
81
78
|
"fasta file containing the true protein hits"],
|
82
79
|
# may require digestion:
|
@@ -159,7 +156,7 @@ class Validator::Cmdline
|
|
159
156
|
raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
|
160
157
|
first_arg
|
161
158
|
end
|
162
|
-
val_opts[:
|
159
|
+
val_opts[:pi_zero] = (ar[1] || DEFAULTS[:decoy][:pi_zero]).to_f
|
163
160
|
val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
|
164
161
|
myargs.push(val_opts)
|
165
162
|
opts[:validators].push(myargs)
|
data/lib/validator/decoy.rb
CHANGED
@@ -8,7 +8,11 @@ class Validator::Decoy < Validator
|
|
8
8
|
|
9
9
|
attr_accessor :decoy_on_match
|
10
10
|
attr_accessor :correct_wins
|
11
|
-
|
11
|
+
# This is the number of incorrect target hits over the total decoy hits
|
12
|
+
# The very rough, conservative ballpark estimate is the ratio of target hits
|
13
|
+
# to decoy hits. This can be refined by removing the number of true target
|
14
|
+
# hits from the targets used to calculate it.
|
15
|
+
attr_accessor :pi_zero
|
12
16
|
|
13
17
|
attr_accessor :last_pep_was_decoy
|
14
18
|
|
@@ -21,12 +25,12 @@ class Validator::Decoy < Validator
|
|
21
25
|
DEFAULTS = {
|
22
26
|
:decoy_on_match => true,
|
23
27
|
:correct_wins => true,
|
24
|
-
:
|
28
|
+
:pi_zero => 1.0,
|
25
29
|
}
|
26
30
|
|
27
31
|
def initialize(opts={})
|
28
32
|
merged = DEFAULTS.merge(opts)
|
29
|
-
@constraint, @decoy_on_match, @correct_wins, @
|
33
|
+
@constraint, @decoy_on_match, @correct_wins, @pi_zero = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :pi_zero)
|
30
34
|
end
|
31
35
|
|
32
36
|
# returns [normal, decoy] (?? I think ??)
|
@@ -82,15 +86,15 @@ class Validator::Decoy < Validator
|
|
82
86
|
@normal_peps_just_submitted = normal
|
83
87
|
@increment_normal += normal.size
|
84
88
|
@increment_decoy += decoy.size
|
85
|
-
calc_precision(@increment_normal, @increment_decoy, @
|
89
|
+
calc_precision(@increment_normal, @increment_decoy, @pi_zero)
|
86
90
|
end
|
87
91
|
|
88
92
|
def pephit_precision(peps, separate_peps=nil)
|
89
93
|
if separate_peps
|
90
|
-
calc_precision(peps.size, separate_peps.size, @
|
94
|
+
calc_precision(peps.size, separate_peps.size, @pi_zero)
|
91
95
|
else
|
92
96
|
(norm, decoy) = partition(peps)
|
93
|
-
calc_precision(norm.size, decoy.size, @
|
97
|
+
calc_precision(norm.size, decoy.size, @pi_zero)
|
94
98
|
end
|
95
99
|
end
|
96
100
|
|
data/lib/validator.rb
CHANGED
@@ -121,7 +121,7 @@ class Validator
|
|
121
121
|
hash[cat.to_sym] = val.send(cat.to_sym)
|
122
122
|
end
|
123
123
|
when Validator::Decoy
|
124
|
-
%w(
|
124
|
+
%w(pi_zero correct_wins decoy_on_match).each do |cat|
|
125
125
|
hash[cat.to_sym] = val.send(cat.to_sym)
|
126
126
|
end
|
127
127
|
hash[:constraint] = val.constraint.inspect if val.constraint
|
@@ -167,11 +167,11 @@ end
|
|
167
167
|
# normal hits (which may be true or false) and the second are decoy hits.
|
168
168
|
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
|
169
169
|
module Precision::Calculator::Decoy
|
170
|
-
def calc_precision(num_normal, num_decoy,
|
170
|
+
def calc_precision(num_normal, num_decoy, pi_zero=1.0)
|
171
171
|
# will calculate as floats in case fractional amounts passed in for
|
172
172
|
# whatever reason
|
173
173
|
num_normal_f = num_normal.to_f
|
174
|
-
num_true_pos = num_normal_f - (num_decoy.to_f
|
174
|
+
num_true_pos = num_normal_f - (num_decoy.to_f * pi_zero)
|
175
175
|
precision =
|
176
176
|
if num_normal_f == 0.0
|
177
177
|
if num_decoy.to_f > 0.0
|
data/specs/spec_id/srf_spec.rb
CHANGED
@@ -162,8 +162,8 @@ describe SRF, 'creating dta files' do
|
|
162
162
|
File.directory?('020').should be_true
|
163
163
|
File.exist?('020/020.3366.3366.2.dta').should be_true
|
164
164
|
lines = IO.readlines('020/020.3366.3366.2.dta', "\r\n")
|
165
|
-
lines.first.should == "1113.
|
166
|
-
lines[1].should == "164.
|
165
|
+
lines.first.should == "1113.106493 2\r\n"
|
166
|
+
lines[1].should == "164.5659 4817\r\n"
|
167
167
|
|
168
168
|
FileUtils.rm_rf '020'
|
169
169
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-09-
|
12
|
+
date: 2008-09-25 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|