mspire 0.4.5 → 0.4.7
Sign up to get free protection for your applications and to get access to all the features.
- data/changelog.txt +8 -0
- data/lib/mspire.rb +1 -1
- data/lib/pi_zero.rb +75 -58
- data/lib/qvalue.rb +41 -32
- data/lib/spec_id/precision/prob/cmdline.rb +4 -1
- data/lib/spec_id/precision/prob.rb +1 -1
- data/lib/spec_id/srf.rb +30 -0
- data/lib/validator/cmdline.rb +6 -4
- data/lib/validator/decoy.rb +9 -8
- data/lib/validator.rb +2 -2
- data/specs/pi_zero_spec.rb +18 -7
- metadata +2 -2
data/changelog.txt
CHANGED
@@ -213,3 +213,11 @@ difference between good hits and bad hits
|
|
213
213
|
1. using pi_zero instead of decoy_to_target_ratio. While all tests are
|
214
214
|
passing, this release should be considered experimental with the use of any
|
215
215
|
target-decoy validation.
|
216
|
+
|
217
|
+
## version 0.4.6
|
218
|
+
1. added NOTE to --to_qvalues option to include all results (no low prob
|
219
|
+
filter)
|
220
|
+
|
221
|
+
## version 0.4.7
|
222
|
+
1. Added ability to quickly grab sequest params out of a .SRF file
|
223
|
+
2. Added helpful runtime error if print_duplicate_references is 0.
|
data/lib/mspire.rb
CHANGED
data/lib/pi_zero.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'rsruby'
|
2
|
-
require 'gsl'
|
3
2
|
require 'vec'
|
4
3
|
require 'vec/r'
|
5
4
|
require 'enumerator'
|
@@ -21,7 +20,8 @@ module PiZero
|
|
21
20
|
pi_zeros = [] # pi_0
|
22
21
|
total = sorted_pvals.size # m
|
23
22
|
|
24
|
-
# totally
|
23
|
+
# totally inefficient implementation (with correct logic):
|
24
|
+
# TODO: implement this efficiently
|
25
25
|
start.step(stop, step) do |lam|
|
26
26
|
lambdas << lam
|
27
27
|
(greater, less) = sorted_pvals.partition {|pval| pval > lam }
|
@@ -30,6 +30,36 @@ module PiZero
|
|
30
30
|
[lambdas, pi_zeros]
|
31
31
|
end
|
32
32
|
|
33
|
+
=begin
|
34
|
+
def plateau_height_with_gsl(x, y)
|
35
|
+
require 'gsl'
|
36
|
+
x_deltas = (0...(x.size-1)).to_a.map do |i|
|
37
|
+
x[i+1] - x[i]
|
38
|
+
end
|
39
|
+
y_deltas = (0...(y.size-1)).to_a.map do |i|
|
40
|
+
y[i+1] - y[i]
|
41
|
+
end
|
42
|
+
new_xs = x.dup
|
43
|
+
new_ys = y.dup
|
44
|
+
x_deltas.reverse.each do |delt|
|
45
|
+
new_xs.push( new_xs.last + delt )
|
46
|
+
end
|
47
|
+
|
48
|
+
y_cnt = y.size
|
49
|
+
y_deltas.reverse.each do |delt|
|
50
|
+
y_cnt -= 1
|
51
|
+
new_ys.push( y[y_cnt] - delt )
|
52
|
+
end
|
53
|
+
|
54
|
+
x_vec = GSL::Vector.alloc(new_xs)
|
55
|
+
y_vec = GSL::Vector.alloc(new_ys)
|
56
|
+
coef, cov, chisq, status = GSL::Poly.fit(x_vec,y_vec, 3)
|
57
|
+
coef.eval(x.last)
|
58
|
+
#x2 = GSL::Vector::linspace(0,2.4,20)
|
59
|
+
#graph([x_vec,y_vec], [x2, coef.eval(x2)], "-C -g 3 -S 4")
|
60
|
+
end
|
61
|
+
=end
|
62
|
+
|
33
63
|
# expecting x and y to make a scatter plot descending to a plateau on the
|
34
64
|
# right side (which is assumed to be of increasing noise as it goes to the
|
35
65
|
# right)
|
@@ -42,46 +72,19 @@ module PiZero
|
|
42
72
|
# ** *** * *
|
43
73
|
# ***** **** ***
|
44
74
|
def plateau_height(x, y)
|
45
|
-
=begin
|
46
|
-
require 'gsl'
|
47
|
-
x_deltas = (0...(x.size-1)).to_a.map do |i|
|
48
|
-
x[i+1] - x[i]
|
49
|
-
end
|
50
|
-
y_deltas = (0...(y.size-1)).to_a.map do |i|
|
51
|
-
y[i+1] - y[i]
|
52
|
-
end
|
53
|
-
new_xs = x.dup
|
54
|
-
new_ys = y.dup
|
55
|
-
x_deltas.reverse.each do |delt|
|
56
|
-
new_xs.push( new_xs.last + delt )
|
57
|
-
end
|
58
|
-
|
59
|
-
y_cnt = y.size
|
60
|
-
y_deltas.reverse.each do |delt|
|
61
|
-
y_cnt -= 1
|
62
|
-
new_ys.push( y[y_cnt] - delt )
|
63
|
-
end
|
64
|
-
|
65
|
-
x_vec = GSL::Vector.alloc(new_xs)
|
66
|
-
y_vec = GSL::Vector.alloc(new_ys)
|
67
|
-
coef, cov, chisq, status = GSL::Poly.fit(x_vec,y_vec, 3)
|
68
|
-
coef.eval(x.last)
|
69
|
-
#x2 = GSL::Vector::linspace(0,2.4,20)
|
70
|
-
#graph([x_vec,y_vec], [x2, coef.eval(x2)], "-C -g 3 -S 4")
|
71
|
-
=end
|
72
|
-
|
73
75
|
r = RSRuby.instance
|
74
76
|
answ = r.smooth_spline(x,y, :df => 3)
|
75
77
|
## to plot it!
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
r.plot(x,y, :ylab=>"pi_zeros or frit")
|
79
|
+
r.lines(answ['x'], answ['y'])
|
80
|
+
r.points(answ['x'], answ['y'])
|
81
|
+
sleep(4)
|
80
82
|
|
81
83
|
answ['y'].last
|
82
84
|
end
|
83
85
|
|
84
86
|
def plateau_exponential(x,y)
|
87
|
+
require 'gsl'
|
85
88
|
xvec = GSL::Vector.alloc(x)
|
86
89
|
yvec = GSL::Vector.alloc(y)
|
87
90
|
a2, b2, = GSL::Fit.linear(xvec, GSL::Sf::log(yvec))
|
@@ -93,9 +96,10 @@ module PiZero
|
|
93
96
|
|
94
97
|
end
|
95
98
|
|
96
|
-
# returns a conservative (but close) estimate of pi_0 given
|
99
|
+
# returns a conservative (but close) estimate of pi_0 given p-values
|
97
100
|
# following Storey et al. 2003, PNAS.
|
98
|
-
def pi_zero(
|
101
|
+
def pi_zero(pvals)
|
102
|
+
sorted_pvals = pvals.sort
|
99
103
|
plateau_height( *(pi_zero_hats(sorted_pvals)) )
|
100
104
|
end
|
101
105
|
|
@@ -161,6 +165,8 @@ module PiZero
|
|
161
165
|
p_values(target_hits.map {|v| v.xcorr}, new_decoy_vals )
|
162
166
|
end
|
163
167
|
|
168
|
+
#### NEED TO VERIFY if this is PIT or PI_ZERO!
|
169
|
+
=begin
|
164
170
|
# takes a list of booleans with true being a target hit and false being a
|
165
171
|
# decoy hit and returns the pi_zero using the smooth method
|
166
172
|
# Should be ordered from best to worst (i.e., one expects more true values
|
@@ -184,12 +190,32 @@ module PiZero
|
|
184
190
|
ys.reverse!
|
185
191
|
plateau_height(xs, ys)
|
186
192
|
end
|
193
|
+
=end
|
194
|
+
|
195
|
+
# returns fraction of incorrect target hits (frit) (this is the percent
|
196
|
+
# incorrect targets [PIT] expressed as a fraction rather than percent)
|
197
|
+
# takes two parallel arrays consisting of the total number of hits (this
|
198
|
+
# will typically be the total # target hits) at that point and the
|
199
|
+
# precision (ranging from: [0,1]) (typically determined by counting the
|
200
|
+
# number of decoy hits). Expects the number of total hits to be
|
201
|
+
# monotonically increasing and the precision to roughly start high and
|
202
|
+
# decrease as more hits (of lesser quality) are added.
|
203
|
+
def frit_from_precision(total_num_hits_ar, precision_ar)
|
204
|
+
instant_pi_zeros = []
|
205
|
+
total_num_hits_ar.reverse.zip(precision_ar.reverse).each_cons(2) do |dp1, dp0|
|
206
|
+
(x1, y1) = dp1
|
207
|
+
(x0, y0) = dp0
|
208
|
+
instant_pi_zeros << ((x1 * (1.0 - y1)) - (x0 * (1.0 - y0) )) / (x1 - x0)
|
209
|
+
end
|
210
|
+
instant_pi_zeros.reverse!
|
211
|
+
plateau_height(total_num_hits_ar[1..-1], instant_pi_zeros)
|
212
|
+
end
|
187
213
|
|
188
214
|
# Takes an array of doublets ([[int, int], [int, int]...]) where the first
|
189
215
|
# value is the number of target hits and the second is the number of decoy
|
190
216
|
# hits. Expects that best hits are at the beginning of the list. Assumes
|
191
|
-
# that each sum is a subset
|
192
|
-
#
|
217
|
+
# that each sum is a subset of the following group (shown as actual hits
|
218
|
+
# rather than number of hits):
|
193
219
|
#
|
194
220
|
# [[target, target, target, decoy], [target, target, target, decoy,
|
195
221
|
# target, decoy, target], [target, target, target, decoy, target,
|
@@ -197,31 +223,22 @@ module PiZero
|
|
197
223
|
#
|
198
224
|
# This assumption may be relaxed somewhat and should still give good
|
199
225
|
# results.
|
200
|
-
def
|
201
|
-
|
226
|
+
def frit_from_groups(array_of_doublets)
|
227
|
+
frits = []
|
202
228
|
array_of_doublets.reverse.each_cons(2) do |two_doublets|
|
203
229
|
bigger, smaller = two_doublets
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
if
|
208
|
-
|
230
|
+
num_targets = bigger[0] - smaller[0]
|
231
|
+
num_decoy = bigger[1] - smaller[1]
|
232
|
+
num_targets = 0 if num_targets < 0
|
233
|
+
num_decoy = 0 if num_targets < 0
|
234
|
+
if num_decoy > 0
|
235
|
+
frits << (num_targets.to_f / num_decoy)
|
209
236
|
end
|
210
237
|
end
|
211
|
-
|
212
|
-
xs = (0...(
|
213
|
-
plateau_height(xs,
|
238
|
+
frits.reverse!
|
239
|
+
xs = (0...(frits.size)).to_a
|
240
|
+
plateau_height(xs, frits)
|
214
241
|
end
|
215
242
|
|
216
243
|
end
|
217
|
-
|
218
|
-
|
219
|
-
end
|
220
|
-
|
221
|
-
if $0 == __FILE__
|
222
|
-
#xcorrs = IO.readlines("/home/jtprince/xcorr_hist/all_xcorrs.yada").first.chomp.split(/\s+/).map {|v| v.to_f }
|
223
|
-
#PiZero.p_values_for_sequest(
|
224
|
-
#File.open("newtail.yada", 'w') {|out| out.puts new_dist.join(" ") }
|
225
|
-
|
226
|
-
|
227
244
|
end
|
data/lib/qvalue.rb
CHANGED
@@ -79,41 +79,50 @@ class VecD
|
|
79
79
|
end
|
80
80
|
|
81
81
|
pi_zeros = lambda_vals.map {|val| self.pi_zero_at_lambda(val) }
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
(0...lsz).each do |i|
|
106
|
-
pi0_boot[i] = ( p_boot.select{|v| v > lambda_vals[i] }.size.to_f/p_boot.size ) / (1-lambda_vals[i])
|
82
|
+
|
83
|
+
r = RSRuby.instance
|
84
|
+
r.plot(lambda_vals,pi_zeros, :ylab=>"instantaneous pi_zeros")
|
85
|
+
answ = r.smooth_spline(lambda_vals, pi_zeros, :df => Default_smooth_df)
|
86
|
+
r.lines(answ['x'], answ['y'])
|
87
|
+
r.points(answ['x'], answ['y'])
|
88
|
+
sleep(20)
|
89
|
+
|
90
|
+
answer =
|
91
|
+
if lambda_vals.size == 1
|
92
|
+
pi_zeros.first
|
93
|
+
else
|
94
|
+
case method
|
95
|
+
when :smooth
|
96
|
+
r = RSRuby.instance
|
97
|
+
calc_pi_zero = lambda do |_pi_zeros|
|
98
|
+
hash = r.smooth_spline(lambda_vals, _pi_zeros, :df => Default_smooth_df)
|
99
|
+
hash['y'][VecD.new(lambda_vals).max_indices.max]
|
100
|
+
end
|
101
|
+
if log_transform
|
102
|
+
pi_zeros.log_space {|log_vals| calc_pi_zero.call(log_vals) }
|
103
|
+
else
|
104
|
+
calc_pi_zero.call(pi_zeros)
|
107
105
|
end
|
108
|
-
|
106
|
+
when :bootstrap
|
107
|
+
min_pi0 = pi_zeros.min
|
108
|
+
lsz = lambda_vals.size
|
109
|
+
mse = VecD.new(lsz, 0)
|
110
|
+
pi0_boot = VecD.new(lsz, 0)
|
111
|
+
sz = self.size
|
112
|
+
100.times do # for(i in 1:100) {
|
113
|
+
p_boot = self.shuffle
|
114
|
+
(0...lsz).each do |i|
|
115
|
+
pi0_boot[i] = ( p_boot.select{|v| v > lambda_vals[i] }.size.to_f/p_boot.size ) / (1-lambda_vals[i])
|
116
|
+
end
|
117
|
+
mse = mse + ( (pi0_boot-min_pi0)**2 )
|
118
|
+
end
|
119
|
+
# pi0 <- min(pi0[mse==min(mse)])
|
120
|
+
pi_zero = pi_zeros.values_at(*(mse.min_indices)).min
|
121
|
+
[pi_zero,1].min
|
122
|
+
else
|
123
|
+
raise ArgumentError, ":pi_zero_method must be :smooth or :bootstrap!"
|
109
124
|
end
|
110
|
-
# pi0 <- min(pi0[mse==min(mse)])
|
111
|
-
pi_zero = pi_zeros.values_at(*(mse.min_indices)).min
|
112
|
-
[pi_zero,1].min
|
113
|
-
else
|
114
|
-
raise ArgumentError, ":pi_zero_method must be :smooth or :bootstrap!"
|
115
125
|
end
|
116
|
-
end
|
117
126
|
end
|
118
127
|
|
119
128
|
# Returns a VecD filled with parallel q-values
|
@@ -16,7 +16,10 @@ module SpecID
|
|
16
16
|
:to_qvalues => ['--to_qvalues', "transform probabilities into q-values",
|
17
17
|
"(includes pi_0 correction)",
|
18
18
|
"uses PROB [TYPE] if given and supercedes",
|
19
|
-
"the prob validation type"
|
19
|
+
"the prob validation type",
|
20
|
+
"*NOTE: include all PeptideProphet results",
|
21
|
+
"(don't use any low prob cutoff) for",
|
22
|
+
"accurate results!"],
|
20
23
|
:prob => ['--prob [TYPE]', "use prophet probabilites to calculate precision",
|
21
24
|
"TYPE = nsp [default] prophet nsp",
|
22
25
|
" (nsp also should be used for PeptideProphet results)",
|
@@ -226,7 +226,7 @@ class SpecID::Precision::Prob
|
|
226
226
|
else
|
227
227
|
out[:probabilities] = probabilities
|
228
228
|
end
|
229
|
-
out[:pephits] = ordered_peps # just in case they want to see
|
229
|
+
# out[:pephits] = ordered_peps # just in case they want to see
|
230
230
|
out[:count] = num_pephits
|
231
231
|
out[:aaseqs] = pepstrings
|
232
232
|
out[:charges] = pepcharges
|
data/lib/spec_id/srf.rb
CHANGED
@@ -73,6 +73,7 @@ class SRFGroup
|
|
73
73
|
IO.readlines(file).grep(/\w/).map {|v| v.chomp }
|
74
74
|
end
|
75
75
|
|
76
|
+
|
76
77
|
# if srfs were read in separately, then the proteins will need to be merged
|
77
78
|
# by their reference
|
78
79
|
def merge_different_sets(srfs)
|
@@ -184,6 +185,20 @@ class SRF
|
|
184
185
|
|
185
186
|
attr_accessor :filtered_by_precursor_mass_tolerance
|
186
187
|
|
188
|
+
# returns a Sequest::Params object
|
189
|
+
def self.get_sequest_params(filename)
|
190
|
+
# split the file in half and only read the second half (since we can be
|
191
|
+
# confident that the params file will be there!)
|
192
|
+
File.open(filename) do |handle|
|
193
|
+
halfway = handle.stat.size / 2
|
194
|
+
handle.seek halfway
|
195
|
+
last_half = handle.read
|
196
|
+
params_start_index = last_half.rindex('[SEQUEST]') + halfway
|
197
|
+
handle.seek(params_start_index)
|
198
|
+
Sequest::Params.new.parse_handle(handle)
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
187
202
|
def dta_start_byte
|
188
203
|
case @version
|
189
204
|
when '3.2' ; 3260
|
@@ -468,10 +483,24 @@ class SRF
|
|
468
483
|
|
469
484
|
# returns self
|
470
485
|
def from_file(filename, peps, global_ref_hash)
|
486
|
+
dups = SRF.get_sequest_params(filename).print_duplicate_references
|
487
|
+
if dups == '0'
|
488
|
+
raise RuntimeError, <<END
|
489
|
+
|
490
|
+
***************************************************************************
|
491
|
+
Sorry, but the SRF reader cannot read this file!
|
492
|
+
.srf files must currently be created with print_duplicate_references > 0
|
493
|
+
(This is how the srf object can link peptides with proteins!)
|
494
|
+
To capture all duplicate references, set the sequest parameter
|
495
|
+
'print_duplicate_references' to 100 or greater.
|
496
|
+
***************************************************************************
|
497
|
+
END
|
498
|
+
end
|
471
499
|
|
472
500
|
File.open(filename, "rb") do |fh|
|
473
501
|
@header = SRF::Header.new.from_handle(fh)
|
474
502
|
@version = @header.version
|
503
|
+
|
475
504
|
unpack_35 = case @version
|
476
505
|
when '3.2'
|
477
506
|
false
|
@@ -824,6 +853,7 @@ class SRF::OUT::Pep
|
|
824
853
|
end
|
825
854
|
|
826
855
|
def self.read_extra_references(fh, num_extra_references, pep_hits, global_ref_hash)
|
856
|
+
p num_extra_references
|
827
857
|
num_extra_references.times do
|
828
858
|
# 80 bytes total (with index number)
|
829
859
|
pep = pep_hits[fh.read(8).unpack('x4I').first - 1]
|
data/lib/validator/cmdline.rb
CHANGED
@@ -41,7 +41,7 @@ class Validator::Cmdline
|
|
41
41
|
{
|
42
42
|
:hits_together => true,
|
43
43
|
:decoy_on_match => true,
|
44
|
-
:
|
44
|
+
:frit => 1.0, # fraction incorrect targets (like PIT)
|
45
45
|
},
|
46
46
|
:bad_aa =>
|
47
47
|
{
|
@@ -61,7 +61,7 @@ class Validator::Cmdline
|
|
61
61
|
:ties => true,
|
62
62
|
}
|
63
63
|
COMMAND_LINE = {
|
64
|
-
:decoy => ["--decoy /REGEXP/|FILENAME[,
|
64
|
+
:decoy => ["--decoy /REGEXP/|FILENAME[,PIT,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
|
65
65
|
"FILENAME of separate search on decoys.",
|
66
66
|
"All regular expressions must be surrounded by '/'",
|
67
67
|
"(no extended options [trailing modifiers]).",
|
@@ -72,7 +72,8 @@ class Validator::Cmdline
|
|
72
72
|
" --decoy '/^\\s*REVERSE/'",
|
73
73
|
"If decoys proteins were searched in a separate file,",
|
74
74
|
"then give the FILENAME (e.g., --decoy decoy.srg)",
|
75
|
-
"
|
75
|
+
"FRIT = Fraction Incorrect Targets (like",
|
76
|
+
"the PIT as a fraction) (default: #{DEFAULTS[:decoy][:frit]})",
|
76
77
|
"DOM = *true/false, decoy on match",],
|
77
78
|
:tps => ["--tps <fasta>", "for a completely defined sample, this is the",
|
78
79
|
"fasta file containing the true protein hits"],
|
@@ -156,7 +157,7 @@ class Validator::Cmdline
|
|
156
157
|
raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
|
157
158
|
first_arg
|
158
159
|
end
|
159
|
-
val_opts[:
|
160
|
+
val_opts[:frit] = (ar[1] || DEFAULTS[:decoy][:frit]).to_f
|
160
161
|
val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
|
161
162
|
myargs.push(val_opts)
|
162
163
|
opts[:validators].push(myargs)
|
@@ -295,6 +296,7 @@ class Validator::Cmdline
|
|
295
296
|
val = 1e-9 if val == 0
|
296
297
|
val
|
297
298
|
end
|
299
|
+
File.open("TMP_PVALUES.txt", 'w') {|v| v.puts pvals.sort.join(" ") }
|
298
300
|
pvals = VecD.new(pvals)
|
299
301
|
#qvals = pvals.qvalues(false, :lambda_vals => 0.30 )
|
300
302
|
qvals = pvals.qvalues
|
data/lib/validator/decoy.rb
CHANGED
@@ -9,10 +9,11 @@ class Validator::Decoy < Validator
|
|
9
9
|
attr_accessor :decoy_on_match
|
10
10
|
attr_accessor :correct_wins
|
11
11
|
# This is the number of incorrect target hits over the total decoy hits
|
12
|
-
# The
|
13
|
-
#
|
12
|
+
# The percent incorrect targets (PIT) expressed as a fraction (== 1 - PI_0).
|
13
|
+
# The rough, conservative ballpark estimate is the ratio of target hits to
|
14
|
+
# decoy hits. This can be refined by removing the number of true target
|
14
15
|
# hits from the targets used to calculate it.
|
15
|
-
attr_accessor :
|
16
|
+
attr_accessor :frit
|
16
17
|
|
17
18
|
attr_accessor :last_pep_was_decoy
|
18
19
|
|
@@ -25,12 +26,12 @@ class Validator::Decoy < Validator
|
|
25
26
|
DEFAULTS = {
|
26
27
|
:decoy_on_match => true,
|
27
28
|
:correct_wins => true,
|
28
|
-
:
|
29
|
+
:frit => 1.0,
|
29
30
|
}
|
30
31
|
|
31
32
|
def initialize(opts={})
|
32
33
|
merged = DEFAULTS.merge(opts)
|
33
|
-
@constraint, @decoy_on_match, @correct_wins, @
|
34
|
+
@constraint, @decoy_on_match, @correct_wins, @frit = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :frit)
|
34
35
|
end
|
35
36
|
|
36
37
|
# returns [normal, decoy] (?? I think ??)
|
@@ -86,15 +87,15 @@ class Validator::Decoy < Validator
|
|
86
87
|
@normal_peps_just_submitted = normal
|
87
88
|
@increment_normal += normal.size
|
88
89
|
@increment_decoy += decoy.size
|
89
|
-
calc_precision(@increment_normal, @increment_decoy, @
|
90
|
+
calc_precision(@increment_normal, @increment_decoy, @frit)
|
90
91
|
end
|
91
92
|
|
92
93
|
def pephit_precision(peps, separate_peps=nil)
|
93
94
|
if separate_peps
|
94
|
-
calc_precision(peps.size, separate_peps.size, @
|
95
|
+
calc_precision(peps.size, separate_peps.size, @frit)
|
95
96
|
else
|
96
97
|
(norm, decoy) = partition(peps)
|
97
|
-
calc_precision(norm.size, decoy.size, @
|
98
|
+
calc_precision(norm.size, decoy.size, @frit)
|
98
99
|
end
|
99
100
|
end
|
100
101
|
|
data/lib/validator.rb
CHANGED
@@ -167,11 +167,11 @@ end
|
|
167
167
|
# normal hits (which may be true or false) and the second are decoy hits.
|
168
168
|
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
|
169
169
|
module Precision::Calculator::Decoy
|
170
|
-
def calc_precision(num_normal, num_decoy,
|
170
|
+
def calc_precision(num_normal, num_decoy, frit=1.0)
|
171
171
|
# will calculate as floats in case fractional amounts passed in for
|
172
172
|
# whatever reason
|
173
173
|
num_normal_f = num_normal.to_f
|
174
|
-
num_true_pos = num_normal_f - (num_decoy.to_f *
|
174
|
+
num_true_pos = num_normal_f - (num_decoy.to_f * frit)
|
175
175
|
precision =
|
176
176
|
if num_normal_f == 0.0
|
177
177
|
if num_decoy.to_f > 0.0
|
data/specs/pi_zero_spec.rb
CHANGED
@@ -18,7 +18,7 @@ describe PiZero do
|
|
18
18
|
@sorted_pvals = [0.0, 0.1, 0.223, 0.24, 0.55, 0.68, 0.68, 0.90, 0.98, 1.0]
|
19
19
|
end
|
20
20
|
|
21
|
-
|
21
|
+
xit 'calculates instantaneous pi_0 hats' do
|
22
22
|
answ = PiZero.pi_zero_hats(@sorted_pvals, :step => 0.1)
|
23
23
|
exp_lambdas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
|
24
24
|
passing_threshold = [9, 8, 8, 6, 6, 6, 5, 3, 3, 2]
|
@@ -36,7 +36,7 @@ describe PiZero do
|
|
36
36
|
# still working on this one
|
37
37
|
end
|
38
38
|
|
39
|
-
|
39
|
+
xit 'can find a plateau height' do
|
40
40
|
x = [0.0, 0.01, 0.012, 0.13, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
|
41
41
|
y = [1.0, 0.95, 0.92, 0.8, 0.7, 0.6, 0.55, 0.58, 0.62, 0.53, 0.54, 0.59, 0.4, 0.72]
|
42
42
|
z = PiZero.plateau_height(x,y)
|
@@ -47,7 +47,7 @@ describe PiZero do
|
|
47
47
|
#sleep(8)
|
48
48
|
end
|
49
49
|
|
50
|
-
|
50
|
+
xit 'can calculate p values for SEQUEST hits' do
|
51
51
|
class FakeSequest ; attr_accessor :xcorr ; def initialize(xcorr) ; @xcorr = xcorr ; end ; end
|
52
52
|
|
53
53
|
target = []
|
@@ -68,13 +68,13 @@ describe PiZero do
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
|
71
|
+
xit 'can calculate pi zero for target/decoy booleans' do
|
72
72
|
pi_zero = PiZero.pi_zero_from_booleans(@bools)
|
73
73
|
# frozen
|
74
74
|
pi_zero.should be_close(0.03522869, 0.0001)
|
75
75
|
end
|
76
76
|
|
77
|
-
it 'can calculate
|
77
|
+
it 'can calculate frit for groups of hits' do
|
78
78
|
# setup
|
79
79
|
targets = [4,3,8,3,5,3,4,5,4]
|
80
80
|
decoys = [0,2,2,3,5,7,8,8,8]
|
@@ -95,9 +95,20 @@ describe PiZero do
|
|
95
95
|
decoys_summed << sum
|
96
96
|
end
|
97
97
|
zipped = targets_summed.zip(decoys_summed)
|
98
|
-
|
98
|
+
frit = PiZero.frit_from_groups(zipped)
|
99
99
|
# frozen
|
100
|
-
|
100
|
+
frit.should be_close(0.384064, 0.00001)
|
101
|
+
end
|
102
|
+
|
103
|
+
xit 'can calcuate pi zero for total number of hits and precision' do
|
104
|
+
tot_hits = [1,10,20,30,50,200]
|
105
|
+
precision = [1.0, 1.0, 0.85, 0.80, 0.7, 0.5]
|
106
|
+
reply = PiZero.frit_from_precision(tot_hits, precision)
|
107
|
+
puts "ANSER"
|
108
|
+
# frozen
|
109
|
+
puts reply
|
110
|
+
#reply.should be_close()
|
111
|
+
|
101
112
|
end
|
102
113
|
|
103
114
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-11-06 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|