mspire 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/changelog.txt +8 -0
- data/lib/mspire.rb +1 -1
- data/lib/pi_zero.rb +75 -58
- data/lib/qvalue.rb +41 -32
- data/lib/spec_id/precision/prob/cmdline.rb +4 -1
- data/lib/spec_id/precision/prob.rb +1 -1
- data/lib/spec_id/srf.rb +30 -0
- data/lib/validator/cmdline.rb +6 -4
- data/lib/validator/decoy.rb +9 -8
- data/lib/validator.rb +2 -2
- data/specs/pi_zero_spec.rb +18 -7
- metadata +2 -2
data/changelog.txt
CHANGED
@@ -213,3 +213,11 @@ difference between good hits and bad hits
|
|
213
213
|
1. using pi_zero instead of decoy_to_target_ratio. While all tests are
|
214
214
|
passing, this release should be considered experimental with the use of any
|
215
215
|
target-decoy validation.
|
216
|
+
|
217
|
+
## version 0.4.6
|
218
|
+
1. added NOTE to --to_qvalues option to include all results (no low prob
|
219
|
+
filter)
|
220
|
+
|
221
|
+
## version 0.4.7
|
222
|
+
1. Added ability to quickly grab sequest params out of a .SRF file
|
223
|
+
2. Added helpful runtime error if print_duplicate_references is 0.
|
data/lib/mspire.rb
CHANGED
data/lib/pi_zero.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'rsruby'
|
2
|
-
require 'gsl'
|
3
2
|
require 'vec'
|
4
3
|
require 'vec/r'
|
5
4
|
require 'enumerator'
|
@@ -21,7 +20,8 @@ module PiZero
|
|
21
20
|
pi_zeros = [] # pi_0
|
22
21
|
total = sorted_pvals.size # m
|
23
22
|
|
24
|
-
# totally
|
23
|
+
# totally inefficient implementation (with correct logic):
|
24
|
+
# TODO: implement this efficiently
|
25
25
|
start.step(stop, step) do |lam|
|
26
26
|
lambdas << lam
|
27
27
|
(greater, less) = sorted_pvals.partition {|pval| pval > lam }
|
@@ -30,6 +30,36 @@ module PiZero
|
|
30
30
|
[lambdas, pi_zeros]
|
31
31
|
end
|
32
32
|
|
33
|
+
=begin
|
34
|
+
def plateau_height_with_gsl(x, y)
|
35
|
+
require 'gsl'
|
36
|
+
x_deltas = (0...(x.size-1)).to_a.map do |i|
|
37
|
+
x[i+1] - x[i]
|
38
|
+
end
|
39
|
+
y_deltas = (0...(y.size-1)).to_a.map do |i|
|
40
|
+
y[i+1] - y[i]
|
41
|
+
end
|
42
|
+
new_xs = x.dup
|
43
|
+
new_ys = y.dup
|
44
|
+
x_deltas.reverse.each do |delt|
|
45
|
+
new_xs.push( new_xs.last + delt )
|
46
|
+
end
|
47
|
+
|
48
|
+
y_cnt = y.size
|
49
|
+
y_deltas.reverse.each do |delt|
|
50
|
+
y_cnt -= 1
|
51
|
+
new_ys.push( y[y_cnt] - delt )
|
52
|
+
end
|
53
|
+
|
54
|
+
x_vec = GSL::Vector.alloc(new_xs)
|
55
|
+
y_vec = GSL::Vector.alloc(new_ys)
|
56
|
+
coef, cov, chisq, status = GSL::Poly.fit(x_vec,y_vec, 3)
|
57
|
+
coef.eval(x.last)
|
58
|
+
#x2 = GSL::Vector::linspace(0,2.4,20)
|
59
|
+
#graph([x_vec,y_vec], [x2, coef.eval(x2)], "-C -g 3 -S 4")
|
60
|
+
end
|
61
|
+
=end
|
62
|
+
|
33
63
|
# expecting x and y to make a scatter plot descending to a plateau on the
|
34
64
|
# right side (which is assumed to be of increasing noise as it goes to the
|
35
65
|
# right)
|
@@ -42,46 +72,19 @@ module PiZero
|
|
42
72
|
# ** *** * *
|
43
73
|
# ***** **** ***
|
44
74
|
def plateau_height(x, y)
|
45
|
-
=begin
|
46
|
-
require 'gsl'
|
47
|
-
x_deltas = (0...(x.size-1)).to_a.map do |i|
|
48
|
-
x[i+1] - x[i]
|
49
|
-
end
|
50
|
-
y_deltas = (0...(y.size-1)).to_a.map do |i|
|
51
|
-
y[i+1] - y[i]
|
52
|
-
end
|
53
|
-
new_xs = x.dup
|
54
|
-
new_ys = y.dup
|
55
|
-
x_deltas.reverse.each do |delt|
|
56
|
-
new_xs.push( new_xs.last + delt )
|
57
|
-
end
|
58
|
-
|
59
|
-
y_cnt = y.size
|
60
|
-
y_deltas.reverse.each do |delt|
|
61
|
-
y_cnt -= 1
|
62
|
-
new_ys.push( y[y_cnt] - delt )
|
63
|
-
end
|
64
|
-
|
65
|
-
x_vec = GSL::Vector.alloc(new_xs)
|
66
|
-
y_vec = GSL::Vector.alloc(new_ys)
|
67
|
-
coef, cov, chisq, status = GSL::Poly.fit(x_vec,y_vec, 3)
|
68
|
-
coef.eval(x.last)
|
69
|
-
#x2 = GSL::Vector::linspace(0,2.4,20)
|
70
|
-
#graph([x_vec,y_vec], [x2, coef.eval(x2)], "-C -g 3 -S 4")
|
71
|
-
=end
|
72
|
-
|
73
75
|
r = RSRuby.instance
|
74
76
|
answ = r.smooth_spline(x,y, :df => 3)
|
75
77
|
## to plot it!
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
r.plot(x,y, :ylab=>"pi_zeros or frit")
|
79
|
+
r.lines(answ['x'], answ['y'])
|
80
|
+
r.points(answ['x'], answ['y'])
|
81
|
+
sleep(4)
|
80
82
|
|
81
83
|
answ['y'].last
|
82
84
|
end
|
83
85
|
|
84
86
|
def plateau_exponential(x,y)
|
87
|
+
require 'gsl'
|
85
88
|
xvec = GSL::Vector.alloc(x)
|
86
89
|
yvec = GSL::Vector.alloc(y)
|
87
90
|
a2, b2, = GSL::Fit.linear(xvec, GSL::Sf::log(yvec))
|
@@ -93,9 +96,10 @@ module PiZero
|
|
93
96
|
|
94
97
|
end
|
95
98
|
|
96
|
-
# returns a conservative (but close) estimate of pi_0 given
|
99
|
+
# returns a conservative (but close) estimate of pi_0 given p-values
|
97
100
|
# following Storey et al. 2003, PNAS.
|
98
|
-
def pi_zero(
|
101
|
+
def pi_zero(pvals)
|
102
|
+
sorted_pvals = pvals.sort
|
99
103
|
plateau_height( *(pi_zero_hats(sorted_pvals)) )
|
100
104
|
end
|
101
105
|
|
@@ -161,6 +165,8 @@ module PiZero
|
|
161
165
|
p_values(target_hits.map {|v| v.xcorr}, new_decoy_vals )
|
162
166
|
end
|
163
167
|
|
168
|
+
#### NEED TO VERIFY if this is PIT or PI_ZERO!
|
169
|
+
=begin
|
164
170
|
# takes a list of booleans with true being a target hit and false being a
|
165
171
|
# decoy hit and returns the pi_zero using the smooth method
|
166
172
|
# Should be ordered from best to worst (i.e., one expects more true values
|
@@ -184,12 +190,32 @@ module PiZero
|
|
184
190
|
ys.reverse!
|
185
191
|
plateau_height(xs, ys)
|
186
192
|
end
|
193
|
+
=end
|
194
|
+
|
195
|
+
# returns fraction of incorrect target hits (frit) (this is the percent
|
196
|
+
# incorrect targets [PIT] expressed as a fraction rather than percent)
|
197
|
+
# takes two parallel arrays consisting of the total number of hits (this
|
198
|
+
# will typically be the total # target hits) at that point and the
|
199
|
+
# precision (ranging from: [0,1]) (typically determined by counting the
|
200
|
+
# number of decoy hits). Expects the number of total hits to be
|
201
|
+
# monotonically increasing and the precision to roughly start high and
|
202
|
+
# decrease as more hits (of lesser quality) are added.
|
203
|
+
def frit_from_precision(total_num_hits_ar, precision_ar)
|
204
|
+
instant_pi_zeros = []
|
205
|
+
total_num_hits_ar.reverse.zip(precision_ar.reverse).each_cons(2) do |dp1, dp0|
|
206
|
+
(x1, y1) = dp1
|
207
|
+
(x0, y0) = dp0
|
208
|
+
instant_pi_zeros << ((x1 * (1.0 - y1)) - (x0 * (1.0 - y0) )) / (x1 - x0)
|
209
|
+
end
|
210
|
+
instant_pi_zeros.reverse!
|
211
|
+
plateau_height(total_num_hits_ar[1..-1], instant_pi_zeros)
|
212
|
+
end
|
187
213
|
|
188
214
|
# Takes an array of doublets ([[int, int], [int, int]...]) where the first
|
189
215
|
# value is the number of target hits and the second is the number of decoy
|
190
216
|
# hits. Expects that best hits are at the beginning of the list. Assumes
|
191
|
-
# that each sum is a subset
|
192
|
-
#
|
217
|
+
# that each sum is a subset of the following group (shown as actual hits
|
218
|
+
# rather than number of hits):
|
193
219
|
#
|
194
220
|
# [[target, target, target, decoy], [target, target, target, decoy,
|
195
221
|
# target, decoy, target], [target, target, target, decoy, target,
|
@@ -197,31 +223,22 @@ module PiZero
|
|
197
223
|
#
|
198
224
|
# This assumption may be relaxed somewhat and should still give good
|
199
225
|
# results.
|
200
|
-
def
|
201
|
-
|
226
|
+
def frit_from_groups(array_of_doublets)
|
227
|
+
frits = []
|
202
228
|
array_of_doublets.reverse.each_cons(2) do |two_doublets|
|
203
229
|
bigger, smaller = two_doublets
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
if
|
208
|
-
|
230
|
+
num_targets = bigger[0] - smaller[0]
|
231
|
+
num_decoy = bigger[1] - smaller[1]
|
232
|
+
num_targets = 0 if num_targets < 0
|
233
|
+
num_decoy = 0 if num_targets < 0
|
234
|
+
if num_decoy > 0
|
235
|
+
frits << (num_targets.to_f / num_decoy)
|
209
236
|
end
|
210
237
|
end
|
211
|
-
|
212
|
-
xs = (0...(
|
213
|
-
plateau_height(xs,
|
238
|
+
frits.reverse!
|
239
|
+
xs = (0...(frits.size)).to_a
|
240
|
+
plateau_height(xs, frits)
|
214
241
|
end
|
215
242
|
|
216
243
|
end
|
217
|
-
|
218
|
-
|
219
|
-
end
|
220
|
-
|
221
|
-
if $0 == __FILE__
|
222
|
-
#xcorrs = IO.readlines("/home/jtprince/xcorr_hist/all_xcorrs.yada").first.chomp.split(/\s+/).map {|v| v.to_f }
|
223
|
-
#PiZero.p_values_for_sequest(
|
224
|
-
#File.open("newtail.yada", 'w') {|out| out.puts new_dist.join(" ") }
|
225
|
-
|
226
|
-
|
227
244
|
end
|
data/lib/qvalue.rb
CHANGED
@@ -79,41 +79,50 @@ class VecD
|
|
79
79
|
end
|
80
80
|
|
81
81
|
pi_zeros = lambda_vals.map {|val| self.pi_zero_at_lambda(val) }
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
(0...lsz).each do |i|
|
106
|
-
pi0_boot[i] = ( p_boot.select{|v| v > lambda_vals[i] }.size.to_f/p_boot.size ) / (1-lambda_vals[i])
|
82
|
+
|
83
|
+
r = RSRuby.instance
|
84
|
+
r.plot(lambda_vals,pi_zeros, :ylab=>"instantaneous pi_zeros")
|
85
|
+
answ = r.smooth_spline(lambda_vals, pi_zeros, :df => Default_smooth_df)
|
86
|
+
r.lines(answ['x'], answ['y'])
|
87
|
+
r.points(answ['x'], answ['y'])
|
88
|
+
sleep(20)
|
89
|
+
|
90
|
+
answer =
|
91
|
+
if lambda_vals.size == 1
|
92
|
+
pi_zeros.first
|
93
|
+
else
|
94
|
+
case method
|
95
|
+
when :smooth
|
96
|
+
r = RSRuby.instance
|
97
|
+
calc_pi_zero = lambda do |_pi_zeros|
|
98
|
+
hash = r.smooth_spline(lambda_vals, _pi_zeros, :df => Default_smooth_df)
|
99
|
+
hash['y'][VecD.new(lambda_vals).max_indices.max]
|
100
|
+
end
|
101
|
+
if log_transform
|
102
|
+
pi_zeros.log_space {|log_vals| calc_pi_zero.call(log_vals) }
|
103
|
+
else
|
104
|
+
calc_pi_zero.call(pi_zeros)
|
107
105
|
end
|
108
|
-
|
106
|
+
when :bootstrap
|
107
|
+
min_pi0 = pi_zeros.min
|
108
|
+
lsz = lambda_vals.size
|
109
|
+
mse = VecD.new(lsz, 0)
|
110
|
+
pi0_boot = VecD.new(lsz, 0)
|
111
|
+
sz = self.size
|
112
|
+
100.times do # for(i in 1:100) {
|
113
|
+
p_boot = self.shuffle
|
114
|
+
(0...lsz).each do |i|
|
115
|
+
pi0_boot[i] = ( p_boot.select{|v| v > lambda_vals[i] }.size.to_f/p_boot.size ) / (1-lambda_vals[i])
|
116
|
+
end
|
117
|
+
mse = mse + ( (pi0_boot-min_pi0)**2 )
|
118
|
+
end
|
119
|
+
# pi0 <- min(pi0[mse==min(mse)])
|
120
|
+
pi_zero = pi_zeros.values_at(*(mse.min_indices)).min
|
121
|
+
[pi_zero,1].min
|
122
|
+
else
|
123
|
+
raise ArgumentError, ":pi_zero_method must be :smooth or :bootstrap!"
|
109
124
|
end
|
110
|
-
# pi0 <- min(pi0[mse==min(mse)])
|
111
|
-
pi_zero = pi_zeros.values_at(*(mse.min_indices)).min
|
112
|
-
[pi_zero,1].min
|
113
|
-
else
|
114
|
-
raise ArgumentError, ":pi_zero_method must be :smooth or :bootstrap!"
|
115
125
|
end
|
116
|
-
end
|
117
126
|
end
|
118
127
|
|
119
128
|
# Returns a VecD filled with parallel q-values
|
@@ -16,7 +16,10 @@ module SpecID
|
|
16
16
|
:to_qvalues => ['--to_qvalues', "transform probabilities into q-values",
|
17
17
|
"(includes pi_0 correction)",
|
18
18
|
"uses PROB [TYPE] if given and supercedes",
|
19
|
-
"the prob validation type"
|
19
|
+
"the prob validation type",
|
20
|
+
"*NOTE: include all PeptideProphet results",
|
21
|
+
"(don't use any low prob cutoff) for",
|
22
|
+
"accurate results!"],
|
20
23
|
:prob => ['--prob [TYPE]', "use prophet probabilites to calculate precision",
|
21
24
|
"TYPE = nsp [default] prophet nsp",
|
22
25
|
" (nsp also should be used for PeptideProphet results)",
|
@@ -226,7 +226,7 @@ class SpecID::Precision::Prob
|
|
226
226
|
else
|
227
227
|
out[:probabilities] = probabilities
|
228
228
|
end
|
229
|
-
out[:pephits] = ordered_peps # just in case they want to see
|
229
|
+
# out[:pephits] = ordered_peps # just in case they want to see
|
230
230
|
out[:count] = num_pephits
|
231
231
|
out[:aaseqs] = pepstrings
|
232
232
|
out[:charges] = pepcharges
|
data/lib/spec_id/srf.rb
CHANGED
@@ -73,6 +73,7 @@ class SRFGroup
|
|
73
73
|
IO.readlines(file).grep(/\w/).map {|v| v.chomp }
|
74
74
|
end
|
75
75
|
|
76
|
+
|
76
77
|
# if srfs were read in separately, then the proteins will need to be merged
|
77
78
|
# by their reference
|
78
79
|
def merge_different_sets(srfs)
|
@@ -184,6 +185,20 @@ class SRF
|
|
184
185
|
|
185
186
|
attr_accessor :filtered_by_precursor_mass_tolerance
|
186
187
|
|
188
|
+
# returns a Sequest::Params object
|
189
|
+
def self.get_sequest_params(filename)
|
190
|
+
# split the file in half and only read the second half (since we can be
|
191
|
+
# confident that the params file will be there!)
|
192
|
+
File.open(filename) do |handle|
|
193
|
+
halfway = handle.stat.size / 2
|
194
|
+
handle.seek halfway
|
195
|
+
last_half = handle.read
|
196
|
+
params_start_index = last_half.rindex('[SEQUEST]') + halfway
|
197
|
+
handle.seek(params_start_index)
|
198
|
+
Sequest::Params.new.parse_handle(handle)
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
187
202
|
def dta_start_byte
|
188
203
|
case @version
|
189
204
|
when '3.2' ; 3260
|
@@ -468,10 +483,24 @@ class SRF
|
|
468
483
|
|
469
484
|
# returns self
|
470
485
|
def from_file(filename, peps, global_ref_hash)
|
486
|
+
dups = SRF.get_sequest_params(filename).print_duplicate_references
|
487
|
+
if dups == '0'
|
488
|
+
raise RuntimeError, <<END
|
489
|
+
|
490
|
+
***************************************************************************
|
491
|
+
Sorry, but the SRF reader cannot read this file!
|
492
|
+
.srf files must currently be created with print_duplicate_references > 0
|
493
|
+
(This is how the srf object can link peptides with proteins!)
|
494
|
+
To capture all duplicate references, set the sequest parameter
|
495
|
+
'print_duplicate_references' to 100 or greater.
|
496
|
+
***************************************************************************
|
497
|
+
END
|
498
|
+
end
|
471
499
|
|
472
500
|
File.open(filename, "rb") do |fh|
|
473
501
|
@header = SRF::Header.new.from_handle(fh)
|
474
502
|
@version = @header.version
|
503
|
+
|
475
504
|
unpack_35 = case @version
|
476
505
|
when '3.2'
|
477
506
|
false
|
@@ -824,6 +853,7 @@ class SRF::OUT::Pep
|
|
824
853
|
end
|
825
854
|
|
826
855
|
def self.read_extra_references(fh, num_extra_references, pep_hits, global_ref_hash)
|
856
|
+
p num_extra_references
|
827
857
|
num_extra_references.times do
|
828
858
|
# 80 bytes total (with index number)
|
829
859
|
pep = pep_hits[fh.read(8).unpack('x4I').first - 1]
|
data/lib/validator/cmdline.rb
CHANGED
@@ -41,7 +41,7 @@ class Validator::Cmdline
|
|
41
41
|
{
|
42
42
|
:hits_together => true,
|
43
43
|
:decoy_on_match => true,
|
44
|
-
:
|
44
|
+
:frit => 1.0, # fraction incorrect targets (like PIT)
|
45
45
|
},
|
46
46
|
:bad_aa =>
|
47
47
|
{
|
@@ -61,7 +61,7 @@ class Validator::Cmdline
|
|
61
61
|
:ties => true,
|
62
62
|
}
|
63
63
|
COMMAND_LINE = {
|
64
|
-
:decoy => ["--decoy /REGEXP/|FILENAME[,
|
64
|
+
:decoy => ["--decoy /REGEXP/|FILENAME[,PIT,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
|
65
65
|
"FILENAME of separate search on decoys.",
|
66
66
|
"All regular expressions must be surrounded by '/'",
|
67
67
|
"(no extended options [trailing modifiers]).",
|
@@ -72,7 +72,8 @@ class Validator::Cmdline
|
|
72
72
|
" --decoy '/^\\s*REVERSE/'",
|
73
73
|
"If decoys proteins were searched in a separate file,",
|
74
74
|
"then give the FILENAME (e.g., --decoy decoy.srg)",
|
75
|
-
"
|
75
|
+
"FRIT = Fraction Incorrect Targets (like",
|
76
|
+
"the PIT as a fraction) (default: #{DEFAULTS[:decoy][:frit]})",
|
76
77
|
"DOM = *true/false, decoy on match",],
|
77
78
|
:tps => ["--tps <fasta>", "for a completely defined sample, this is the",
|
78
79
|
"fasta file containing the true protein hits"],
|
@@ -156,7 +157,7 @@ class Validator::Cmdline
|
|
156
157
|
raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
|
157
158
|
first_arg
|
158
159
|
end
|
159
|
-
val_opts[:
|
160
|
+
val_opts[:frit] = (ar[1] || DEFAULTS[:decoy][:frit]).to_f
|
160
161
|
val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
|
161
162
|
myargs.push(val_opts)
|
162
163
|
opts[:validators].push(myargs)
|
@@ -295,6 +296,7 @@ class Validator::Cmdline
|
|
295
296
|
val = 1e-9 if val == 0
|
296
297
|
val
|
297
298
|
end
|
299
|
+
File.open("TMP_PVALUES.txt", 'w') {|v| v.puts pvals.sort.join(" ") }
|
298
300
|
pvals = VecD.new(pvals)
|
299
301
|
#qvals = pvals.qvalues(false, :lambda_vals => 0.30 )
|
300
302
|
qvals = pvals.qvalues
|
data/lib/validator/decoy.rb
CHANGED
@@ -9,10 +9,11 @@ class Validator::Decoy < Validator
|
|
9
9
|
attr_accessor :decoy_on_match
|
10
10
|
attr_accessor :correct_wins
|
11
11
|
# This is the number of incorrect target hits over the total decoy hits
|
12
|
-
# The
|
13
|
-
#
|
12
|
+
# The percent incorrect targets (PIT) expressed as a fraction (== 1 - PI_0).
|
13
|
+
# The rough, conservative ballpark estimate is the ratio of target hits to
|
14
|
+
# decoy hits. This can be refined by removing the number of true target
|
14
15
|
# hits from the targets used to calculate it.
|
15
|
-
attr_accessor :
|
16
|
+
attr_accessor :frit
|
16
17
|
|
17
18
|
attr_accessor :last_pep_was_decoy
|
18
19
|
|
@@ -25,12 +26,12 @@ class Validator::Decoy < Validator
|
|
25
26
|
DEFAULTS = {
|
26
27
|
:decoy_on_match => true,
|
27
28
|
:correct_wins => true,
|
28
|
-
:
|
29
|
+
:frit => 1.0,
|
29
30
|
}
|
30
31
|
|
31
32
|
def initialize(opts={})
|
32
33
|
merged = DEFAULTS.merge(opts)
|
33
|
-
@constraint, @decoy_on_match, @correct_wins, @
|
34
|
+
@constraint, @decoy_on_match, @correct_wins, @frit = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :frit)
|
34
35
|
end
|
35
36
|
|
36
37
|
# returns [normal, decoy] (?? I think ??)
|
@@ -86,15 +87,15 @@ class Validator::Decoy < Validator
|
|
86
87
|
@normal_peps_just_submitted = normal
|
87
88
|
@increment_normal += normal.size
|
88
89
|
@increment_decoy += decoy.size
|
89
|
-
calc_precision(@increment_normal, @increment_decoy, @
|
90
|
+
calc_precision(@increment_normal, @increment_decoy, @frit)
|
90
91
|
end
|
91
92
|
|
92
93
|
def pephit_precision(peps, separate_peps=nil)
|
93
94
|
if separate_peps
|
94
|
-
calc_precision(peps.size, separate_peps.size, @
|
95
|
+
calc_precision(peps.size, separate_peps.size, @frit)
|
95
96
|
else
|
96
97
|
(norm, decoy) = partition(peps)
|
97
|
-
calc_precision(norm.size, decoy.size, @
|
98
|
+
calc_precision(norm.size, decoy.size, @frit)
|
98
99
|
end
|
99
100
|
end
|
100
101
|
|
data/lib/validator.rb
CHANGED
@@ -167,11 +167,11 @@ end
|
|
167
167
|
# normal hits (which may be true or false) and the second are decoy hits.
|
168
168
|
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
|
169
169
|
module Precision::Calculator::Decoy
|
170
|
-
def calc_precision(num_normal, num_decoy,
|
170
|
+
def calc_precision(num_normal, num_decoy, frit=1.0)
|
171
171
|
# will calculate as floats in case fractional amounts passed in for
|
172
172
|
# whatever reason
|
173
173
|
num_normal_f = num_normal.to_f
|
174
|
-
num_true_pos = num_normal_f - (num_decoy.to_f *
|
174
|
+
num_true_pos = num_normal_f - (num_decoy.to_f * frit)
|
175
175
|
precision =
|
176
176
|
if num_normal_f == 0.0
|
177
177
|
if num_decoy.to_f > 0.0
|
data/specs/pi_zero_spec.rb
CHANGED
@@ -18,7 +18,7 @@ describe PiZero do
|
|
18
18
|
@sorted_pvals = [0.0, 0.1, 0.223, 0.24, 0.55, 0.68, 0.68, 0.90, 0.98, 1.0]
|
19
19
|
end
|
20
20
|
|
21
|
-
|
21
|
+
xit 'calculates instantaneous pi_0 hats' do
|
22
22
|
answ = PiZero.pi_zero_hats(@sorted_pvals, :step => 0.1)
|
23
23
|
exp_lambdas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
|
24
24
|
passing_threshold = [9, 8, 8, 6, 6, 6, 5, 3, 3, 2]
|
@@ -36,7 +36,7 @@ describe PiZero do
|
|
36
36
|
# still working on this one
|
37
37
|
end
|
38
38
|
|
39
|
-
|
39
|
+
xit 'can find a plateau height' do
|
40
40
|
x = [0.0, 0.01, 0.012, 0.13, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
|
41
41
|
y = [1.0, 0.95, 0.92, 0.8, 0.7, 0.6, 0.55, 0.58, 0.62, 0.53, 0.54, 0.59, 0.4, 0.72]
|
42
42
|
z = PiZero.plateau_height(x,y)
|
@@ -47,7 +47,7 @@ describe PiZero do
|
|
47
47
|
#sleep(8)
|
48
48
|
end
|
49
49
|
|
50
|
-
|
50
|
+
xit 'can calculate p values for SEQUEST hits' do
|
51
51
|
class FakeSequest ; attr_accessor :xcorr ; def initialize(xcorr) ; @xcorr = xcorr ; end ; end
|
52
52
|
|
53
53
|
target = []
|
@@ -68,13 +68,13 @@ describe PiZero do
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
|
71
|
+
xit 'can calculate pi zero for target/decoy booleans' do
|
72
72
|
pi_zero = PiZero.pi_zero_from_booleans(@bools)
|
73
73
|
# frozen
|
74
74
|
pi_zero.should be_close(0.03522869, 0.0001)
|
75
75
|
end
|
76
76
|
|
77
|
-
it 'can calculate
|
77
|
+
it 'can calculate frit for groups of hits' do
|
78
78
|
# setup
|
79
79
|
targets = [4,3,8,3,5,3,4,5,4]
|
80
80
|
decoys = [0,2,2,3,5,7,8,8,8]
|
@@ -95,9 +95,20 @@ describe PiZero do
|
|
95
95
|
decoys_summed << sum
|
96
96
|
end
|
97
97
|
zipped = targets_summed.zip(decoys_summed)
|
98
|
-
|
98
|
+
frit = PiZero.frit_from_groups(zipped)
|
99
99
|
# frozen
|
100
|
-
|
100
|
+
frit.should be_close(0.384064, 0.00001)
|
101
|
+
end
|
102
|
+
|
103
|
+
xit 'can calcuate pi zero for total number of hits and precision' do
|
104
|
+
tot_hits = [1,10,20,30,50,200]
|
105
|
+
precision = [1.0, 1.0, 0.85, 0.80, 0.7, 0.5]
|
106
|
+
reply = PiZero.frit_from_precision(tot_hits, precision)
|
107
|
+
puts "ANSER"
|
108
|
+
# frozen
|
109
|
+
puts reply
|
110
|
+
#reply.should be_close()
|
111
|
+
|
101
112
|
end
|
102
113
|
|
103
114
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mspire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Prince
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-11-06 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|