mspire 0.1.5 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +5 -2
- data/bin/bioworks_to_pepxml.rb +84 -40
- data/bin/fasta_shaker.rb +100 -0
- data/bin/filter_spec_id.rb +185 -23
- data/bin/gi2annot.rb +2 -110
- data/bin/id_class_anal.rb +31 -21
- data/bin/id_precision.rb +12 -8
- data/bin/{false_positive_rate.rb → precision.rb} +1 -1
- data/bin/protein_summary.rb +55 -62
- data/changelog.txt +34 -0
- data/lib/align.rb +0 -1
- data/lib/fasta.rb +88 -24
- data/lib/gi.rb +114 -0
- data/lib/roc.rb +64 -58
- data/lib/spec_id/aa_freqs.rb +166 -0
- data/lib/spec_id/bioworks.rb +5 -1
- data/lib/spec_id/precision.rb +427 -0
- data/lib/spec_id/proph.rb +2 -2
- data/lib/spec_id/sequest.rb +810 -113
- data/lib/spec_id/srf.rb +486 -0
- data/lib/spec_id.rb +107 -23
- data/release_notes.txt +11 -0
- data/script/estimate_fpr_by_cysteine.rb +226 -0
- data/script/filter-peps.rb +3 -3
- data/script/find_cysteine_background.rb +137 -0
- data/script/gen_database_searching.rb +11 -7
- data/script/genuine_tps_and_probs.rb +136 -0
- data/script/top_hit_per_scan.rb +5 -2
- data/test/tc_aa_freqs.rb +59 -0
- data/test/tc_bioworks.rb +6 -1
- data/test/tc_bioworks_to_pepxml.rb +25 -18
- data/test/tc_fasta.rb +81 -3
- data/test/tc_fasta_shaker.rb +147 -0
- data/test/tc_gi.rb +20 -0
- data/test/tc_id_class_anal.rb +9 -12
- data/test/tc_id_precision.rb +12 -11
- data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
- data/test/tc_protein_summary.rb +31 -22
- data/test/tc_roc.rb +95 -50
- data/test/tc_sequest.rb +212 -145
- data/test/tc_spec.rb +10 -5
- data/test/tc_spec_id.rb +0 -2
- data/test/tc_spec_id_xml.rb +36 -0
- data/test/tc_srf.rb +216 -0
- metadata +35 -21
- data/lib/spec_id/false_positive_rate.rb +0 -476
- data/test/tc_gi2annot.rb +0 -12
data/test/tc_id_precision.rb
CHANGED
@@ -13,20 +13,21 @@ class IDPrecisionTest < Test::Unit::TestCase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_usage
|
16
|
-
puts "RUNNING: #{@cmd}"
|
16
|
+
#puts "RUNNING: #{@cmd}"
|
17
17
|
assert_match(/usage:/, `#{@cmd}`)
|
18
18
|
end
|
19
19
|
|
20
20
|
## freeze the output
|
21
21
|
def test_basic
|
22
22
|
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml}"
|
23
|
-
puts "RUNNING: #{cmd}"
|
23
|
+
#puts "RUNNING: #{cmd}"
|
24
24
|
reply = `#{cmd}`
|
25
25
|
string =<<END
|
26
|
+
# NH = number of hits
|
26
27
|
# TP = true positives
|
27
28
|
# FP = false positives
|
28
29
|
# PR = precision = TP/(TP+FP)
|
29
|
-
PepProts:
|
30
|
+
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
30
31
|
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
31
32
|
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
32
33
|
125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
|
@@ -34,23 +35,23 @@ PepProts: TP,PepProts: PR,SeqCharge: TP,SeqCharge: PR,Scan(TopHit): TP,Scan(TopH
|
|
34
35
|
186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
|
35
36
|
193, 1.0, 94, 1.0, 168, 1.0, 193, 1.0, 170, 1.0, 193, 1.0
|
36
37
|
204, 1.0, 95, 1.0, 169, 1.0, 204, 1.0, 171, 1.0, 204, 1.0
|
37
|
-
212, 1.0, 97, 0.
|
38
|
-
214, 0.
|
39
|
-
216, 0.
|
40
|
-
227, 0.
|
41
|
-
228, 0.
|
42
|
-
229, 0.
|
38
|
+
212, 1.0, 97, 0.989690721649485, 171, 0.994152046783626, 212, 1.0, 173, 0.994219653179191, 212, 1.0
|
39
|
+
214, 0.995327102803738, 99, 0.98989898989899, 172, 0.994186046511628, 214, 0.995327102803738, 175, 0.994285714285714, 214, 0.995327102803738
|
40
|
+
216, 0.99537037037037, 106, 0.990566037735849, 180, 0.994444444444444, 216, 0.99537037037037, 183, 0.994535519125683, 216, 0.99537037037037
|
41
|
+
227, 0.995594713656388, 107, 0.990654205607477, 181, 0.994475138121547, 227, 0.995594713656388, 184, 0.994565217391304, 227, 0.995594713656388
|
42
|
+
228, 0.995614035087719, 108, 0.981481481481482, 182, 0.989010989010989, 228, 0.995614035087719, 185, 0.989189189189189, 228, 0.995614035087719
|
43
|
+
229, 0.991266375545852, , , , , 229, 0.991266375545852, , , 229, 0.991266375545852
|
43
44
|
END
|
44
45
|
assert_equal(string, reply)
|
45
46
|
end
|
46
47
|
|
47
48
|
def test_basic_with_area
|
48
49
|
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
|
49
|
-
puts "RUNNING: #{cmd}"
|
50
|
+
#puts "RUNNING: #{cmd}"
|
50
51
|
reply = `#{cmd}`
|
51
52
|
string =<<END
|
52
53
|
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
|
53
|
-
|
54
|
+
./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
|
54
55
|
END
|
55
56
|
assert_equal(string, reply, "area under the curve")
|
56
57
|
end
|
@@ -3,18 +3,17 @@ require 'test/unit'
|
|
3
3
|
require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
|
4
4
|
|
5
5
|
|
6
|
-
class
|
6
|
+
class PrecisionTest < Test::Unit::TestCase
|
7
7
|
ROOT_DIR = File.join(File.dirname(__FILE__), "..")
|
8
8
|
|
9
9
|
def initialize(arg)
|
10
10
|
super(arg)
|
11
11
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
12
12
|
@tf_bioworks_esmall_xml = @tfiles + "bioworks_with_INV_small.xml"
|
13
|
-
@tf_bioworks_small_xml = @tfiles + "bioworks_small.xml"
|
14
13
|
@tf_bioworks_shuff = @tfiles + "bioworks_with_SHUFF_small.xml"
|
15
|
-
@cmd = "ruby -I#{File.join(ROOT_DIR, "lib")} -S
|
16
|
-
@tf_html = "
|
17
|
-
@tf_png = "
|
14
|
+
@cmd = "ruby -I#{File.join(ROOT_DIR, "lib")} -S precision.rb "
|
15
|
+
@tf_html = @tfiles + "ppv.html"
|
16
|
+
@tf_png = @tfiles + "ppv.png"
|
18
17
|
@nodelete = false
|
19
18
|
end
|
20
19
|
|
@@ -22,20 +21,12 @@ class FalsePositiveRateTest < Test::Unit::TestCase
|
|
22
21
|
assert_match(/Usage:/, `#{@cmd}`)
|
23
22
|
end
|
24
23
|
|
25
|
-
def test_basic_double_dbs
|
26
|
-
output = `#{@cmd} -pg -f #{@tf_bioworks_shuff} #{@tf_bioworks_small_xml} #{@tf_bioworks_small_xml}`
|
27
|
-
assert_match('0.815', output, "precision")
|
28
|
-
assert_match('0.369', output, "Gygi's fpr")
|
29
|
-
assert_match('0.185', output, "fpr")
|
30
|
-
assert_match('106', output, "num true positives")
|
31
|
-
assert_match('24', output, "num false positives")
|
32
|
-
end
|
33
|
-
|
34
24
|
def test_basic_cat
|
35
|
-
output = `#{@cmd} -o
|
25
|
+
output = `#{@cmd} -o #{@tf_html} -f SHUFF_ #{@tf_bioworks_shuff}`
|
36
26
|
puts output
|
37
27
|
|
38
28
|
assert_match(/<table.*<\/table>/m, IO.read(@tf_html), "has html table in it")
|
29
|
+
assert_match(/10.*0.3000/m, IO.read(@tf_html), "has values")
|
39
30
|
[@tf_html, @tf_png].each do |file|
|
40
31
|
assert(File.exist?(file), "file #{file} exists")
|
41
32
|
File.unlink(file) unless @nodelete
|
@@ -43,7 +34,9 @@ class FalsePositiveRateTest < Test::Unit::TestCase
|
|
43
34
|
end
|
44
35
|
|
45
36
|
def test_multiple_files
|
46
|
-
output = `#{@cmd} -o
|
37
|
+
output = `#{@cmd} -o #{@tf_html} -f SHUFF_,INV_ #{@tf_bioworks_shuff} #{@tf_bioworks_esmall_xml}`
|
38
|
+
assert_match(/<table.*<\/table>/m, IO.read(@tf_html), "has html table in it")
|
39
|
+
assert_match(/1.*1.0000.*1.*1.0000.*0.*0.*15.*0.8667/m, IO.read(@tf_html), "has values")
|
47
40
|
[@tf_html, @tf_png].each do |file|
|
48
41
|
assert(File.exist?(file), "file #{file} exists")
|
49
42
|
File.unlink(file) unless @nodelete
|
@@ -51,17 +44,15 @@ class FalsePositiveRateTest < Test::Unit::TestCase
|
|
51
44
|
end
|
52
45
|
|
53
46
|
def test_area_under_curve
|
54
|
-
file =
|
55
|
-
`#{@cmd} -o #{file} -
|
47
|
+
file = @tfiles + 'ppv_area.txt'
|
48
|
+
`#{@cmd} -o #{file} -a -f SHUFF_ #{@tf_bioworks_shuff}`
|
56
49
|
assert(File.exist?(file), "file #{file} exists")
|
57
50
|
output = IO.read(file)
|
58
|
-
assert_match(/
|
59
|
-
assert_match(/Prec.*7.87189/, output)
|
60
|
-
assert_match(/FPR.*1.12810/, output)
|
51
|
+
assert_match(/Prec.*7.39206/, output, "consistency check")
|
61
52
|
File.unlink file
|
62
53
|
|
63
54
|
outfile = File.join(File.dirname(__FILE__), 'other.html')
|
64
|
-
`#{@cmd} -o #{outfile} -
|
55
|
+
`#{@cmd} -o #{outfile} -f SHUFF_ #{@tf_bioworks_shuff}`
|
65
56
|
File.unlink outfile
|
66
57
|
File.unlink File.join(File.dirname(__FILE__),'other.png')
|
67
58
|
end
|
data/test/tc_protein_summary.rb
CHANGED
@@ -11,8 +11,9 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
11
11
|
def initialize(arg)
|
12
12
|
super(arg)
|
13
13
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
14
|
-
@
|
15
|
-
@
|
14
|
+
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
15
|
+
@tf_proph = @tfiles_l + "opd1/000_020-prot.xml"
|
16
|
+
@tf_summary = @tfiles_l + "opd1/000_020-prot.summary.html"
|
16
17
|
@tf_bioworks_small = @tfiles + 'bioworks_small.xml'
|
17
18
|
@tf_bioworks_small_summary_html = @tfiles + 'bioworks_small.summary.html'
|
18
19
|
@tf_proph_cat_inv = @tfiles + 'opd1/opd1_cat_inv_small-prot.xml'
|
@@ -26,13 +27,17 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
26
27
|
assert_match(/usage:/, `#{@cmd}`)
|
27
28
|
end
|
28
29
|
|
29
|
-
def
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
30
|
+
def Xtest_proph_basic
|
31
|
+
if File.exist? @tfiles_l
|
32
|
+
print `#{@cmd} -c 5.0 #{@tf_proph}`
|
33
|
+
assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
|
34
|
+
string = IO.read(@tf_summary)
|
35
|
+
assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
|
36
|
+
assert_match(/16132176/, string)
|
37
|
+
File.unlink(@tf_summary) unless NODELETE
|
38
|
+
else
|
39
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
40
|
+
end
|
36
41
|
end
|
37
42
|
|
38
43
|
def test_bioworks_basic
|
@@ -43,30 +48,34 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
43
48
|
# @TODO: need to freeze the output here
|
44
49
|
end
|
45
50
|
|
46
|
-
def
|
47
|
-
`#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --
|
51
|
+
def test_bioworks_with_precision
|
52
|
+
`#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
|
48
53
|
assert_match('TP : 106', IO.read(@tf_bioworks_small_summary_html))
|
49
54
|
assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
|
50
55
|
assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
|
51
56
|
File.unlink @tf_bioworks_small_summary_html unless NODELETE
|
52
57
|
end
|
53
58
|
|
54
|
-
def
|
59
|
+
def Xtest_proph_with_precision
|
55
60
|
#puts @cmd
|
56
|
-
print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --
|
61
|
+
print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --precision`
|
57
62
|
File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
|
58
63
|
File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
|
59
64
|
end
|
60
65
|
|
61
|
-
def
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
def Xtest_peptide_count
|
67
|
+
if File.exist? @tfiles_l
|
68
|
+
print `#{@cmd} -c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}`
|
69
|
+
assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
|
70
|
+
file = IO.read(@tf_peptide_count)
|
71
|
+
assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
|
72
|
+
assert_match("gi|16131996|ref|NP_418595.1|\t1", file)
|
73
|
+
assert_match("gi|16131692|ref|NP_418288.1|\t4", file)
|
74
|
+
File.unlink @tf_peptide_count unless NODELETE
|
75
|
+
else
|
76
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
77
|
+
end
|
69
78
|
end
|
70
|
-
|
79
|
+
|
71
80
|
end
|
72
81
|
|
data/test/tc_roc.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
1
|
|
3
2
|
require 'test/unit'
|
4
3
|
|
@@ -8,42 +7,40 @@ class ROCTest < Test::Unit::TestCase
|
|
8
7
|
|
9
8
|
def initialize(arg)
|
10
9
|
super(arg)
|
11
|
-
@tp_methods = [:tps_and_precision, :tps_and_fpr2, :tps_and_fpr2_times2]
|
12
10
|
end
|
13
11
|
|
14
12
|
def test_area_under_curve
|
15
13
|
x = [1,2,3]
|
16
14
|
y = [2,3,4]
|
17
|
-
|
18
|
-
assert_equal(6, area)
|
15
|
+
_test_auc(6,x,y)
|
19
16
|
|
20
17
|
x = [1,2,3]
|
21
18
|
y = [-2,-3,-4]
|
22
|
-
|
23
|
-
assert_equal(-6, area)
|
19
|
+
_test_auc(-6, x, y)
|
24
20
|
|
25
21
|
x = [1,2,3]
|
26
22
|
y = [4,3,2]
|
27
|
-
|
28
|
-
assert_equal(6, area)
|
23
|
+
_test_auc(6, x, y)
|
29
24
|
|
30
25
|
x = [1,2,3]
|
31
26
|
y = [-4,-3,-2]
|
32
|
-
|
33
|
-
assert_equal(-6, area)
|
27
|
+
_test_auc(-6, x, y)
|
34
28
|
|
35
29
|
x = [4,5,6]
|
36
30
|
y = [2,1,2]
|
37
|
-
|
38
|
-
assert_equal(3, area)
|
31
|
+
_test_auc(3, x, y)
|
39
32
|
|
40
33
|
x = [4,5,6]
|
41
34
|
y = [-2,-1,-2]
|
35
|
+
_test_auc(-3, x, y)
|
36
|
+
end
|
37
|
+
|
38
|
+
def _test_auc(expected_area,x,y)
|
42
39
|
area = ROC.new.area_under_curve(x,y)
|
43
|
-
assert_equal(
|
40
|
+
assert_equal(expected_area, area)
|
44
41
|
end
|
45
42
|
|
46
|
-
|
43
|
+
def test_prep_list
|
47
44
|
t = true
|
48
45
|
f = false
|
49
46
|
x,y = ROC.new.prep_list([[0,f],[1,f],[2,f],[3,t],[3,f],[0,f],[4,f],[1,t],[2,t]])
|
@@ -51,71 +48,119 @@ class ROCTest < Test::Unit::TestCase
|
|
51
48
|
assert_equal([0,0,1,2,3,4], y)
|
52
49
|
end
|
53
50
|
|
54
|
-
def
|
51
|
+
def test_tps_and_ppv
|
55
52
|
tp = %w(1 2 3 4 5 6 6 6 7 8 9 10 10 10 10 11 12 ).collect {|c| c.to_f } # 17 total
|
56
53
|
fp = %w(3.5 4 5 5 5 6 6 6.5 7 8 9 9.5 10 15).collect {|c| c.to_f } # 14 total
|
57
54
|
xe = [1, 2, 3, 4, 5, 8, 9, 10, 11, 15, 16, 17]
|
58
55
|
# 1, 2, 3, 4 5, 6, 7, 8, 9
|
59
56
|
# 10 11 12
|
60
57
|
ye = [1, 1, 1, 4.0/6.0, 0.5, 8.0/(7.0+8.0), 9.0/(9.0+9.0), 0.5, 11.0/(11.0+ 11.0), 15.0/(15.0+13.0), 16.0/(16.0+13.0), 17.0/(17.0+13.0)]
|
61
|
-
|
62
|
-
x, y = ROC.new.tps_and_precision(tp,fp)
|
63
|
-
assert_equal(x.size, y.size)
|
64
|
-
assert_equal(xe, x)
|
65
|
-
assert_equal(ye, y, "complex real-life-like scenario")
|
66
|
-
end
|
58
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye,"complex real-life-like scenario")
|
67
59
|
|
68
|
-
def test_tps_and_precision
|
69
60
|
## leading fp's
|
70
61
|
tp = [1,2,3]
|
71
62
|
fp = [0,0,1,2,3,4]
|
72
63
|
xe = [1,2,3]
|
73
|
-
|
74
|
-
|
75
|
-
[6.0/(1+3), 8.0/(2+4), 10.0/(3+5)],
|
76
|
-
]
|
77
|
-
_test_tp_method(tp,fp,xe,ye_s,@tp_methods)
|
64
|
+
ye = [1.0/(1+3), 2.0/(2+4), 3.0/(3+5)]
|
65
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "leading fps")
|
78
66
|
|
79
67
|
## leading tp's
|
80
68
|
tp = [-1,2,3]
|
81
69
|
fp = [0,4]
|
82
70
|
xe = [1,2,3]
|
83
|
-
|
84
|
-
|
85
|
-
[0.0/(1+0), 2.0/(2+1), 2.0/(3+1)],
|
86
|
-
]
|
87
|
-
_test_tp_method(tp,fp,xe,ye_s,@tp_methods)
|
71
|
+
ye = [1.0/(1+0), 2.0/(2+1), 3.0/(3+1)]
|
72
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "leading tps")
|
88
73
|
|
89
74
|
## equal tp's leading
|
90
75
|
tp = [0.0001,0.0001,0.0001,2]
|
91
76
|
fp = [0.01,4.0]
|
92
77
|
xe = [3,4]
|
93
|
-
|
94
|
-
|
95
|
-
[0.0/(3+0), 2.0/(4+1)]
|
96
|
-
]
|
97
|
-
_test_tp_method(tp,fp,xe,ye_s,@tp_methods)
|
78
|
+
ye = [3.0/(3+0), 4.0/(4+1)]
|
79
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "equal tps leading")
|
98
80
|
|
99
81
|
## equal arrays with some repeated values
|
100
82
|
tp = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
101
83
|
fp = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
102
84
|
xe = [1,2,4,5,6,7]
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
85
|
+
ye = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
|
86
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "equal arrays with some repeated values")
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
def _test_tps_and_ppv_method(tp,fp,xe,ye,message='')
|
91
|
+
(x,y) = ROC.new.tps_and_ppv(tp,fp)
|
92
|
+
assert_equal(x.size, y.size)
|
93
|
+
assert_equal(xe, x)
|
94
|
+
assert_equal(ye, y, "tps_and_ppv: #{message}")
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
class DecoyROCTest < ROCTest
|
100
|
+
|
101
|
+
def test_pred_tps_ppv__leading_fps
|
102
|
+
## leading fp's
|
103
|
+
hits = [1,2,3]
|
104
|
+
decoys = [0,0,1,2,3,4]
|
105
|
+
num_hits_e = [1,2,3]
|
106
|
+
num_fps = [3,4,5]
|
107
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
108
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
109
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_pred_tps_ppv__leading_tps
|
113
|
+
## leading tp's
|
114
|
+
hits = [-1,2,3]
|
115
|
+
decoys = [0,4]
|
116
|
+
num_hits_e = [1,2,3]
|
117
|
+
num_fps = [0,1,1]
|
118
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
119
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
120
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_pred_tps_ppv__equal_tps_leading
|
124
|
+
hits = [0.0001,0.0001,0.0001,2]
|
125
|
+
decoys = [0.01,4.0]
|
126
|
+
num_hits_e = [3,4]
|
127
|
+
num_fps = [0,1]
|
128
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
129
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
130
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
108
131
|
end
|
109
132
|
|
110
|
-
def
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
133
|
+
def test_pred_tps_ppv__equal_arrays_with_some_repeated_values
|
134
|
+
hits = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
135
|
+
decoys = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
136
|
+
num_hits_e = [1,2,4,5,6,7]
|
137
|
+
num_fps = [1,2,4,5,6,7]
|
138
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
139
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
140
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
141
|
+
end
|
142
|
+
|
143
|
+
def _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
144
|
+
answer = DecoyROC.new.pred_and_tps_and_ppv(hits, decoys)
|
145
|
+
expected = [num_hits_e, tps_e, ppv_e]
|
146
|
+
%w(num_hits num_tps ppv).each_with_index do |cat, i|
|
147
|
+
assert_equal(expected[i], answer[i], cat)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def make_tps_e(num_fps, num_hits_e)
|
152
|
+
tps_e = []
|
153
|
+
num_hits_e.each_with_index do |v,i|
|
154
|
+
tps_e[i] = v - num_fps[i]
|
118
155
|
end
|
156
|
+
tps_e
|
157
|
+
end
|
158
|
+
|
159
|
+
def make_ppv_e(tps_e, num_hits_e)
|
160
|
+
ppv_e = []
|
161
|
+
tps_e.each_with_index {|v,i| ppv_e[i] = v.to_f/num_hits_e[i] }
|
162
|
+
ppv_e
|
119
163
|
end
|
120
164
|
|
121
165
|
end
|
166
|
+
|