mspire 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +5 -2
- data/bin/bioworks_to_pepxml.rb +84 -40
- data/bin/fasta_shaker.rb +100 -0
- data/bin/filter_spec_id.rb +185 -23
- data/bin/gi2annot.rb +2 -110
- data/bin/id_class_anal.rb +31 -21
- data/bin/id_precision.rb +12 -8
- data/bin/{false_positive_rate.rb → precision.rb} +1 -1
- data/bin/protein_summary.rb +55 -62
- data/changelog.txt +34 -0
- data/lib/align.rb +0 -1
- data/lib/fasta.rb +88 -24
- data/lib/gi.rb +114 -0
- data/lib/roc.rb +64 -58
- data/lib/spec_id/aa_freqs.rb +166 -0
- data/lib/spec_id/bioworks.rb +5 -1
- data/lib/spec_id/precision.rb +427 -0
- data/lib/spec_id/proph.rb +2 -2
- data/lib/spec_id/sequest.rb +810 -113
- data/lib/spec_id/srf.rb +486 -0
- data/lib/spec_id.rb +107 -23
- data/release_notes.txt +11 -0
- data/script/estimate_fpr_by_cysteine.rb +226 -0
- data/script/filter-peps.rb +3 -3
- data/script/find_cysteine_background.rb +137 -0
- data/script/gen_database_searching.rb +11 -7
- data/script/genuine_tps_and_probs.rb +136 -0
- data/script/top_hit_per_scan.rb +5 -2
- data/test/tc_aa_freqs.rb +59 -0
- data/test/tc_bioworks.rb +6 -1
- data/test/tc_bioworks_to_pepxml.rb +25 -18
- data/test/tc_fasta.rb +81 -3
- data/test/tc_fasta_shaker.rb +147 -0
- data/test/tc_gi.rb +20 -0
- data/test/tc_id_class_anal.rb +9 -12
- data/test/tc_id_precision.rb +12 -11
- data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
- data/test/tc_protein_summary.rb +31 -22
- data/test/tc_roc.rb +95 -50
- data/test/tc_sequest.rb +212 -145
- data/test/tc_spec.rb +10 -5
- data/test/tc_spec_id.rb +0 -2
- data/test/tc_spec_id_xml.rb +36 -0
- data/test/tc_srf.rb +216 -0
- metadata +35 -21
- data/lib/spec_id/false_positive_rate.rb +0 -476
- data/test/tc_gi2annot.rb +0 -12
data/test/tc_id_precision.rb
CHANGED
@@ -13,20 +13,21 @@ class IDPrecisionTest < Test::Unit::TestCase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_usage
|
16
|
-
puts "RUNNING: #{@cmd}"
|
16
|
+
#puts "RUNNING: #{@cmd}"
|
17
17
|
assert_match(/usage:/, `#{@cmd}`)
|
18
18
|
end
|
19
19
|
|
20
20
|
## freeze the output
|
21
21
|
def test_basic
|
22
22
|
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml}"
|
23
|
-
puts "RUNNING: #{cmd}"
|
23
|
+
#puts "RUNNING: #{cmd}"
|
24
24
|
reply = `#{cmd}`
|
25
25
|
string =<<END
|
26
|
+
# NH = number of hits
|
26
27
|
# TP = true positives
|
27
28
|
# FP = false positives
|
28
29
|
# PR = precision = TP/(TP+FP)
|
29
|
-
PepProts:
|
30
|
+
PepProts: NH,PepProts: PR,SeqCharge: NH,SeqCharge: PR,Scan(TopHit): NH,Scan(TopHit): PR,Scan(Top10): NH,Scan(Top10): PR,ScanCharge(TopHit): NH,ScanCharge(TopHit): PR,ScanCharge(Top10): NH,ScanCharge(Top10): PR
|
30
31
|
75, 1.0, 37, 1.0, 75, 1.0, 75, 1.0, 75, 1.0, 75, 1.0
|
31
32
|
95, 1.0, 49, 1.0, 95, 1.0, 95, 1.0, 95, 1.0, 95, 1.0
|
32
33
|
125, 1.0, 67, 1.0, 123, 1.0, 125, 1.0, 125, 1.0, 125, 1.0
|
@@ -34,23 +35,23 @@ PepProts: TP,PepProts: PR,SeqCharge: TP,SeqCharge: PR,Scan(TopHit): TP,Scan(TopH
|
|
34
35
|
186, 1.0, 90, 1.0, 161, 1.0, 186, 1.0, 163, 1.0, 186, 1.0
|
35
36
|
193, 1.0, 94, 1.0, 168, 1.0, 193, 1.0, 170, 1.0, 193, 1.0
|
36
37
|
204, 1.0, 95, 1.0, 169, 1.0, 204, 1.0, 171, 1.0, 204, 1.0
|
37
|
-
212, 1.0, 97, 0.
|
38
|
-
214, 0.
|
39
|
-
216, 0.
|
40
|
-
227, 0.
|
41
|
-
228, 0.
|
42
|
-
229, 0.
|
38
|
+
212, 1.0, 97, 0.989690721649485, 171, 0.994152046783626, 212, 1.0, 173, 0.994219653179191, 212, 1.0
|
39
|
+
214, 0.995327102803738, 99, 0.98989898989899, 172, 0.994186046511628, 214, 0.995327102803738, 175, 0.994285714285714, 214, 0.995327102803738
|
40
|
+
216, 0.99537037037037, 106, 0.990566037735849, 180, 0.994444444444444, 216, 0.99537037037037, 183, 0.994535519125683, 216, 0.99537037037037
|
41
|
+
227, 0.995594713656388, 107, 0.990654205607477, 181, 0.994475138121547, 227, 0.995594713656388, 184, 0.994565217391304, 227, 0.995594713656388
|
42
|
+
228, 0.995614035087719, 108, 0.981481481481482, 182, 0.989010989010989, 228, 0.995614035087719, 185, 0.989189189189189, 228, 0.995614035087719
|
43
|
+
229, 0.991266375545852, , , , , 229, 0.991266375545852, , , 229, 0.991266375545852
|
43
44
|
END
|
44
45
|
assert_equal(string, reply)
|
45
46
|
end
|
46
47
|
|
47
48
|
def test_basic_with_area
|
48
49
|
cmd = "#{@cmd} INV_ #{@tf_bioworks_inv_xml} -a"
|
49
|
-
puts "RUNNING: #{cmd}"
|
50
|
+
#puts "RUNNING: #{cmd}"
|
50
51
|
reply = `#{cmd}`
|
51
52
|
string =<<END
|
52
53
|
Filename PepProts SeqCharge Scan(TopHit) Scan(Top10) ScanCharge(TopHit) ScanCharge(Top10)
|
53
|
-
|
54
|
+
./test/tfiles/bioworks_with_INV_small.xml 228.925377117814 107.877585995136 181.929045912105 228.925377117814 184.924437525838 228.925377117814
|
54
55
|
END
|
55
56
|
assert_equal(string, reply, "area under the curve")
|
56
57
|
end
|
@@ -3,18 +3,17 @@ require 'test/unit'
|
|
3
3
|
require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
|
4
4
|
|
5
5
|
|
6
|
-
class
|
6
|
+
class PrecisionTest < Test::Unit::TestCase
|
7
7
|
ROOT_DIR = File.join(File.dirname(__FILE__), "..")
|
8
8
|
|
9
9
|
def initialize(arg)
|
10
10
|
super(arg)
|
11
11
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
12
12
|
@tf_bioworks_esmall_xml = @tfiles + "bioworks_with_INV_small.xml"
|
13
|
-
@tf_bioworks_small_xml = @tfiles + "bioworks_small.xml"
|
14
13
|
@tf_bioworks_shuff = @tfiles + "bioworks_with_SHUFF_small.xml"
|
15
|
-
@cmd = "ruby -I#{File.join(ROOT_DIR, "lib")} -S
|
16
|
-
@tf_html = "
|
17
|
-
@tf_png = "
|
14
|
+
@cmd = "ruby -I#{File.join(ROOT_DIR, "lib")} -S precision.rb "
|
15
|
+
@tf_html = @tfiles + "ppv.html"
|
16
|
+
@tf_png = @tfiles + "ppv.png"
|
18
17
|
@nodelete = false
|
19
18
|
end
|
20
19
|
|
@@ -22,20 +21,12 @@ class FalsePositiveRateTest < Test::Unit::TestCase
|
|
22
21
|
assert_match(/Usage:/, `#{@cmd}`)
|
23
22
|
end
|
24
23
|
|
25
|
-
def test_basic_double_dbs
|
26
|
-
output = `#{@cmd} -pg -f #{@tf_bioworks_shuff} #{@tf_bioworks_small_xml} #{@tf_bioworks_small_xml}`
|
27
|
-
assert_match('0.815', output, "precision")
|
28
|
-
assert_match('0.369', output, "Gygi's fpr")
|
29
|
-
assert_match('0.185', output, "fpr")
|
30
|
-
assert_match('106', output, "num true positives")
|
31
|
-
assert_match('24', output, "num false positives")
|
32
|
-
end
|
33
|
-
|
34
24
|
def test_basic_cat
|
35
|
-
output = `#{@cmd} -o
|
25
|
+
output = `#{@cmd} -o #{@tf_html} -f SHUFF_ #{@tf_bioworks_shuff}`
|
36
26
|
puts output
|
37
27
|
|
38
28
|
assert_match(/<table.*<\/table>/m, IO.read(@tf_html), "has html table in it")
|
29
|
+
assert_match(/10.*0.3000/m, IO.read(@tf_html), "has values")
|
39
30
|
[@tf_html, @tf_png].each do |file|
|
40
31
|
assert(File.exist?(file), "file #{file} exists")
|
41
32
|
File.unlink(file) unless @nodelete
|
@@ -43,7 +34,9 @@ class FalsePositiveRateTest < Test::Unit::TestCase
|
|
43
34
|
end
|
44
35
|
|
45
36
|
def test_multiple_files
|
46
|
-
output = `#{@cmd} -o
|
37
|
+
output = `#{@cmd} -o #{@tf_html} -f SHUFF_,INV_ #{@tf_bioworks_shuff} #{@tf_bioworks_esmall_xml}`
|
38
|
+
assert_match(/<table.*<\/table>/m, IO.read(@tf_html), "has html table in it")
|
39
|
+
assert_match(/1.*1.0000.*1.*1.0000.*0.*0.*15.*0.8667/m, IO.read(@tf_html), "has values")
|
47
40
|
[@tf_html, @tf_png].each do |file|
|
48
41
|
assert(File.exist?(file), "file #{file} exists")
|
49
42
|
File.unlink(file) unless @nodelete
|
@@ -51,17 +44,15 @@ class FalsePositiveRateTest < Test::Unit::TestCase
|
|
51
44
|
end
|
52
45
|
|
53
46
|
def test_area_under_curve
|
54
|
-
file =
|
55
|
-
`#{@cmd} -o #{file} -
|
47
|
+
file = @tfiles + 'ppv_area.txt'
|
48
|
+
`#{@cmd} -o #{file} -a -f SHUFF_ #{@tf_bioworks_shuff}`
|
56
49
|
assert(File.exist?(file), "file #{file} exists")
|
57
50
|
output = IO.read(file)
|
58
|
-
assert_match(/
|
59
|
-
assert_match(/Prec.*7.87189/, output)
|
60
|
-
assert_match(/FPR.*1.12810/, output)
|
51
|
+
assert_match(/Prec.*7.39206/, output, "consistency check")
|
61
52
|
File.unlink file
|
62
53
|
|
63
54
|
outfile = File.join(File.dirname(__FILE__), 'other.html')
|
64
|
-
`#{@cmd} -o #{outfile} -
|
55
|
+
`#{@cmd} -o #{outfile} -f SHUFF_ #{@tf_bioworks_shuff}`
|
65
56
|
File.unlink outfile
|
66
57
|
File.unlink File.join(File.dirname(__FILE__),'other.png')
|
67
58
|
end
|
data/test/tc_protein_summary.rb
CHANGED
@@ -11,8 +11,9 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
11
11
|
def initialize(arg)
|
12
12
|
super(arg)
|
13
13
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
14
|
-
@
|
15
|
-
@
|
14
|
+
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
15
|
+
@tf_proph = @tfiles_l + "opd1/000_020-prot.xml"
|
16
|
+
@tf_summary = @tfiles_l + "opd1/000_020-prot.summary.html"
|
16
17
|
@tf_bioworks_small = @tfiles + 'bioworks_small.xml'
|
17
18
|
@tf_bioworks_small_summary_html = @tfiles + 'bioworks_small.summary.html'
|
18
19
|
@tf_proph_cat_inv = @tfiles + 'opd1/opd1_cat_inv_small-prot.xml'
|
@@ -26,13 +27,17 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
26
27
|
assert_match(/usage:/, `#{@cmd}`)
|
27
28
|
end
|
28
29
|
|
29
|
-
def
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
30
|
+
def Xtest_proph_basic
|
31
|
+
if File.exist? @tfiles_l
|
32
|
+
print `#{@cmd} -c 5.0 #{@tf_proph}`
|
33
|
+
assert(File.exist?(@tf_summary), "file #{@tf_summary} exists")
|
34
|
+
string = IO.read(@tf_summary)
|
35
|
+
assert_match(/gi\|16132176\|ref\|NP_418775\.1\|/, string)
|
36
|
+
assert_match(/16132176/, string)
|
37
|
+
File.unlink(@tf_summary) unless NODELETE
|
38
|
+
else
|
39
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
40
|
+
end
|
36
41
|
end
|
37
42
|
|
38
43
|
def test_bioworks_basic
|
@@ -43,30 +48,34 @@ class ProphProtSummaryTest < Test::Unit::TestCase
|
|
43
48
|
# @TODO: need to freeze the output here
|
44
49
|
end
|
45
50
|
|
46
|
-
def
|
47
|
-
`#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --
|
51
|
+
def test_bioworks_with_precision
|
52
|
+
`#{@cmd} -f #{@tf_bioworks_small} #{@tf_bioworks_small} --precision`
|
48
53
|
assert_match('TP : 106', IO.read(@tf_bioworks_small_summary_html))
|
49
54
|
assert_match(/False Positive Rate.*: 0.500/, IO.read(@tf_bioworks_small_summary_html))
|
50
55
|
assert(File.exist?(@tf_bioworks_small_summary_html), "file #{@tf_bioworks_small_summary_html} exists")
|
51
56
|
File.unlink @tf_bioworks_small_summary_html unless NODELETE
|
52
57
|
end
|
53
58
|
|
54
|
-
def
|
59
|
+
def Xtest_proph_with_precision
|
55
60
|
#puts @cmd
|
56
|
-
print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --
|
61
|
+
print `#{@cmd} #{@tf_proph_cat_inv} -f INV_ --precision`
|
57
62
|
File.unlink @tf_proph_cat_inv_summary_html unless NODELETE
|
58
63
|
File.unlink @tf_proph_cat_inv_summary_png unless NODELETE
|
59
64
|
end
|
60
65
|
|
61
|
-
def
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
66
|
+
def Xtest_peptide_count
|
67
|
+
if File.exist? @tfiles_l
|
68
|
+
print `#{@cmd} -c 5.0 #{@tf_proph} --peptide_count #{@tf_peptide_count}`
|
69
|
+
assert(File.exist?(@tf_peptide_count), "file #{@tf_peptide_count} exists")
|
70
|
+
file = IO.read(@tf_peptide_count)
|
71
|
+
assert_match("gi|16132176|ref|NP_418775.1|\t2", file)
|
72
|
+
assert_match("gi|16131996|ref|NP_418595.1|\t1", file)
|
73
|
+
assert_match("gi|16131692|ref|NP_418288.1|\t4", file)
|
74
|
+
File.unlink @tf_peptide_count unless NODELETE
|
75
|
+
else
|
76
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
77
|
+
end
|
69
78
|
end
|
70
|
-
|
79
|
+
|
71
80
|
end
|
72
81
|
|
data/test/tc_roc.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
1
|
|
3
2
|
require 'test/unit'
|
4
3
|
|
@@ -8,42 +7,40 @@ class ROCTest < Test::Unit::TestCase
|
|
8
7
|
|
9
8
|
def initialize(arg)
|
10
9
|
super(arg)
|
11
|
-
@tp_methods = [:tps_and_precision, :tps_and_fpr2, :tps_and_fpr2_times2]
|
12
10
|
end
|
13
11
|
|
14
12
|
def test_area_under_curve
|
15
13
|
x = [1,2,3]
|
16
14
|
y = [2,3,4]
|
17
|
-
|
18
|
-
assert_equal(6, area)
|
15
|
+
_test_auc(6,x,y)
|
19
16
|
|
20
17
|
x = [1,2,3]
|
21
18
|
y = [-2,-3,-4]
|
22
|
-
|
23
|
-
assert_equal(-6, area)
|
19
|
+
_test_auc(-6, x, y)
|
24
20
|
|
25
21
|
x = [1,2,3]
|
26
22
|
y = [4,3,2]
|
27
|
-
|
28
|
-
assert_equal(6, area)
|
23
|
+
_test_auc(6, x, y)
|
29
24
|
|
30
25
|
x = [1,2,3]
|
31
26
|
y = [-4,-3,-2]
|
32
|
-
|
33
|
-
assert_equal(-6, area)
|
27
|
+
_test_auc(-6, x, y)
|
34
28
|
|
35
29
|
x = [4,5,6]
|
36
30
|
y = [2,1,2]
|
37
|
-
|
38
|
-
assert_equal(3, area)
|
31
|
+
_test_auc(3, x, y)
|
39
32
|
|
40
33
|
x = [4,5,6]
|
41
34
|
y = [-2,-1,-2]
|
35
|
+
_test_auc(-3, x, y)
|
36
|
+
end
|
37
|
+
|
38
|
+
def _test_auc(expected_area,x,y)
|
42
39
|
area = ROC.new.area_under_curve(x,y)
|
43
|
-
assert_equal(
|
40
|
+
assert_equal(expected_area, area)
|
44
41
|
end
|
45
42
|
|
46
|
-
|
43
|
+
def test_prep_list
|
47
44
|
t = true
|
48
45
|
f = false
|
49
46
|
x,y = ROC.new.prep_list([[0,f],[1,f],[2,f],[3,t],[3,f],[0,f],[4,f],[1,t],[2,t]])
|
@@ -51,71 +48,119 @@ class ROCTest < Test::Unit::TestCase
|
|
51
48
|
assert_equal([0,0,1,2,3,4], y)
|
52
49
|
end
|
53
50
|
|
54
|
-
def
|
51
|
+
def test_tps_and_ppv
|
55
52
|
tp = %w(1 2 3 4 5 6 6 6 7 8 9 10 10 10 10 11 12 ).collect {|c| c.to_f } # 17 total
|
56
53
|
fp = %w(3.5 4 5 5 5 6 6 6.5 7 8 9 9.5 10 15).collect {|c| c.to_f } # 14 total
|
57
54
|
xe = [1, 2, 3, 4, 5, 8, 9, 10, 11, 15, 16, 17]
|
58
55
|
# 1, 2, 3, 4 5, 6, 7, 8, 9
|
59
56
|
# 10 11 12
|
60
57
|
ye = [1, 1, 1, 4.0/6.0, 0.5, 8.0/(7.0+8.0), 9.0/(9.0+9.0), 0.5, 11.0/(11.0+ 11.0), 15.0/(15.0+13.0), 16.0/(16.0+13.0), 17.0/(17.0+13.0)]
|
61
|
-
|
62
|
-
x, y = ROC.new.tps_and_precision(tp,fp)
|
63
|
-
assert_equal(x.size, y.size)
|
64
|
-
assert_equal(xe, x)
|
65
|
-
assert_equal(ye, y, "complex real-life-like scenario")
|
66
|
-
end
|
58
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye,"complex real-life-like scenario")
|
67
59
|
|
68
|
-
def test_tps_and_precision
|
69
60
|
## leading fp's
|
70
61
|
tp = [1,2,3]
|
71
62
|
fp = [0,0,1,2,3,4]
|
72
63
|
xe = [1,2,3]
|
73
|
-
|
74
|
-
|
75
|
-
[6.0/(1+3), 8.0/(2+4), 10.0/(3+5)],
|
76
|
-
]
|
77
|
-
_test_tp_method(tp,fp,xe,ye_s,@tp_methods)
|
64
|
+
ye = [1.0/(1+3), 2.0/(2+4), 3.0/(3+5)]
|
65
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "leading fps")
|
78
66
|
|
79
67
|
## leading tp's
|
80
68
|
tp = [-1,2,3]
|
81
69
|
fp = [0,4]
|
82
70
|
xe = [1,2,3]
|
83
|
-
|
84
|
-
|
85
|
-
[0.0/(1+0), 2.0/(2+1), 2.0/(3+1)],
|
86
|
-
]
|
87
|
-
_test_tp_method(tp,fp,xe,ye_s,@tp_methods)
|
71
|
+
ye = [1.0/(1+0), 2.0/(2+1), 3.0/(3+1)]
|
72
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "leading tps")
|
88
73
|
|
89
74
|
## equal tp's leading
|
90
75
|
tp = [0.0001,0.0001,0.0001,2]
|
91
76
|
fp = [0.01,4.0]
|
92
77
|
xe = [3,4]
|
93
|
-
|
94
|
-
|
95
|
-
[0.0/(3+0), 2.0/(4+1)]
|
96
|
-
]
|
97
|
-
_test_tp_method(tp,fp,xe,ye_s,@tp_methods)
|
78
|
+
ye = [3.0/(3+0), 4.0/(4+1)]
|
79
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "equal tps leading")
|
98
80
|
|
99
81
|
## equal arrays with some repeated values
|
100
82
|
tp = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
101
83
|
fp = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
102
84
|
xe = [1,2,4,5,6,7]
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
85
|
+
ye = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
|
86
|
+
_test_tps_and_ppv_method(tp,fp,xe,ye, "equal arrays with some repeated values")
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
def _test_tps_and_ppv_method(tp,fp,xe,ye,message='')
|
91
|
+
(x,y) = ROC.new.tps_and_ppv(tp,fp)
|
92
|
+
assert_equal(x.size, y.size)
|
93
|
+
assert_equal(xe, x)
|
94
|
+
assert_equal(ye, y, "tps_and_ppv: #{message}")
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
class DecoyROCTest < ROCTest
|
100
|
+
|
101
|
+
def test_pred_tps_ppv__leading_fps
|
102
|
+
## leading fp's
|
103
|
+
hits = [1,2,3]
|
104
|
+
decoys = [0,0,1,2,3,4]
|
105
|
+
num_hits_e = [1,2,3]
|
106
|
+
num_fps = [3,4,5]
|
107
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
108
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
109
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_pred_tps_ppv__leading_tps
|
113
|
+
## leading tp's
|
114
|
+
hits = [-1,2,3]
|
115
|
+
decoys = [0,4]
|
116
|
+
num_hits_e = [1,2,3]
|
117
|
+
num_fps = [0,1,1]
|
118
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
119
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
120
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_pred_tps_ppv__equal_tps_leading
|
124
|
+
hits = [0.0001,0.0001,0.0001,2]
|
125
|
+
decoys = [0.01,4.0]
|
126
|
+
num_hits_e = [3,4]
|
127
|
+
num_fps = [0,1]
|
128
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
129
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
130
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
108
131
|
end
|
109
132
|
|
110
|
-
def
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
133
|
+
def test_pred_tps_ppv__equal_arrays_with_some_repeated_values
|
134
|
+
hits = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
135
|
+
decoys = %w(1 2 3 3 4 5 6 ).collect {|x| x.to_f } # 17 total
|
136
|
+
num_hits_e = [1,2,4,5,6,7]
|
137
|
+
num_fps = [1,2,4,5,6,7]
|
138
|
+
tps_e = make_tps_e(num_fps, num_hits_e)
|
139
|
+
ppv_e = make_ppv_e(tps_e, num_hits_e)
|
140
|
+
_test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
141
|
+
end
|
142
|
+
|
143
|
+
def _test_pred_and_tps_and_ppv(hits, decoys, num_hits_e, tps_e, ppv_e)
|
144
|
+
answer = DecoyROC.new.pred_and_tps_and_ppv(hits, decoys)
|
145
|
+
expected = [num_hits_e, tps_e, ppv_e]
|
146
|
+
%w(num_hits num_tps ppv).each_with_index do |cat, i|
|
147
|
+
assert_equal(expected[i], answer[i], cat)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def make_tps_e(num_fps, num_hits_e)
|
152
|
+
tps_e = []
|
153
|
+
num_hits_e.each_with_index do |v,i|
|
154
|
+
tps_e[i] = v - num_fps[i]
|
118
155
|
end
|
156
|
+
tps_e
|
157
|
+
end
|
158
|
+
|
159
|
+
def make_ppv_e(tps_e, num_hits_e)
|
160
|
+
ppv_e = []
|
161
|
+
tps_e.each_with_index {|v,i| ppv_e[i] = v.to_f/num_hits_e[i] }
|
162
|
+
ppv_e
|
119
163
|
end
|
120
164
|
|
121
165
|
end
|
166
|
+
|