mspire 0.3.1 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +2 -2
- data/bin/bioworks_to_pepxml.rb +15 -3
- data/bin/ms_to_lmat.rb +2 -1
- data/bin/sqt_group.rb +26 -0
- data/changelog.txt +36 -0
- data/lib/ms/msrun.rb +3 -1
- data/lib/ms/parser/mzdata/dom.rb +14 -14
- data/lib/ms/scan.rb +3 -3
- data/lib/mspire.rb +1 -1
- data/lib/sample_enzyme.rb +39 -0
- data/lib/spec_id.rb +18 -0
- data/lib/spec_id/aa_freqs.rb +6 -9
- data/lib/spec_id/digestor.rb +16 -17
- data/lib/spec_id/mass.rb +63 -1
- data/lib/spec_id/parser/proph.rb +101 -2
- data/lib/spec_id/precision/filter.rb +3 -2
- data/lib/spec_id/precision/filter/cmdline.rb +3 -1
- data/lib/spec_id/precision/filter/output.rb +1 -0
- data/lib/spec_id/precision/prob.rb +88 -21
- data/lib/spec_id/precision/prob/cmdline.rb +28 -16
- data/lib/spec_id/precision/prob/output.rb +8 -2
- data/lib/spec_id/proph/pep_summary.rb +25 -12
- data/lib/spec_id/sequest.rb +28 -0
- data/lib/spec_id/sequest/pepxml.rb +142 -197
- data/lib/spec_id/sqt.rb +349 -0
- data/lib/spec_id/srf.rb +33 -23
- data/lib/validator.rb +40 -57
- data/lib/validator/aa.rb +3 -90
- data/lib/validator/aa_est.rb +112 -0
- data/lib/validator/cmdline.rb +163 -31
- data/lib/validator/decoy.rb +15 -7
- data/lib/validator/digestion_based.rb +5 -4
- data/lib/validator/q_value.rb +32 -0
- data/script/peps_per_bin.rb +67 -0
- data/script/sqt_to_meta.rb +24 -0
- data/specs/bin/bioworks_to_pepxml_spec.rb +3 -3
- data/specs/bin/fasta_shaker_spec.rb +2 -2
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +7 -10
- data/specs/bin/filter_and_validate_spec.rb +25 -6
- data/specs/bin/ms_to_lmat_spec.rb +2 -2
- data/specs/bin/prob_validate_spec.rb +5 -3
- data/specs/sample_enzyme_spec.rb +86 -1
- data/specs/spec_helper.rb +11 -9
- data/specs/spec_id/bioworks_spec.rb +2 -1
- data/specs/spec_id/precision/filter_spec.rb +5 -5
- data/specs/spec_id/precision/prob_spec.rb +0 -67
- data/specs/spec_id/proph/pep_summary_spec.rb +42 -87
- data/specs/spec_id/protein_summary_spec.rb +4 -4
- data/specs/spec_id/sequest/pepxml_spec.rb +1 -79
- data/specs/spec_id/sequest_spec.rb +38 -0
- data/specs/spec_id/sqt_spec.rb +111 -3
- data/specs/spec_id_spec.rb +2 -0
- data/specs/transmem/phobius_spec.rb +3 -1
- data/specs/transmem/toppred_spec.rb +1 -1
- data/specs/validator/aa_est_spec.rb +66 -0
- data/specs/validator/aa_spec.rb +1 -68
- data/specs/validator/background_spec.rb +2 -0
- data/specs/validator/bias_spec.rb +3 -27
- data/specs/validator/decoy_spec.rb +2 -2
- data/specs/validator/transmem_spec.rb +2 -1
- data/test_files/small.sqt +87 -0
- metadata +312 -293
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
+
require 'spec_id'
|
3
|
+
require 'spec_id/sequest'
|
4
|
+
require 'set'
|
5
|
+
|
6
|
+
class MockPepHit
|
7
|
+
attr_accessor :first_scan, :xcorr, :idd
|
8
|
+
def initialize(*args)
|
9
|
+
(@first_scan, @xcorr, @idd) = args
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe Sequest, ": with small mock set" do
|
14
|
+
before(:each) do
|
15
|
+
index = 0
|
16
|
+
hits = [[0, 5.0, 0], [0, 4.0, 1], [0, 3.0, 2],
|
17
|
+
[1, 5.0, 3], [1, 4.0, 4],
|
18
|
+
[2, 5.5, 5],
|
19
|
+
[3, 5.5, 6], [3, 5.5, 7], [3, 4.0, 8], [3, 2.4, 9], [3, 2.4, 10]
|
20
|
+
].map do |hit|
|
21
|
+
MockPepHit.new(*hit)
|
22
|
+
end
|
23
|
+
@peps = hits.sort_by {rand}
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'returns "other" hits' do
|
27
|
+
included = [2, 8, 9, 10]
|
28
|
+
first_index = 2
|
29
|
+
last_index = 10
|
30
|
+
reply = Sequest.other_hits(@peps, first_index,last_index,:first_scan, [:xcorr, {:down => :xcorr}])
|
31
|
+
reply.map {|hit| hit.idd }.to_set.should == included.to_set
|
32
|
+
|
33
|
+
# same, but optimized
|
34
|
+
reply = Sequest.other_hits_sorted_by_xcorr(@peps, first_index,last_index,:first_scan)
|
35
|
+
reply.map {|hit| hit.idd }.to_set.should == included.to_set
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
data/specs/spec_id/sqt_spec.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
2
|
|
3
|
+
require 'spec_id/sqt'
|
3
4
|
require 'spec_id/srf'
|
4
5
|
|
5
6
|
SpecHelperHeaderHash = {
|
@@ -80,7 +81,7 @@ describe 'converting a large srf to sqt' do
|
|
80
81
|
end
|
81
82
|
it 'converts without bothering with the database' do
|
82
83
|
@srf.to_sqt(@output)
|
83
|
-
@output.should
|
84
|
+
@output.exist_as_a_file?.should be_true
|
84
85
|
lines = File.readlines(@output)
|
85
86
|
lines.size.should == 80910
|
86
87
|
header_lines = lines.grep(/^H/)
|
@@ -104,13 +105,13 @@ describe 'converting a large srf to sqt' do
|
|
104
105
|
my_error_string.should include(wacky_path)
|
105
106
|
@srf.header.db_filename = @original_db_filename
|
106
107
|
$stderr = STDERR
|
107
|
-
@output.should
|
108
|
+
@output.exist_as_a_file?.should be_true
|
108
109
|
IO.readlines(@output).size.should == 80910
|
109
110
|
del(@output)
|
110
111
|
end
|
111
112
|
it 'can get db info with correct path' do
|
112
113
|
@srf.to_sqt(@output, :db_info => true, :new_db_path => Tfiles_l + '/opd1_2runs_2mods/sequest')
|
113
|
-
@output.should
|
114
|
+
@output.exist_as_a_file?.should be_true
|
114
115
|
lines = IO.readlines(@output)
|
115
116
|
has_md5 = lines.any? do |line|
|
116
117
|
line =~ /DBMD5Sum\s+202b1d95e91f2da30191174a7f13a04e/
|
@@ -136,3 +137,110 @@ describe 'converting a large srf to sqt' do
|
|
136
137
|
end
|
137
138
|
end
|
138
139
|
end
|
140
|
+
|
141
|
+
HeaderHash = {}
|
142
|
+
header_doublets = [
|
143
|
+
%w(SQTGenerator mspire),
|
144
|
+
%w(SQTGeneratorVersion 0.3.1),
|
145
|
+
%w(Database C:\Xcalibur\database\ecoli_K12_ncbi_20060321.fasta),
|
146
|
+
%w(FragmentMasses AVG),
|
147
|
+
%w(PrecursorMasses AVG),
|
148
|
+
['StartTime', ''],
|
149
|
+
['Alg-MSModel', 'LCQ Deca XP'],
|
150
|
+
%w(DBLocusCount 4237),
|
151
|
+
%w(Alg-FragMassTol 1.0000),
|
152
|
+
%w(Alg-PreMassTol 25.0000),
|
153
|
+
['Alg-IonSeries', '0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0'],
|
154
|
+
%w(Alg-PreMassUnits ppm),
|
155
|
+
['Alg-Enzyme', 'Trypsin(KR/P) (2)'],
|
156
|
+
|
157
|
+
['Comment', ['ultra small file created for testing', 'Created from Bioworks .srf file']],
|
158
|
+
['DynamicMod', ['M*=+15.99940', 'STY#=+79.97990']],
|
159
|
+
['StaticMod', []],
|
160
|
+
].each do |double|
|
161
|
+
HeaderHash[double[0]] = double[1]
|
162
|
+
end
|
163
|
+
|
164
|
+
TestSpectra = {
|
165
|
+
:first => { :first_scan=>2, :last_scan=>2, :charge=>1, :time_to_process=>0.0, :node=>"TESLA", :mh=>390.92919921875, :total_intensity=>2653.90307617188, :lowest_sp=>0.0, :num_matched_peptides=>0, :matches=>[]},
|
166
|
+
:last => { :first_scan=>27, :last_scan=>27, :charge=>1, :time_to_process=>0.0, :node=>"TESLA", :mh=>393.008056640625, :total_intensity=>2896.16967773438, :lowest_sp=>0.0, :num_matched_peptides=>0, :matches=>[] },
|
167
|
+
:seventeenth => {:first_scan=>23, :last_scan=>23, :charge=>1, :time_to_process=>0.0, :node=>"TESLA", :mh=>1022.10571289062, :total_intensity=>3637.86059570312, :lowest_sp=>0.0, :num_matched_peptides=>41},
|
168
|
+
:first_match_17 => { :rxcorr=>1, :rsp=>5, :mh=>1022.11662242, :deltacn_orig=>0.0, :xcorr=>0.725152492523193, :sp=>73.9527359008789, :ions_matched=>6, :ions_total=>24, :sequence=>"-.MGT#TTM*GVK.L", :manual_validation_status=>"U", :first_scan=>23, :last_scan=>23, :charge=>1, :deltacn=>0.0672458708286285, :aaseq => 'MGTTTMGVK' },
|
169
|
+
:last_match_17 => {:rxcorr=>10, :rsp=>16, :mh=>1022.09807242, :deltacn_orig=>0.398330867290497, :xcorr=>0.436301857233047, :sp=>49.735767364502, :ions_matched=>5, :ions_total=>21, :sequence=>"-.MRT#TSFAK.V", :manual_validation_status=>"U", :first_scan=>23, :last_scan=>23, :charge=>1, :deltacn=>1.1, :aaseq => 'MRTTSFAK'},
|
170
|
+
:last_match_17_last_loci => {:reference =>'gi|16129390|ref|NP_415948.1|', :first_entry =>'gi|16129390|ref|NP_415948.1|', :locus =>'gi|16129390|ref|NP_415948.1|', :description => 'Fake description' }
|
171
|
+
}
|
172
|
+
|
173
|
+
|
174
|
+
describe SQT, ": reading a small sqt file" do
|
175
|
+
before(:each) do
|
176
|
+
file = Tfiles + '/small.sqt'
|
177
|
+
file.exist_as_a_file?.should be_true
|
178
|
+
@sqt = SQT.new(file)
|
179
|
+
end
|
180
|
+
|
181
|
+
it 'can access header entries like a hash' do
|
182
|
+
header = @sqt.header
|
183
|
+
HeaderHash.each do |k,v|
|
184
|
+
header[k].should == v
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
it 'can access header entries with methods' do
|
189
|
+
header = @sqt.header
|
190
|
+
# for example:
|
191
|
+
header.database.should == HeaderHash['Database']
|
192
|
+
# all working:
|
193
|
+
HeaderHash.each do |k,v|
|
194
|
+
header.send(SQT::Header::KeysToAtts[k]).should == v
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
199
|
+
it 'has spectra, matches, and loci' do
|
200
|
+
svt = @sqt.spectra[16]
|
201
|
+
reply = {:first => @sqt.spectra.first, :last => @sqt.spectra.last, :seventeenth => svt, :first_match_17 => svt.matches.first, :last_match_17 => svt.matches.last, :last_match_17_last_loci => svt.matches.last.loci.last}
|
202
|
+
[:first, :last, :seventeenth, :first_match_17, :last_match_17, :last_match_17_last_loci].each do |key|
|
203
|
+
TestSpectra[key].each do |k,v|
|
204
|
+
if v.is_a? Float
|
205
|
+
reply[key].send(k).should be_close(v, 0.0000000001)
|
206
|
+
else
|
207
|
+
reply[key].send(k).should == v
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
@sqt.spectra[16].matches.first.loci.size.should == 1
|
212
|
+
@sqt.spectra[16].matches.last.loci.size.should == 1
|
213
|
+
end
|
214
|
+
|
215
|
+
end
|
216
|
+
|
217
|
+
describe SQTGroup, ': acting as a SpecID on large files' do
|
218
|
+
spec_large do
|
219
|
+
before(:each) do
|
220
|
+
file1 = Tfiles_l + '/opd1_2runs_2mods/sequest/020.sqt'
|
221
|
+
file2 = Tfiles_l + '/opd1_2runs_2mods/sequest/040.sqt'
|
222
|
+
file1.exist_as_a_file?.should be_true
|
223
|
+
file2.exist_as_a_file?.should be_true
|
224
|
+
@sqg = SQTGroup.new([file1, file2])
|
225
|
+
end
|
226
|
+
|
227
|
+
it 'has peptide hits' do
|
228
|
+
peps = @sqg.peps
|
229
|
+
peps.size.should == 38512 # frozen
|
230
|
+
# first hit in 020
|
231
|
+
peps.first.sequence.should == 'R.Y#RLGGS#T#K.K'
|
232
|
+
peps.first.base_name.should == '020'
|
233
|
+
# last hit in 040
|
234
|
+
peps.last.sequence.should == 'K.NQTNNRFK.T'
|
235
|
+
peps.last.base_name.should == '040'
|
236
|
+
end
|
237
|
+
|
238
|
+
it 'has prots' do
|
239
|
+
## FROZEN:
|
240
|
+
@sqg.prots.size.should == 3994
|
241
|
+
sorted = @sqg.prots.sort_by {|v| v.reference }
|
242
|
+
sorted.first.reference.should == 'gi|16127996|ref|NP_414543.1|'
|
243
|
+
sorted.first.peps.size.should == 33
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end
|
data/specs/spec_id_spec.rb
CHANGED
@@ -219,6 +219,8 @@ describe SpecID, 'class methods' do
|
|
219
219
|
:pepproph => Tfiles + '/opd1_2runs_2mods/interact-opd1_mods__small.xml',
|
220
220
|
:srf => Tfiles + '/head_of_7MIX.srf',
|
221
221
|
:srg => 'whatever.srg',
|
222
|
+
:sqt => Tfiles + '/small.sqt',
|
223
|
+
:sqg => 'whatever.sqg',
|
222
224
|
}
|
223
225
|
files.each do |key,val|
|
224
226
|
SpecID.file_type(val).should == key.to_s
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
4
|
|
3
5
|
# contains shared behavior that we need.
|
@@ -73,7 +75,7 @@ end
|
|
73
75
|
|
74
76
|
describe "a phobius parser", :shared => true do
|
75
77
|
it 'parses a phobius file into a hash structure' do
|
76
|
-
@file_to_hash.should
|
78
|
+
@file_to_hash.exist_as_a_file?.should be_true
|
77
79
|
hash = @class.default_index(@file_to_hash)
|
78
80
|
hash.should == @structure_to_create
|
79
81
|
end
|
@@ -19,7 +19,7 @@ end
|
|
19
19
|
|
20
20
|
describe "a toppred parser", :shared => true do
|
21
21
|
it 'parses a toppred file into a hash structure' do
|
22
|
-
@file_to_hash.should
|
22
|
+
@file_to_hash.exist_as_a_file?.should be_true
|
23
23
|
hash = @class.default_index(@file_to_hash)
|
24
24
|
hash.should == @structure_to_create
|
25
25
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
+
require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
|
3
|
+
|
4
|
+
|
5
|
+
require 'validator/aa_est'
|
6
|
+
require 'spec_id'
|
7
|
+
require 'spec_id/digestor'
|
8
|
+
|
9
|
+
klass = Validator::AAEst
|
10
|
+
|
11
|
+
describe klass, "using frequency estimates" do
|
12
|
+
before(:each) do
|
13
|
+
@small_fasta = Tfiles + '/small.fasta'
|
14
|
+
@sequest_params = Tfiles + '/bioworks32.params'
|
15
|
+
# C/D C/D J (7)
|
16
|
+
@seqs = %w(ABCDEFGC CCDCCC JJJJJ XYZ WXXXYZ TXXXXXYZ ZZXIIPTYZ ZZXTYZZ ZZZZ YYYYYTL)
|
17
|
+
@peps = @seqs.map {|n| v = SpecID::GenericPep.new; v.aaseq = n ; v }
|
18
|
+
val = klass.new('C')
|
19
|
+
val.frequency = 0.11
|
20
|
+
@validator = val
|
21
|
+
end
|
22
|
+
#C: 0.0157714433456144
|
23
|
+
#D: 0.0526145691939758
|
24
|
+
|
25
|
+
it_should_behave_like 'a validator'
|
26
|
+
|
27
|
+
it 'calculates false_to_total_ratio correctly' do
|
28
|
+
obj = klass.new('C', :frequency => 0.0157714433456144)
|
29
|
+
obj.set_false_to_total_ratio(@peps)
|
30
|
+
exp = 0.949318337979434 / @seqs.size
|
31
|
+
obj.false_to_total_ratio.should be_close(exp, 0.0001) # freeze for consistency
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'calculates fttr each time fresh' do
|
35
|
+
myar = @peps.map
|
36
|
+
obj = klass.new('C', :frequency => 0.0157714433456144)
|
37
|
+
obj.pephit_precision(myar)
|
38
|
+
fttr1 = obj.false_to_total_ratio
|
39
|
+
obj.pephit_precision(myar)
|
40
|
+
fttr2 = obj.false_to_total_ratio
|
41
|
+
fttr1.should == fttr2
|
42
|
+
myar.pop
|
43
|
+
obj.pephit_precision(myar)
|
44
|
+
fttr3 = obj.false_to_total_ratio
|
45
|
+
fttr3.should_not == fttr1
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'gives consistent precision of peptides given fastafile and aa (even negative)' do
|
49
|
+
aa = 'C'
|
50
|
+
val = klass.new(aa).set_frequency(Fasta.new(@small_fasta).prots)
|
51
|
+
# I checked this answer out by hand and it is correct
|
52
|
+
val.pephit_precision(@peps).should be_close(-1.10677, 0.001)
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'gives same precision done at once or incrementally' do
|
56
|
+
obj = klass.new('C', :frequency => 0.0157714433456144)
|
57
|
+
|
58
|
+
all_at_once = obj.pephit_precision(@peps)
|
59
|
+
|
60
|
+
precisions = @peps.map do |pep|
|
61
|
+
obj.increment_pephits_precision(pep)
|
62
|
+
end
|
63
|
+
precisions.last.should == all_at_once
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
data/specs/validator/aa_spec.rb
CHANGED
@@ -10,60 +10,7 @@ klass = Validator::AA
|
|
10
10
|
|
11
11
|
class MyAA < Validator::AA ; def initialize ; end ; end
|
12
12
|
|
13
|
-
describe klass, "using
|
14
|
-
before(:each) do
|
15
|
-
@small_fasta = Tfiles + '/small.fasta'
|
16
|
-
@sequest_params = Tfiles + '/bioworks32.params'
|
17
|
-
# C/D C/D J (7)
|
18
|
-
@seqs = %w(ABCDEFGC CCDCCC JJJJJ XYZ WXXXYZ TXXXXXYZ ZZXIIPTYZ ZZXTYZZ ZZZZ YYYYYTL)
|
19
|
-
@peps = @seqs.map {|n| v = SpecID::GenericPep.new; v.aaseq = n ; v }
|
20
|
-
val = klass.new('C')
|
21
|
-
val.frequency = 0.11
|
22
|
-
@validator = val
|
23
|
-
end
|
24
|
-
#C: 0.0157714433456144
|
25
|
-
#D: 0.0526145691939758
|
26
|
-
|
27
|
-
it_should_behave_like 'a validator'
|
28
|
-
|
29
|
-
it 'calculates precision from actual and expected correctly' do
|
30
|
-
MyAA.new.pephit_precision_from_actual_and_expected(5, 10, 100).should == 0.5
|
31
|
-
(0..10).each do |actual|
|
32
|
-
precision = MyAA.new.pephit_precision_from_actual_and_expected(actual, 10, 100)
|
33
|
-
answer = 1.0 - (actual.to_f/10.0)
|
34
|
-
precision.should be_close(answer, 0.00000001)
|
35
|
-
#0 -> 1
|
36
|
-
#1 -> 0.9
|
37
|
-
#2 -> 0.8
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'calculates precision (with background) reasonably and consistently' do
|
42
|
-
(0..10).each do |bkg|
|
43
|
-
precision = MyAA.new.pephit_precision_from_actual_and_expected(5, 10, 100, bkg.to_f/100)
|
44
|
-
answer = 0.5 + bkg.to_f/10
|
45
|
-
answer = 1.0 if answer > 1.0
|
46
|
-
precision.should == answer
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
it 'calculates at_least_one correctly' do
|
51
|
-
aa = 'C'
|
52
|
-
freq = 0.0157714433456144 ## from @small_fasta
|
53
|
-
(actual, expected) = MyAA.new.at_least_one(aa, freq, @seqs)
|
54
|
-
actual.should == 2
|
55
|
-
expected.should be_close(0.949318337979434, 0.0001) # freeze for consistency
|
56
|
-
end
|
57
|
-
|
58
|
-
it 'gives consistent precision of peptides given fastafile and aa (even negative)' do
|
59
|
-
aa = 'C'
|
60
|
-
val = klass.new(aa).set_frequency(Fasta.new(@small_fasta))
|
61
|
-
# I checked this answer out by hand and it is correct
|
62
|
-
val.pephit_precision(@peps).should be_close(-1.10677, 0.001)
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
describe klass, "using empirical digestion data" do
|
13
|
+
describe klass, "using digestion data" do
|
67
14
|
|
68
15
|
before(:each) do
|
69
16
|
@small_fasta = Tfiles + '/small.fasta'
|
@@ -88,20 +35,6 @@ describe klass, "using empirical digestion data" do
|
|
88
35
|
# correct)
|
89
36
|
val.false_to_total_ratio.should be_close(0.177629264861062, 0.0000000000001)
|
90
37
|
end
|
91
|
-
|
92
|
-
it 'can validate with theoretical digestion or frequency estimate' do
|
93
|
-
aa = 'C'
|
94
|
-
val = klass.new(aa)
|
95
|
-
val.false_to_total_ratio = 0.177629264861062
|
96
|
-
answ = val.pephit_precision(@peps)
|
97
|
-
# frozen
|
98
|
-
answ.should be_close(-0.125940594059407, 0.0000000001)
|
99
|
-
|
100
|
-
val.frequency = 0.0157714433456144 ## from @small_fasta
|
101
|
-
val.false_to_total_ratio = nil
|
102
|
-
answ = val.pephit_precision(@peps)
|
103
|
-
answ.should be_close(-1.10677485094924, 0.0000000001)
|
104
|
-
end
|
105
38
|
end
|
106
39
|
|
107
40
|
|
@@ -57,6 +57,7 @@ describe klass, "on small mock set" do
|
|
57
57
|
|
58
58
|
@fasta_obj = FastaHelper::FastaObj
|
59
59
|
@validator = klass.new(@fasta_obj)
|
60
|
+
@validator.false_to_total_ratio = 0.22 # arbitrary
|
60
61
|
end
|
61
62
|
|
62
63
|
it_should_behave_like 'a validator'
|
@@ -76,7 +77,7 @@ describe klass, "on small mock set" do
|
|
76
77
|
# cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
|
77
78
|
[true, false].each do |correct_wins|
|
78
79
|
[true, false].each do |fasta_expected|
|
79
|
-
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins)
|
80
|
+
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :false_to_total_ratio => 1.0)
|
80
81
|
tp, fp = answ.shift
|
81
82
|
exp = calc_precision(tp, fp)
|
82
83
|
val.pephit_precision(@peps).should == exp
|
@@ -93,32 +94,7 @@ describe klass, "on small mock set" do
|
|
93
94
|
background = 0.24
|
94
95
|
[true, false].each do |correct_wins|
|
95
96
|
[true, false].each do |fasta_expected|
|
96
|
-
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background)
|
97
|
-
peps_size = @peps.size
|
98
|
-
exp_tp, exp_fp = answ.shift
|
99
|
-
exp = calc_precision(exp_tp, exp_fp)
|
100
|
-
val.pephit_precision(@peps).should_not == exp
|
101
|
-
actual_precision = val.pephit_precision(@peps)
|
102
|
-
act_tp, act_fp = val.partition(@peps)
|
103
|
-
act_tp.size.should == exp_tp
|
104
|
-
act_fp.size.should == exp_fp
|
105
|
-
exp_fp_correctd = exp_fp.to_f - (peps_size.to_f * background)
|
106
|
-
expected_precision = calc_precision(peps_size.to_f - exp_fp_correctd, exp_fp_correctd)
|
107
|
-
# internally, the num of false hits is controlled so as not to bottom
|
108
|
-
# out below zero, here we control the precision (same effect)
|
109
|
-
expected_precision = 1.0 if expected_precision > 1.0
|
110
|
-
actual_precision.should == expected_precision
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
it 'correctly incorporates background' do
|
116
|
-
answ = [[3,4], [6,1], [1,6], [4,3]]
|
117
|
-
# cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
|
118
|
-
background = 0.24
|
119
|
-
[true, false].each do |correct_wins|
|
120
|
-
[true, false].each do |fasta_expected|
|
121
|
-
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background)
|
97
|
+
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background, :false_to_total_ratio => 1.0)
|
122
98
|
peps_size = @peps.size
|
123
99
|
exp_tp, exp_fp = answ.shift
|
124
100
|
exp = calc_precision(exp_tp, exp_fp)
|
@@ -23,7 +23,7 @@ describe klass, 'reporting precision on peptides from cat prots' do
|
|
23
23
|
peps[4].prots = [prots[5]] # FP
|
24
24
|
peps[5].prots = [prots[4]] # FP
|
25
25
|
@peps = peps
|
26
|
-
@validator = klass.new(/FAKE/)
|
26
|
+
@validator = klass.new(:constraint => /FAKE/)
|
27
27
|
end
|
28
28
|
|
29
29
|
it_should_behave_like 'a validator'
|
@@ -39,7 +39,7 @@ describe klass, 'reporting precision on peptides from cat prots' do
|
|
39
39
|
|
40
40
|
[true, false].each do |incorrect_on_match|
|
41
41
|
[true, false].each do |correct_wins|
|
42
|
-
val = klass.new(protein_matches.shift, incorrect_on_match, correct_wins)
|
42
|
+
val = klass.new(:constraint => protein_matches.shift, :decoy_on_match => incorrect_on_match, :correct_wins => correct_wins)
|
43
43
|
answ = val.pephit_precision(@peps)
|
44
44
|
exp = ValidatorHelper::Decoy.precision_from_partition_array(answ_arr.shift)
|
45
45
|
answ.should == exp
|
@@ -34,6 +34,7 @@ describe klass, "on small mock set" do
|
|
34
34
|
@peps[7].prots = [@prots[8], @prots[9]] # nil pep
|
35
35
|
|
36
36
|
@validator = klass.new(@toppred_file)
|
37
|
+
@validator.false_to_total_ratio = 1.0
|
37
38
|
end
|
38
39
|
|
39
40
|
it_should_behave_like 'a validator'
|
@@ -80,7 +81,7 @@ describe klass, "on small mock set" do
|
|
80
81
|
end
|
81
82
|
|
82
83
|
it 'can calculate precision incrementally' do
|
83
|
-
val = klass.new(@toppred_file, :min_num_tms => 2)
|
84
|
+
val = klass.new(@toppred_file, :min_num_tms => 2, :false_to_total_ratio => 1.0)
|
84
85
|
# usually we'd update the false_to_total_ratio, but not bothering for test
|
85
86
|
# here we HAVE to set the status hash before hand... (we could redo this
|
86
87
|
# section)
|