mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,122 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
-
require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
|
3
|
-
|
4
|
-
require 'validator/bias'
|
5
|
-
|
6
|
-
require File.dirname(__FILE__) + '/fasta_helper'
|
7
|
-
require 'spec_id'
|
8
|
-
|
9
|
-
klass = Validator::Bias
|
10
|
-
|
11
|
-
describe klass, "on small mock set" do
|
12
|
-
before(:each) do
|
13
|
-
@peps = (0..6).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
|
14
|
-
references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C NOT_EXISTING1 NOT_EXISTING2 NOT_EXISTING3 NOT_EXISTING4)
|
15
|
-
# index: 0 1 2 3 4 5 6 7
|
16
|
-
# index: 8
|
17
|
-
@prots = references.map do |ref|
|
18
|
-
v = SpecID::GenericProt.new
|
19
|
-
v.reference = ref + " something else that we don't care about"
|
20
|
-
v
|
21
|
-
end
|
22
|
-
|
23
|
-
# e=t we expect to see the fasta proteins in our hit list
|
24
|
-
# cw=t a single peptide hit from one of these proteins constitutes a true
|
25
|
-
# positive
|
26
|
-
# cw=f all peptide hits must come from one of these proteins to be a true
|
27
|
-
# positive
|
28
|
-
#
|
29
|
-
# e=f we do not expect to see the fasta obj proteins in our hit list
|
30
|
-
# cw=t a single peptide hit from *outside* this list constitutes a true
|
31
|
-
# positive
|
32
|
-
# cw=f a single peptide hit from our fasta object constitutes a false
|
33
|
-
# positive
|
34
|
-
#
|
35
|
-
|
36
|
-
@peps[0].prots = [@prots[0], @prots[5], @prots[8]]
|
37
|
-
@peps[1].prots = [@prots[1], @prots[5], @prots[8]]
|
38
|
-
@peps[2].prots = [@prots[3], @prots[4], @prots[1]]
|
39
|
-
@peps[3].prots = [@prots[7], @prots[8]]
|
40
|
-
@peps[4].prots = [@prots[5], @prots[8]]
|
41
|
-
@peps[5].prots = [@prots[8]]
|
42
|
-
@peps[6].prots = [@prots[5], @prots[6]]
|
43
|
-
|
44
|
-
#################################################
|
45
|
-
# REFERENCE for small mock set:
|
46
|
-
#################################################
|
47
|
-
# pep 1inFst? allinFst? cw=t,e=t cw=t,e=f cw=f,e=f cw=f,e=t
|
48
|
-
# 0 y n t t f f
|
49
|
-
# 1 y n t t f f
|
50
|
-
# 2 y y t f f t
|
51
|
-
# 3 n n f t t f
|
52
|
-
# 4 n n f t t f
|
53
|
-
# 5 n n f t t f
|
54
|
-
# 6 n n f t t f
|
55
|
-
# PR: 3/7 6/7 4/7 1/7
|
56
|
-
# tp:fp 3:4 6:1 4:3 1:6
|
57
|
-
|
58
|
-
@fasta_obj = FastaHelper::FastaObj
|
59
|
-
@validator = klass.new(@fasta_obj)
|
60
|
-
@validator.false_to_total_ratio = 0.22 # arbitrary
|
61
|
-
end
|
62
|
-
|
63
|
-
it_should_behave_like 'a validator'
|
64
|
-
|
65
|
-
it 'creates correct reference hash' do
|
66
|
-
expected = {"YAL001C"=>true, "YAL011W"=>true, "YAL010C"=>true,
|
67
|
-
"YAL009W"=>true, "YAL008W"=>true, "YAL007C"=>true, "YAL005C"=>true,
|
68
|
-
"YAL004W"=>true, "YAL003W"=>true, "YAL014C"=>true, "YAL013W"=>true,
|
69
|
-
"YAL002W"=>true, "YAL012W"=>true
|
70
|
-
}
|
71
|
-
val = klass.new(@fasta_obj)
|
72
|
-
val.short_reference_hash.should == expected
|
73
|
-
end
|
74
|
-
|
75
|
-
it 'gives correct precision and partitions (across all option combinations)' do
|
76
|
-
answ = [[3,4], [6,1], [1,6], [4,3]]
|
77
|
-
# cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
|
78
|
-
[true, false].each do |correct_wins|
|
79
|
-
[true, false].each do |fasta_expected|
|
80
|
-
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :false_to_total_ratio => 1.0)
|
81
|
-
tp, fp = answ.shift
|
82
|
-
exp = calc_precision(tp, fp)
|
83
|
-
val.pephit_precision(@peps).should == exp
|
84
|
-
act_tp, act_fp = val.partition(@peps)
|
85
|
-
act_tp.size.should == tp
|
86
|
-
act_fp.size.should == fp
|
87
|
-
end
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
it 'correctly incorporates background' do
|
92
|
-
answ = [[3,4], [6,1], [1,6], [4,3]]
|
93
|
-
# cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
|
94
|
-
background = 0.24
|
95
|
-
[true, false].each do |correct_wins|
|
96
|
-
[true, false].each do |fasta_expected|
|
97
|
-
val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background, :false_to_total_ratio => 1.0)
|
98
|
-
peps_size = @peps.size
|
99
|
-
exp_tp, exp_fp = answ.shift
|
100
|
-
exp = calc_precision(exp_tp, exp_fp)
|
101
|
-
val.pephit_precision(@peps).should_not == exp
|
102
|
-
actual_precision = val.pephit_precision(@peps)
|
103
|
-
act_tp, act_fp = val.partition(@peps)
|
104
|
-
act_tp.size.should == exp_tp
|
105
|
-
act_fp.size.should == exp_fp
|
106
|
-
exp_fp_correctd = exp_fp.to_f - (peps_size.to_f * background)
|
107
|
-
expected_precision = calc_precision(peps_size.to_f - exp_fp_correctd, exp_fp_correctd)
|
108
|
-
# internally, the num of false hits is controlled so as not to bottom
|
109
|
-
# out below zero, here we control the precision (same effect)
|
110
|
-
expected_precision = 1.0 if expected_precision > 1.0
|
111
|
-
actual_precision.should == expected_precision
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
it_should 'work with false_to_total_ratio!'
|
117
|
-
|
118
|
-
def calc_precision(tp, fp)
|
119
|
-
prec = tp.to_f / (tp + fp)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
@@ -1,51 +0,0 @@
|
|
1
|
-
|
2
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
3
|
-
require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
|
4
|
-
|
5
|
-
require 'validator/decoy'
|
6
|
-
require 'spec_id'
|
7
|
-
|
8
|
-
klass = Validator::Decoy
|
9
|
-
|
10
|
-
describe klass, 'reporting precision on peptides from cat prots' do
|
11
|
-
|
12
|
-
before(:each) do
|
13
|
-
peps = (0..5).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
|
14
|
-
prots = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ FALSE_someOthergi FALSE_AnotherGi FALSE_YetAnotherReference).map do |ref|
|
15
|
-
v = SpecID::GenericProt.new
|
16
|
-
v.reference = ref
|
17
|
-
v
|
18
|
-
end
|
19
|
-
peps[0].prots = [prots[0], prots[5]] # TP (only in tp wins)
|
20
|
-
peps[1].prots = [prots[1], prots[2]] # TP always
|
21
|
-
peps[2].prots = [prots[3], prots[4]] # FP
|
22
|
-
peps[3].prots = [prots[2]] # TP
|
23
|
-
peps[4].prots = [prots[5]] # FP
|
24
|
-
peps[5].prots = [prots[4]] # FP
|
25
|
-
@peps = peps
|
26
|
-
@validator = klass.new(:constraint => /FAKE/)
|
27
|
-
end
|
28
|
-
|
29
|
-
it_should_behave_like 'a validator'
|
30
|
-
|
31
|
-
it 'gives correct precision (across all option combinations)' do
|
32
|
-
answ_arr = [
|
33
|
-
[[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
|
34
|
-
[[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]],
|
35
|
-
[[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
|
36
|
-
[[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]
|
37
|
-
]
|
38
|
-
protein_matches = [/^FALSE_/, /^FALSE_/, 'Protein', 'Protein']
|
39
|
-
|
40
|
-
[true, false].each do |incorrect_on_match|
|
41
|
-
[true, false].each do |correct_wins|
|
42
|
-
val = klass.new(:constraint => protein_matches.shift, :decoy_on_match => incorrect_on_match, :correct_wins => correct_wins)
|
43
|
-
answ = val.pephit_precision(@peps)
|
44
|
-
exp = ValidatorHelper::Decoy.precision_from_partition_array(answ_arr.shift)
|
45
|
-
answ.should == exp
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
|
@@ -1,26 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
module FastaHelper
|
4
|
-
FastaObj = Fasta.new
|
5
|
-
data = {
|
6
|
-
'>YAL011W otherstuff' => 'MPAVLRTRSKESSIEQKPASRTRTRSRRGKRGRDDDDDDDDEESDDAYDEVGNDYDEYASRAKLATNRPFEIVAGLPASVELPNYNSSLTHPQSIKNSGVLYDSLVSSRRTWVQGEMFELYWRRPKKIVSESTPAATESPTSGTIPLIRDKMQKMCDCVMSGGPHTFKVRLFILKNDKIEQKWQDEQELKKKEKELKRKNDAEAKRLRMEERKRQQMQKKIAKEQKLQLQKENKAKQKLEQEALKLKRKEEMKKLKEQNKNKQGSPSSSMHDPRMIMNLNLMAQEDPKLNTLMETVAKGLANNSQLEEFKKFIEIAKKRSLEENPVNKRPSVTTTRPAPPSKAKDVAEDHRLNSITLVKSSKTAATEPEPKKADDENAEKQQSKEAKTTAESTQVDVKKEEEDVKEKGVKSEDTQKKEDNQVVPKRKRRKNAIKEDKDMQLTAFQQKYVQGAEIILEYLEFTHSRYYLPKKSVVEFLEDTDEIIISWIVIHNSKEIEKFKTKKIKAKLKADQKLNKEDAKPGSDVEKEVSFNPLFEADCPTPLYTPMTMKLSGIHKRFNQIIRNSVSPMEEVVKEMEKILQIGTRLSGYNLWYQLDGYDDEALSESLRFELNEWEHAMRSRRHKR',
|
7
|
-
'>YAL001C otherstuff' => 'MVLTIYPDELVQIVSDKIASNKGKITLNQLWDISGKYFDLSDKKVKQFVLSCVILKKDIEVYCDGAITTKNVTDIIGDANHSYSVGITEDSLWTLLTGYTKKESTIGNSAFELLLEVAKSGEKGINTMDLAQVTGQDPRSVTGRIKKINHLLTSSQLIYKGHVVKQLKLKKFSHDGVDSNPYINIRDHLATIVEVVKRSKNGIRQIIDLKRELKFDKEKRLSKAFIAAIAWLDEKEYLKKVLVVSPKNPAIKIRCVKYVKDIPDSKGSPSFEYDSNSADEDSVSDSKAAFEDEDLVEGLDNFNATDLLQNQGLVMEEKEDAVKNEVLLNRFYPLQNQTYDIADKSGLKGISTMDVVNRITGKEFQRAFTKSSEYYLESVDKQKENTGGYRLFRIYDFEGKKKFFRLFTAQNFQKLTNAEDEISVPKGFDELGKSRTDLKTLNEDNFVALNNTVRFTTDSDGQDIFFWHGELKIPPNSKKTPNKNKRKRQVKNSTNASVAGNISNPKRIKLEQHVSTAQEPKSAEDSPSSNGGTVVKGKVVNFGGFSARSLRSLQRQRAILKVMNTIGGVAYLREQFYESVSKYMGSTTTLDKKTVRGDVDLMVESEKLGARTEPVSGRKIIFLPTVGEDAIQRYILKEKDSKKATFTDVIHDTEIYFFDQTEKNRFHRGKKSVERIRKFQNRQKNAKIKASDDAISKKSTSVNVSDGKIKRRDKKVSAGRTTVVVENTKEDKTVYHAGTKDGVQALIRAVVVTKSIKNEIMWDKITKLFPNNSLDNLKKKWTARRVRMGHSGWRAYVDKWKKMLVLAIKSEKISLRDVEELDLIKLLDIWTSFDEKEIKRPLFLYKNYEENRKKFTLVRDDTLTHSGNDLAMSSMIQREISSLKKTYTRKISASTKDLSKSQSDDYIRTVIRSILIESPSTTRNEIEALKNVGNESIDNVIMDMAKEKQIYLHGSKLECTDTLPDILENRGNYKDFGVAFQYRCKVNELLEAGNAIVINQEPSDISSWVLIDLISGELLNMDVIPMVRNVRPLTYTSRRFEIRTLTPPLIIYANSQTKLNTARKSAVKVPLGKPFSRLWVNGSGSIRPNIWKQVVTMVVNEIIFHPGITLSRLQSRCREVLSLHEISEICKWLLERQVLITTDFDGYWVNHNWYSIYEST',
|
8
|
-
'>YAL010C otherstuff' => 'MLPYMDQVLRAFYQSTHWSTQNSYEDITATSRTLLDFRIPSAIHLQISNKSTPNTFNSLDFSTRSRINGSLSYLYSDAQQLEKFMRNSTDIPLQDATETYRQLQPNLNFSVSSANTLSSDNTTVDNDKKLLHDSKFVKKSLYYGRMYYPSSDLEAMIIKRLSPQTQFMLKGVSSFKESLNVLTCYFQRDSHRNLQEWIFSTSDLLCGYRVLHNFLTTPSKFNTSLYNNSSLSLGAEFWLGLVSLSPGCSTTLRYYTHSTNTGRPLTLTLSWQPLFGHISSTYSAKTGTNSTFCAKYDFNLYSIESNLSFGCEFWQKKHHLLETNKNNNDKLEPISDELVDINPNSRATKLLHENVPDLNSAVNDIPSTLDIPVHKQKLLNDLTYAFSSSLRKIDEERSTIEKFDNKINSSIFTSVWKLSTSLRDKTLKLLWEGKWRGFLISAGTELVFTRGFQESLSDDEKNDNAISISATDTENGNIPVFPAKFGIQFQYST',
|
9
|
-
'>YAL009W otherstuff' => 'MEPESIGDVGNHAQDDSASIVSGPRRRSTSKTSSAKNIRNSSNISPASMIFRNLLILEDDLRRQAHEQKILKWQFTLFLASMAGVGAFTFYELYFTSDYVKGLHRVILQFTLSFISITVVLFHISGQYRRTIVIPRRFFTSTNKGIRQFNVKLVKVQSTWDEKYTDSVRFVSRTIAYCNIYCLKKFLWLKDDNAIVKFWKSVTIQSQPRIGAVDVKLVLNPRAFSAEIREGWEIYRDEFWAREGARRRKQAHELRPKSE',
|
10
|
-
'>YAL008W otherstuff' => 'MTLAFNMQRLVFRNLNVGKRMFKNVPLWRFNVANKLGKPLTRSVGLGGAGIVAGGFYLMNRQPSKLIFNDSLGAAVKQQGPLEPTVGNSTAITEERRNKISSHKQMFLGSLFGVVLGVTVAKISILFMYVGITSMLLCEWLRYKGWIRINLKNIKSVIVLKDVDLKKLLIDGLLGTEYMGFKVFFTLSFVLASLNANK',
|
11
|
-
'>YAL007C otherstuff' => 'MIKSTIALPSFFIVLILALVNSVAASSSYAPVAISLPAFSKECLYYDMVTEDDSLAVGYQVLTGGNFEIDFDITAPDGSVITSEKQKKYSDFLLKSFGVGKYTFCFSNNYGTALKKVEITLEKEKTLTDEHEADVNNDDIIANNAVEEIDRNLNKITKTLNYLRAREWRNMSTVNSTESRLTWLSILIIIIIAVISIAQVLLIQFLFTGRQKNYV',
|
12
|
-
'>YAL005C otherstuff' => 'MSKAVGIDLGTTYSCVAHFANDRVDIIANDQGNRTTPSFVAFTDTERLIGDAAKNQAAMNPSNTVFDAKRLIGRNFNDPEVQADMKHFPFKLIDVDGKPQIQVEFKGETKNFTPEQISSMVLGKMKETAESYLGAKVNDAVVTVPAYFNDSQRQATKDAGTIAGLNVLRIINEPTAAAIAYGLDKKGKEEHVLIFDLGGGTFDVSLLFIEDGIFEVKATAGDTHLGGEDFDNRLVNHFIQEFKRKNKKDLSTNQRALRRLRTACERAKRTLSSSAQTSVEIDSLFEGIDFYTSITRARFEELCADLFRSTLDPVEKVLRDAKLDKSQVDEIVLVGGSTRIPKVQKLVTDYFNGKEPNRSINPDEAVAYGAAVQAAILTGDESSKTQDLLLLDVAPLSLGIETAGGVMTKLIPRNSTISTKKFEIFSTYADNQPGVLIQVFEGERAKTKDNNLLGKFELSGIPPAPRGVPQIEVTFDVDSNGILNVSAVEKGTGKSNKITITNDKGRLSKEDIEKMVAEAEKFKEEDEKESQRIASKNQLESIAYSLKNTISEAGDKLEQADKDTVTKKAEETISWLDSNTTASKEEFDDKLKELQDIANPIMSKLYQAGGAPGGAAGGAPGGFPGGAPPAPEAEGPTVEEVD',
|
13
|
-
'>YAL004W otherstuff' => 'MGVTSGGLNFKDTVFNEQQRDIESTTTQVENQDVFFLTLLVQTVSNGSGGRFVNNTQDIQTSNGTSILGSLSLRIVEVSWDSDDSVIDLGSQVRFGSFLHLTQDHGGDLFWGKVLGFTLKFNLNLRLTVNIDQLEWEVLHVSLHFWVVEVSTDQTLSVENGIRRIHSSLILSSITNQSFSVSESDKRWSGSVTLIVGNNVHTIISKVSNTRVCCT',
|
14
|
-
'>YAL003W otherstuff' => 'MASTDFSKIETLKQLNASLADKSYIEGTAVSQADVTVFKAFQSAYPEFSRWFNHIASKADEFDSFPAASAAAAEEEEDDDVDLFGSDDEEADAEAEKLKAERIAAYNAKKAAKPAKPAAKSIVTLDVKPWDDETNLEEMVANVKAIEMEGLTWGAHQFIPIGFGIKKLQINCVVEDDKVSLDDLQQSIEEDEDHVQSTDIAAMQKL',
|
15
|
-
'>YAL014C otherstuff' => 'MDVLKLGYELDQLSDLVEERTRLVSVLKLAPTSNDNVTLKRQLGSILELLQKCAPNDELISRYNTILDKIPDTAVDKELYRFQQQVARNTDEVSKESLKKVRFKNDDELTVMYKDDDEQDEESPLPSTHTPYKDEPLQSQLQSQSQPQPPQPMVSNQELFINQQQQLLEQDSHLGALSQSIGRTHDISLDLNNEIVSQNDSLLVDLENLIDNNGRNLNRASRSMHGFNNSRFKDNGNCVIILVLIVVLLLLLLVL',
|
16
|
-
'>YAL013W otherstuff' => 'MSQQTPQESEQTTAKEQDLDQESVLSNIDFNTDLNHNLNLSEYCISSDAGTEKMDSDEEKSLANLPELKYAPKLSSLVKQETLTESLKRPHEDEKEAIDEAKKMKVPGENEDESKEEEKSQELEEAIDSKEKSTDARDEQGDEGDNEEENNEEDNENENEHTAPPALVMPSPIEMEEQRMTALKEITDIEYKFAQLRQKLYDNQLVRLQTELQMCLEGSHPELQVYYSKIAAIRDYKLHRAYQRQKYELSCINTETIATRTFIHQDFHKKVTDLRARLLNRTTQTWYDINKERRDMDIVIPDVNYHVPIKLDNKTLSCITGYASAAQLCYPGEPVAEDLACESIEYRYRANPVDKLEVIVDRMRLNNEISDLEGLRKYFHSFPGAPELNPLRDSEINDDFHQWAQCDRHTGPHTTSFCYS',
|
17
|
-
'>YAL002W otherstuff' => 'MEQNGLDHDSRSSIDTTINDTQKTFLEFRSYTQLSEKLASSSSYTAPPLNEDGPKGVASAVSQGSESVVSWTTLTHVYSILGAYGGPTCLYPTATYFLMGTSKGCVLIFNYNEHLQTILVPTLSEDPSIHSIRSPVKSIVICSDGTHVAASYETGNICIWNLNVGYRVKPTSEPTNGMTPTPALPAVLHIDDHVNKEITGLDFFGARHTALIVSDRTGKVSLYNGYRRGFWQLVYNSKKILDVNSSKEKLIRSKLSPLISREKISTNLLSVLTTTHFALILLSPHVSLMFQETVEPSVQNSLVVNSSISWTQNCSRVAYSVNNKISVISISSSDFNVQSASHSPEFAESILSIQWIDQLLLGVLTISHQFLVLHPQHDFKILLRLDFLIHDLMIPPNKYFVISRRSFYLLTNYSFKIGKFVSWSDITLRHILKGDYLGALEFIESLLQPYCPLANLLKLDNNTEERTKQLMEPFYNLSLAALRFLIKKDNADYNRVYQLLMVVVRVLQQSSKKLDSIPSLDVFLEQGLEFFELKDNAVYFEVVANIVAQGSVTSISPVLFRSIIDYYAKEENLKVIEDLIIMLNPTTLDVDLAVKLCQKYNLFDLLIYIWNKIFDDYQTPVVDLIYRISNQSEKCVIFNGPQVPPETTIFDYVTYILTGRQYPQNLSISPSDKCSKIQRELSAFIFSGFSIKWPSNSNHKLYICENPEEEPAFPYFHLLLKSNPSRFLAMLNEVFEASLFNDDNDMVASVGEAELVSRQYVIDLLLDAMKDTGNSDNIRVLVAIFIATSISKYPQFIKVSNQALDCVVNTICSSRVQGIYEISQIALESLLPYYHSRTTENFILELKEKNFNKVLFHIYKSENKYASALSLILETKDIEKEYNTDIVSITDYILKKCPPGSLECGKVTEVIETNFDLLLSRIGIEKCVTIFSDFDYNLHQEILEVKNEETQQKYLDKLFSTPNINNKVDKRLRNLHIELNCKYKSKREMILWLNGTVLSNAESLQILDLLNQDSNFEAAAIIHERLESFNLAVRDLLSFIEQCLNEGKTNISTLLESLRRAFDDCNSAGTEKKSCWILLITFLITLYGKYPSHDERKDLCNKLLQEAFLGLVRSKSSSQKDSGGEFWEIMSSVLEHQDVILMKVQDLKQLLLNVFNTYKLERSLSELIQKIIEDSSQDLVQQYRKFLSEGWSIHTDDCEICGKKIWGAGLDPLLFLAWENVQRHQDMISVDLKTPLVIFKCHHGFHQTCLENLAQKPDEYSCLICQTESNPKIV',
|
18
|
-
'>YAL012W otherstuff' => 'MTLQESDKFATKAIHAGEHVDVHGSVIEPISLSTTFKQSSPANPIGTYEYSRSQNPNRENLERAVAALENAQYGLAFSSGSATTATILQSLPQGSHAVSIGDVYGGTHRYFTKVANAHGVETSFTNDLLNDLPQLIKENTKLVWIETPTNPTLKVTDIQKVADLIKKHAAGQDVILVVDNTFLSPYISNPLNFGADIVVHSATKYINGHSDVVLGVLATNNKPLYERLQFLQNAIGAIPSPFDAWLTHRGLKTLHLRVRQAALSANKIAEFLAADKENVVAVNYPGLKTHPNYDVVLKQHRDALGGGMISFRIKGGAEAASKFASSTRLFTLAESLGGIESLLEVPAVMTHGGIPKEAREASGVFDDLVRISVGIEDTDDLLEDIKQALKQATN',
|
19
|
-
}
|
20
|
-
data.map do |header,aaseq|
|
21
|
-
FastaObj << Fasta::Prot.new(header, aaseq)
|
22
|
-
end
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
|
@@ -1,141 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
5
|
-
|
6
|
-
require 'validator/prot_from_pep'
|
7
|
-
|
8
|
-
klass = Validator::ProtFromPep
|
9
|
-
|
10
|
-
describe klass, "on fake, simple prots and peps" do
|
11
|
-
before(:each) do
|
12
|
-
# create some proteins and peptides linked up
|
13
|
-
prots = ('a'..'g').to_a.inject( { } ) do |hash,let|
|
14
|
-
prot = OpenStruct.new
|
15
|
-
prot.peps = []
|
16
|
-
hash[let.to_sym] = prot
|
17
|
-
hash
|
18
|
-
end
|
19
|
-
# prots: a.peps = 0,1,4
|
20
|
-
# b.peps = 1
|
21
|
-
# c.peps = 2,5,6
|
22
|
-
# d.peps = 2,7
|
23
|
-
# e.peps = 2
|
24
|
-
# f.peps = 3,4
|
25
|
-
# g.peps = 3,4,8,9,10
|
26
|
-
|
27
|
-
# 0 1 2 3 4 5 6 7
|
28
|
-
peps = [[:a], [:a,:b], [:c,:d,:e], [:f,:g], [:a,:f,:g], [:c], [:c], [:d],
|
29
|
-
# 8 9 10
|
30
|
-
[:g], [:g], [:g]].map do |belongs_to|
|
31
|
-
pep = OpenStruct.new
|
32
|
-
pep.prots = belongs_to.map {|v| prots[v].peps << pep ; prots[v]}
|
33
|
-
pep
|
34
|
-
end
|
35
|
-
@peps = peps
|
36
|
-
@prots = prots
|
37
|
-
|
38
|
-
@normal_frozen = [[0.971428571428572, 0.0586273344048647], [0.95, 0.0838775640874857], [0.907142857142857, 0.116103957269609], [0.878571428571428, 0.133328857783819], [0.814285714285714, 0.147299354691691], [0.735714285714286, 0.186982368192933], [0.65, 0.18812775328873], [0.535714285714286, 0.206630166671598], [0.414285714285714, 0.178909454503803], [0.228571428571429, 0.117254668809732]]
|
39
|
-
@worstcase_frozen = [0.857142857142857, 0.714285714285714, 0.571428571428571, 0.571428571428571, 0.428571428571429, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.142857142857143, 0.142857142857143]
|
40
|
-
end
|
41
|
-
|
42
|
-
it 'calculates normal precision edge cases' do
|
43
|
-
val = klass.new
|
44
|
-
all_wrong = @peps.size
|
45
|
-
val.normal_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == [0.0,0.0]
|
46
|
-
val.normal_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0
|
47
|
-
|
48
|
-
val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == [0.0,0.0]
|
49
|
-
val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0
|
50
|
-
|
51
|
-
all_right = 0
|
52
|
-
val.normal_prothit_precision( @peps, all_right, :num_its => 10).should == [1.0,0.0]
|
53
|
-
val.normal_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
|
54
|
-
end
|
55
|
-
|
56
|
-
it 'calculates normal precision that behaves properly' do
|
57
|
-
val = klass.new
|
58
|
-
prev_mean = 1.0
|
59
|
-
(1...(@peps.size)).to_a.zip( @normal_frozen ) do |num_false, expected|
|
60
|
-
(mean, stdev) = val.normal_prothit_precision( @peps, num_false, :num_its => 20)
|
61
|
-
(mean < prev_mean).should be_true
|
62
|
-
(stdev < 0.4 and stdev > 0.0001).should be_true
|
63
|
-
mean.should be_close(expected[0], 0.000000001)
|
64
|
-
stdev.should be_close(expected[1], 0.000000001)
|
65
|
-
val.normal_prothit_precision( @peps, num_false, :num_its => 1).should be_close(mean, 0.25)
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
it 'calculates worstcase edge cases' do
|
70
|
-
val = klass.new
|
71
|
-
all_wrong = @peps.size
|
72
|
-
val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == 0.0
|
73
|
-
val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0
|
74
|
-
|
75
|
-
val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == 0.0
|
76
|
-
val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0
|
77
|
-
|
78
|
-
all_right = 0
|
79
|
-
val.worstcase_prothit_precision( @peps, all_right, :num_its => 10).should == 1.0
|
80
|
-
val.worstcase_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
|
81
|
-
end
|
82
|
-
|
83
|
-
it 'calculates worstcase precision that behaves properly' do
|
84
|
-
|
85
|
-
val = klass.new
|
86
|
-
prev_worst = 1.0
|
87
|
-
worsts = []
|
88
|
-
(1...(@peps.size)).to_a.zip( @worstcase_frozen ) do |num_false, expected|
|
89
|
-
worst = val.worstcase_prothit_precision( @peps, num_false, :num_its => 20)
|
90
|
-
(worst <= prev_worst).should be_true
|
91
|
-
worst.should be_close(expected, 0.0000000001)
|
92
|
-
end
|
93
|
-
|
94
|
-
end
|
95
|
-
|
96
|
-
it 'calculates prothit precision (worstcase + normal)' do
|
97
|
-
val = klass.new
|
98
|
-
(1...(@peps.size)).to_a.zip( @normal_frozen, @worstcase_frozen ) do |num_false, normal_expected, worstcase_expected|
|
99
|
-
(worst, norm_mean, norm_stdev) = val.prothit_precision( @peps, num_false, :num_its_normal => 20, :num_its_worstcase => 10)
|
100
|
-
worst.should be_close(worstcase_expected, 0.0000000001)
|
101
|
-
norm_mean.should be_close(normal_expected[0], 0.0000000001)
|
102
|
-
norm_stdev.should be_close(normal_expected[1], 0.0000000001)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
it 'gives 1.0 precision for no pephits' do
|
107
|
-
val = klass.new
|
108
|
-
val.prothit_precision( [], 0).should == [1.0, 1.0, 0.0]
|
109
|
-
end
|
110
|
-
|
111
|
-
end
|
112
|
-
|
113
|
-
describe klass, "calculating worstcase prothit precision by numbers" do
|
114
|
-
it "calculates precision correctly in easy cases" do
|
115
|
-
peps_per_prot = [4,4,3,2,2]
|
116
|
-
# no prots completely wrong
|
117
|
-
precision = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 1)
|
118
|
-
precision.should == 1
|
119
|
-
|
120
|
-
# only one protein partially correct
|
121
|
-
precision = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 14)
|
122
|
-
precision.should == 0.2
|
123
|
-
end
|
124
|
-
|
125
|
-
it 'works correctly on other cases' do
|
126
|
-
# 0 1 2 3 4 5 6 7 8
|
127
|
-
expected = [1.0, 5.0/6, 5.0/6, 4.0/6, 4.0/6, 3.0/6, 3.0/6, 3.0/6, 2.0/6,
|
128
|
-
# 9 10 11 12 13 14 15 16 17
|
129
|
-
2.0/6, 2.0/6, 2.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 0.0]
|
130
|
-
num_peps_per_prot = [5,4,3,2,2,1].sort_by { rand }
|
131
|
-
total_peps = num_peps_per_prot.inject(0) {|memo,obj| obj + memo }
|
132
|
-
val = klass.new
|
133
|
-
(0..total_peps).to_a.zip(expected) do |num_wrong, exp|
|
134
|
-
val.worstcase_prothit_precision_by_numbers(num_peps_per_prot, num_wrong).should == exp
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
|
139
|
-
end
|
140
|
-
|
141
|
-
|
@@ -1,146 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
-
require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
|
3
|
-
|
4
|
-
require 'validator/transmem'
|
5
|
-
require 'spec_id/digestor'
|
6
|
-
require File.dirname(__FILE__) + '/fasta_helper'
|
7
|
-
require 'spec_id'
|
8
|
-
|
9
|
-
klass = Validator::Transmem::Protein
|
10
|
-
|
11
|
-
describe klass, "on small mock set" do
|
12
|
-
before(:each) do
|
13
|
-
@toppred_file = Tfiles + '/toppred.small.out'
|
14
|
-
@peps = (0..7).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
|
15
|
-
# certain: 3 0 0 0 2 3 2 1
|
16
|
-
references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C YAL008W YAL009W YAL010C NOTEXISTING1 NOTEXISTING2)
|
17
|
-
# index: 0 1 2 3 4 5 6 7
|
18
|
-
@prots = references.map do |ref|
|
19
|
-
v = SpecID::GenericProt.new
|
20
|
-
v.reference = ref
|
21
|
-
v
|
22
|
-
end
|
23
|
-
|
24
|
-
# TM (? = both)
|
25
|
-
# @prots[8] doesn't have a key in the guy (nil)
|
26
|
-
# SHOULD NOT change the results
|
27
|
-
@peps[0].prots = [@prots[0], @prots[5], @prots[8]] # y
|
28
|
-
@peps[1].prots = [@prots[1], @prots[5], @prots[8]] # ?
|
29
|
-
@peps[2].prots = [@prots[3], @prots[4], @prots[8]] # ?
|
30
|
-
@peps[3].prots = [@prots[2], @prots[8]] # n
|
31
|
-
@peps[4].prots = [@prots[5], @prots[8]] # y
|
32
|
-
@peps[5].prots = [@prots[4], @prots[8]] # y
|
33
|
-
@peps[6].prots = [@prots[8]] # nil pep
|
34
|
-
@peps[7].prots = [@prots[8], @prots[9]] # nil pep
|
35
|
-
|
36
|
-
@validator = klass.new(@toppred_file)
|
37
|
-
@validator.false_to_total_ratio = 1.0
|
38
|
-
end
|
39
|
-
|
40
|
-
it_should_behave_like 'a validator'
|
41
|
-
|
42
|
-
it 'gives correct precision with false ratio (across all option combinations)' do
|
43
|
-
answ = [[2,4], [0,6], [0,6], [-2,8]].map {|v| calc_precision(*v) }
|
44
|
-
[true, false].each do |correct_wins|
|
45
|
-
[true, false].each do |soluble_fraction|
|
46
|
-
val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => soluble_fraction, :correct_wins => correct_wins)
|
47
|
-
val.false_to_total_ratio = 0.5
|
48
|
-
val.pephit_precision(@peps).should == answ.shift
|
49
|
-
#p val.pephit_precision(@peps)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
it 'calculates a correct false to total ratio' do
|
55
|
-
val = klass.new(@toppred_file)
|
56
|
-
fasta_obj = FastaHelper::FastaObj
|
57
|
-
sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
|
58
|
-
sequest_params_obj.opts['first_database_name'] = 'not_real'
|
59
|
-
val.set_false_to_total_ratio( Digestor.digest(fasta_obj, sequest_params_obj) )
|
60
|
-
ratio = val.false_to_total_ratio
|
61
|
-
num_tps_soluble_peps = 777
|
62
|
-
num_fps_insoluble_peps = 741
|
63
|
-
expected_ratio = num_tps_soluble_peps.to_f / (num_tps_soluble_peps + num_fps_insoluble_peps)
|
64
|
-
ratio.should == expected_ratio
|
65
|
-
end
|
66
|
-
|
67
|
-
it 'can grant transmem status to proteins for speed' do
|
68
|
-
val = klass.new(@toppred_file)
|
69
|
-
fasta_obj = FastaHelper::FastaObj
|
70
|
-
sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
|
71
|
-
hash = val.create_transmem_status_hash( Digestor.digest(fasta_obj.prots, sequest_params_obj))
|
72
|
-
fasta_obj.prots.each do |prot|
|
73
|
-
hash.key?(prot).should be_true
|
74
|
-
end
|
75
|
-
frozen = [true, true, false, true, false, false, true, false, true, false, true, true, true]
|
76
|
-
fasta_obj.prots.map {|prot| hash[prot] }.should == frozen
|
77
|
-
end
|
78
|
-
|
79
|
-
def calc_precision(norm, trans)
|
80
|
-
prec = norm.to_f / (norm + trans)
|
81
|
-
end
|
82
|
-
|
83
|
-
it 'can calculate precision incrementally' do
|
84
|
-
val = klass.new(@toppred_file, :min_num_tms => 2, :false_to_total_ratio => 1.0)
|
85
|
-
# usually we'd update the false_to_total_ratio, but not bothering for test
|
86
|
-
# here we HAVE to set the status hash before hand... (we could redo this
|
87
|
-
# section)
|
88
|
-
val.transmem_status_hash = val.create_transmem_status_hash(@peps)
|
89
|
-
|
90
|
-
# manually done:
|
91
|
-
precisions = [0.0, 1.0/2, 2.0/3, 3.0/4, 3.0/5, 3.0/6, 3.0/6, 3.0/6]
|
92
|
-
|
93
|
-
#frozen:
|
94
|
-
calc_bkgs = [1.0, 0.5, 0.333333333333333, 0.25, 0.4, 0.5, 0.5, 0.5]
|
95
|
-
#frozen:
|
96
|
-
false_to_total_ratios = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
|
97
|
-
|
98
|
-
@peps.zip(precisions, calc_bkgs, false_to_total_ratios) do |pep, exp_prec, calc_bkg, false_to_total_ratio|
|
99
|
-
val.increment_pephits_precision(pep).should == exp_prec
|
100
|
-
val.calculated_background.should be_close(calc_bkg, 0.00000000000001)
|
101
|
-
val.false_to_total_ratio.should == false_to_total_ratio
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
it 'creates correct reference hash' do
|
106
|
-
val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => true, :correct_wins => true)
|
107
|
-
val.transmem_by_ti_key.should == {"YAL001C"=>false, "YAL011W"=>false, "YAL009W"=>false, "YAL010C"=>false, "YAL008W"=>true, "YAL007C"=>false, "YAL004W"=>false, "YAL005C"=>false, "YAL003W"=>false, "YAL002W"=>true, "YAL013W"=>false, "YAL014C"=>false, "YAL012W"=>false}
|
108
|
-
end
|
109
|
-
|
110
|
-
|
111
|
-
end
|
112
|
-
|
113
|
-
|
114
|
-
#################################################
|
115
|
-
# REFERENCE for small mock set:
|
116
|
-
#################################################
|
117
|
-
# for mintm >= 3 (T = TP, F = FP, sf = soluble_fraction)
|
118
|
-
# sf=false sf=true
|
119
|
-
# TM cw fw cw fw
|
120
|
-
# 0 y T T F F
|
121
|
-
# 1 ? T F T F
|
122
|
-
# 2 n F F T T
|
123
|
-
# 3 n F F T T
|
124
|
-
# 4 y T T F F
|
125
|
-
# 5 n F F T T
|
126
|
-
#
|
127
|
-
# [tps, fps]
|
128
|
-
# cw=true( sf=true [4,2], sf=false [3,3] )
|
129
|
-
# cw=false( sf=true [3,3], sf=false [2,4] )
|
130
|
-
|
131
|
-
# for mintm >= 2 (T = TP, F = FP, sf = soluble_fraction)
|
132
|
-
# sf=false sf=true
|
133
|
-
# TM cw fw cw fw
|
134
|
-
# 0 y T T F F
|
135
|
-
# 1 ? T F T F
|
136
|
-
# 2 ? T F T F
|
137
|
-
# 3 n F F T T
|
138
|
-
# 4 y T T F F
|
139
|
-
# 5 y T T F F
|
140
|
-
#
|
141
|
-
# [tps, fps]
|
142
|
-
# cw=true( sf=true [3,3], sf=false [5,1] )
|
143
|
-
# cw=false( sf=true [1,5], sf=false [3,3] )
|
144
|
-
#
|
145
|
-
# sf=true( cw=true [3,3], cw=false[1,5] )
|
146
|
-
# sf=false( cw=true [5,1], cw=false[3,3] )
|
@@ -1,58 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
2
|
-
require File.expand_path(File.dirname(__FILE__) + '/../validator_helper')
|
3
|
-
|
4
|
-
require 'validator/true_pos'
|
5
|
-
require 'fasta'
|
6
|
-
require 'spec_id'
|
7
|
-
|
8
|
-
klass = Validator::TruePos
|
9
|
-
describe klass, 'reporting precision on peptides' do
|
10
|
-
|
11
|
-
before(:each) do
|
12
|
-
@myfasta_string =<<END
|
13
|
-
>gi|1245235|ProteinX
|
14
|
-
ABCDEFGHIJKLMNOP
|
15
|
-
>gi|987654|ProteinY
|
16
|
-
AAAAAABBBBBBBBBBBB
|
17
|
-
>gi|1111111|ProteinZ
|
18
|
-
FFFFFFFFFGGGGGGZZZZ
|
19
|
-
END
|
20
|
-
|
21
|
-
@peps = (0..5).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
|
22
|
-
prots = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ someOthergi AnotherGi YetAnotherReference).map do |ref|
|
23
|
-
v = SpecID::GenericProt.new
|
24
|
-
v.reference = ref
|
25
|
-
v
|
26
|
-
end
|
27
|
-
@peps[0].prots = [prots[0], prots[5]] # TP (only in tp wins)
|
28
|
-
@peps[1].prots = [prots[1], prots[2]] # TP always
|
29
|
-
@peps[2].prots = [prots[3], prots[4]] # FP
|
30
|
-
@peps[3].prots = [prots[2]] # TP
|
31
|
-
@peps[4].prots = [prots[5]] # FP
|
32
|
-
@peps[5].prots = [prots[4]] # FP
|
33
|
-
@myfasta_obj = Fasta.new.load(StringIO.new(@myfasta_string))
|
34
|
-
|
35
|
-
@validator = klass.new(@myfasta_obj)
|
36
|
-
end
|
37
|
-
|
38
|
-
it_should_behave_like 'a validator'
|
39
|
-
|
40
|
-
it 'gives correct precision (across all options)' do
|
41
|
-
answ_ar = [
|
42
|
-
[[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
|
43
|
-
[[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]
|
44
|
-
]
|
45
|
-
|
46
|
-
[true, false].each do |correct_wins|
|
47
|
-
val = klass.new(@myfasta_obj, correct_wins)
|
48
|
-
answ = val.pephit_precision(@peps)
|
49
|
-
exp = ValidatorHelper.precision_from_partition_array(answ_ar.shift)
|
50
|
-
answ.should == exp
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
|
data/specs/validator_helper.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
2
|
-
|
3
|
-
class ValidatorHelper
|
4
|
-
def self.precision_from_partition_array(ar)
|
5
|
-
(num_tp, num_fp) = ar.map {|v| v.size}
|
6
|
-
num_tp.to_f / (num_tp + num_fp)
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
module ValidatorHelper::Decoy
|
11
|
-
def self.precision_from_partition_array(ar)
|
12
|
-
(num_maybe_true, num_decoy) = ar.map {|v| v.size}
|
13
|
-
num_tp = num_maybe_true - num_decoy
|
14
|
-
num_fp = num_maybe_true - num_tp
|
15
|
-
num_tp.to_f / (num_tp + num_fp)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
describe 'a validator', :shared => true do
|
20
|
-
before(:each) do
|
21
|
-
@empty_peps = []
|
22
|
-
end
|
23
|
-
it 'gives 1.0 for zero peptides (w/ pephit_precision)' do
|
24
|
-
@validator.pephit_precision(@empty_peps).should == 1.0
|
25
|
-
|
26
|
-
end
|
27
|
-
it 'gives 1.0 for zero peptides (w/ increment_pephits_precision)' do
|
28
|
-
@validator.increment_pephits_precision(@empty_peps).should == 1.0
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|
32
|
-
|
33
|
-
|
data/specs/xml_spec.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
|
2
|
-
|
3
|
-
require 'xml'
|
4
|
-
|
5
|
-
describe XML, 'converting duration to seconds' do
|
6
|
-
it 'converts hours/mins/seconds in combinations' do
|
7
|
-
answ = [0.234, 624, 7392.2]
|
8
|
-
%w(PT0.234S PT10M24S PT2H3M12.2S).zip(answ) do |string, answ|
|
9
|
-
XML.duration_to_seconds(string).should == answ
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|