mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,146 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
3
+
4
+ require 'validator/bias'
5
+
6
+ require File.dirname(__FILE__) + '/fasta_helper'
7
+ require 'spec_id'
8
+
9
+ klass = Validator::Bias
10
+
11
# Exercises Validator::Bias against a small, hand-built set of peptide hits.
#
# Nine mock proteins are built: indices 0-4 carry references that also appear
# in the expected reference hash checked below (YAL002W, YAL001C, YAL003W,
# YAL004W, YAL007C); indices 5-8 (NOT_EXISTING1..4) do not.  Seven peptides
# are then linked to different mixes of those proteins.
describe klass, "on small mock set" do
  before(:each) do
    @peps = (0..6).map do |n|
      pep = SpecID::GenericPep.new
      pep.aaseq = n.to_s
      pep
    end

    references = %w(
      YAL002W YAL001C YAL003W YAL004W YAL007C
      NOT_EXISTING1 NOT_EXISTING2 NOT_EXISTING3 NOT_EXISTING4
    )
    # index:  0       1       2       3       4
    #         5             6             7             8
    @prots = references.map do |ref|
      prot = SpecID::GenericProt.new
      # the identifier is followed by extra description text
      prot.reference = ref + " something else that we don't care about"
      prot
    end

    # Option legend (e = :proteins_expected, cw = :correct_wins):
    #
    # e=t we expect to see the fasta proteins in our hit list
    #   cw=t a single peptide hit from one of these proteins constitutes a
    #        true positive
    #   cw=f all peptide hits must come from one of these proteins to be a
    #        true positive
    #
    # e=f we do not expect to see the fasta obj proteins in our hit list
    #   cw=t a single peptide hit from *outside* this list constitutes a
    #        true positive
    #   cw=f a single peptide hit from our fasta object constitutes a false
    #        positive

    @peps[0].prots = [@prots[0], @prots[5], @prots[8]]
    @peps[1].prots = [@prots[1], @prots[5], @prots[8]]
    @peps[2].prots = [@prots[3], @prots[4], @prots[1]]
    @peps[3].prots = [@prots[7], @prots[8]]
    @peps[4].prots = [@prots[5], @prots[8]]
    @peps[5].prots = [@prots[8]]
    @peps[6].prots = [@prots[5], @prots[6]]

    #################################################
    # REFERENCE for small mock set:
    #################################################
    # pep   1inFst?  allinFst?  cw=t,e=t  cw=t,e=f  cw=f,e=f  cw=f,e=t
    #  0       y         n         t         t         f         f
    #  1       y         n         t         t         f         f
    #  2       y         y         t         f         f         t
    #  3       n         n         f         t         t         f
    #  4       n         n         f         t         t         f
    #  5       n         n         f         t         t         f
    #  6       n         n         f         t         t         f
    #  PR:                        3/7       6/7       4/7       1/7
    #  tp:fp                      3:4       6:1       4:3       1:6

    @fasta_obj = FastaHelper::FastaObj
    @validator = klass.new(@fasta_obj)
  end

  it_should_behave_like 'a validator'

  it 'creates correct reference hash' do
    expected = {"YAL001C"=>true, "YAL011W"=>true, "YAL010C"=>true,
      "YAL009W"=>true, "YAL008W"=>true, "YAL007C"=>true, "YAL005C"=>true,
      "YAL004W"=>true, "YAL003W"=>true, "YAL014C"=>true, "YAL013W"=>true,
      "YAL002W"=>true, "YAL012W"=>true
    }
    val = klass.new(@fasta_obj)
    val.short_reference_hash.should == expected
  end

  it 'gives correct precision and partitions (across all option combinations)' do
    expected_partitions.each do |correct_wins, fasta_expected, tp, fp|
      val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins)
      val.pephit_precision(@peps).should == calc_precision(tp, fp)
      act_tp, act_fp = val.partition(@peps)
      act_tp.size.should == tp
      act_fp.size.should == fp
    end
  end

  it 'correctly incorporates background' do
    background = 0.24
    peps_size = @peps.size
    expected_partitions.each do |correct_wins, fasta_expected, exp_tp, exp_fp|
      val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background)

      # with a background set, the precision must differ from the raw tp/fp ratio
      uncorrected_precision = calc_precision(exp_tp, exp_fp)
      val.pephit_precision(@peps).should_not == uncorrected_precision
      actual_precision = val.pephit_precision(@peps)

      # the partition itself is expected to be the same as without background
      act_tp, act_fp = val.partition(@peps)
      act_tp.size.should == exp_tp
      act_fp.size.should == exp_fp

      exp_fp_corrected = exp_fp.to_f - (peps_size.to_f * background)
      expected_precision = calc_precision(peps_size.to_f - exp_fp_corrected, exp_fp_corrected)
      # internally, the num of false hits is controlled so as not to bottom
      # out below zero, here we control the precision (same effect)
      expected_precision = 1.0 if expected_precision > 1.0
      actual_precision.should == expected_precision
    end
  end

  # NOTE(review): `it_should` is not defined in this file; presumably it is
  # supplied by one of the required helpers -- confirm.
  it_should 'work with false_to_total_ratio!'

  # Expected partition sizes for every option combination, listed in the
  # order the examples walk through them.  Each row is
  # [correct_wins, proteins_expected, num true positives, num false positives]
  # (see the REFERENCE table in the before block).
  def expected_partitions
    [
      [true,  true,  3, 4],   # cw=t,e=t
      [true,  false, 6, 1],   # cw=t,e=f
      [false, true,  1, 6],   # cw=f,e=t
      [false, false, 4, 3]    # cw=f,e=f
    ]
  end

  # Returns the precision tp / (tp + fp) as a Float.
  def calc_precision(tp, fp)
    tp.to_f / (tp + fp)
  end
end
146
+
@@ -0,0 +1,51 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
4
+
5
+ require 'validator/decoy'
6
+ require 'spec_id'
7
+
8
+ klass = Validator::Decoy
9
+
10
# Checks Validator::Decoy precision on six mock peptides linked to three
# "real" proteins (gi|...|ProteinX/Y/Z) and three proteins whose references
# start with FALSE_.
describe klass, 'reporting precision on peptides from cat prots' do

  before(:each) do
    references = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ FALSE_someOthergi FALSE_AnotherGi FALSE_YetAnotherReference)

    hits = (0..5).map do |n|
      pep = SpecID::GenericPep.new
      pep.aaseq = n.to_s
      pep
    end
    proteins = references.map do |ref|
      prot = SpecID::GenericProt.new
      prot.reference = ref
      prot
    end

    hits[0].prots = [proteins[0], proteins[5]]  # TP (only in tp wins)
    hits[1].prots = [proteins[1], proteins[2]]  # TP always
    hits[2].prots = [proteins[3], proteins[4]]  # FP
    hits[3].prots = [proteins[2]]               # TP
    hits[4].prots = [proteins[5]]               # FP
    hits[5].prots = [proteins[4]]               # FP

    @peps = hits
    @validator = klass.new(/FAKE/)
  end

  it_should_behave_like 'a validator'

  it 'gives correct precision (across all option combinations)' do
    # Each row: constructor matcher, incorrect_on_match, correct_wins, and
    # the expected [true positive peps, false positive peps] partition.
    cases = [
      [/^FALSE_/, true,  true,
        [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]]],
      [/^FALSE_/, true,  false,
        [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]],
      ['Protein', false, true,
        [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]]],
      ['Protein', false, false,
        [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]]
    ]

    cases.each do |matcher, incorrect_on_match, correct_wins, partition|
      val = klass.new(matcher, incorrect_on_match, correct_wins)
      answ = val.pephit_precision(@peps)
      exp = ValidatorHelper::Decoy.precision_from_partition_array(partition)
      answ.should == exp
    end
  end

end
51
+
@@ -0,0 +1,26 @@
1
+
2
+
3
# Shared fixture for the validator specs.
#
# FastaObj is a Fasta instance filled, at load time, with one Fasta::Prot per
# entry of the header => amino acid sequence table below (13 entries, headers
# of the form '>YALnnnX otherstuff').
module FastaHelper
  FastaObj = Fasta.new
  data = {
    '>YAL011W otherstuff' => 'MPAVLRTRSKESSIEQKPASRTRTRSRRGKRGRDDDDDDDDEESDDAYDEVGNDYDEYASRAKLATNRPFEIVAGLPASVELPNYNSSLTHPQSIKNSGVLYDSLVSSRRTWVQGEMFELYWRRPKKIVSESTPAATESPTSGTIPLIRDKMQKMCDCVMSGGPHTFKVRLFILKNDKIEQKWQDEQELKKKEKELKRKNDAEAKRLRMEERKRQQMQKKIAKEQKLQLQKENKAKQKLEQEALKLKRKEEMKKLKEQNKNKQGSPSSSMHDPRMIMNLNLMAQEDPKLNTLMETVAKGLANNSQLEEFKKFIEIAKKRSLEENPVNKRPSVTTTRPAPPSKAKDVAEDHRLNSITLVKSSKTAATEPEPKKADDENAEKQQSKEAKTTAESTQVDVKKEEEDVKEKGVKSEDTQKKEDNQVVPKRKRRKNAIKEDKDMQLTAFQQKYVQGAEIILEYLEFTHSRYYLPKKSVVEFLEDTDEIIISWIVIHNSKEIEKFKTKKIKAKLKADQKLNKEDAKPGSDVEKEVSFNPLFEADCPTPLYTPMTMKLSGIHKRFNQIIRNSVSPMEEVVKEMEKILQIGTRLSGYNLWYQLDGYDDEALSESLRFELNEWEHAMRSRRHKR',
    '>YAL001C otherstuff' => 'MVLTIYPDELVQIVSDKIASNKGKITLNQLWDISGKYFDLSDKKVKQFVLSCVILKKDIEVYCDGAITTKNVTDIIGDANHSYSVGITEDSLWTLLTGYTKKESTIGNSAFELLLEVAKSGEKGINTMDLAQVTGQDPRSVTGRIKKINHLLTSSQLIYKGHVVKQLKLKKFSHDGVDSNPYINIRDHLATIVEVVKRSKNGIRQIIDLKRELKFDKEKRLSKAFIAAIAWLDEKEYLKKVLVVSPKNPAIKIRCVKYVKDIPDSKGSPSFEYDSNSADEDSVSDSKAAFEDEDLVEGLDNFNATDLLQNQGLVMEEKEDAVKNEVLLNRFYPLQNQTYDIADKSGLKGISTMDVVNRITGKEFQRAFTKSSEYYLESVDKQKENTGGYRLFRIYDFEGKKKFFRLFTAQNFQKLTNAEDEISVPKGFDELGKSRTDLKTLNEDNFVALNNTVRFTTDSDGQDIFFWHGELKIPPNSKKTPNKNKRKRQVKNSTNASVAGNISNPKRIKLEQHVSTAQEPKSAEDSPSSNGGTVVKGKVVNFGGFSARSLRSLQRQRAILKVMNTIGGVAYLREQFYESVSKYMGSTTTLDKKTVRGDVDLMVESEKLGARTEPVSGRKIIFLPTVGEDAIQRYILKEKDSKKATFTDVIHDTEIYFFDQTEKNRFHRGKKSVERIRKFQNRQKNAKIKASDDAISKKSTSVNVSDGKIKRRDKKVSAGRTTVVVENTKEDKTVYHAGTKDGVQALIRAVVVTKSIKNEIMWDKITKLFPNNSLDNLKKKWTARRVRMGHSGWRAYVDKWKKMLVLAIKSEKISLRDVEELDLIKLLDIWTSFDEKEIKRPLFLYKNYEENRKKFTLVRDDTLTHSGNDLAMSSMIQREISSLKKTYTRKISASTKDLSKSQSDDYIRTVIRSILIESPSTTRNEIEALKNVGNESIDNVIMDMAKEKQIYLHGSKLECTDTLPDILENRGNYKDFGVAFQYRCKVNELLEAGNAIVINQEPSDISSWVLIDLISGELLNMDVIPMVRNVRPLTYTSRRFEIRTLTPPLIIYANSQTKLNTARKSAVKVPLGKPFSRLWVNGSGSIRPNIWKQVVTMVVNEIIFHPGITLSRLQSRCREVLSLHEISEICKWLLERQVLITTDFDGYWVNHNWYSIYEST',
    '>YAL010C otherstuff' => 'MLPYMDQVLRAFYQSTHWSTQNSYEDITATSRTLLDFRIPSAIHLQISNKSTPNTFNSLDFSTRSRINGSLSYLYSDAQQLEKFMRNSTDIPLQDATETYRQLQPNLNFSVSSANTLSSDNTTVDNDKKLLHDSKFVKKSLYYGRMYYPSSDLEAMIIKRLSPQTQFMLKGVSSFKESLNVLTCYFQRDSHRNLQEWIFSTSDLLCGYRVLHNFLTTPSKFNTSLYNNSSLSLGAEFWLGLVSLSPGCSTTLRYYTHSTNTGRPLTLTLSWQPLFGHISSTYSAKTGTNSTFCAKYDFNLYSIESNLSFGCEFWQKKHHLLETNKNNNDKLEPISDELVDINPNSRATKLLHENVPDLNSAVNDIPSTLDIPVHKQKLLNDLTYAFSSSLRKIDEERSTIEKFDNKINSSIFTSVWKLSTSLRDKTLKLLWEGKWRGFLISAGTELVFTRGFQESLSDDEKNDNAISISATDTENGNIPVFPAKFGIQFQYST',
    '>YAL009W otherstuff' => 'MEPESIGDVGNHAQDDSASIVSGPRRRSTSKTSSAKNIRNSSNISPASMIFRNLLILEDDLRRQAHEQKILKWQFTLFLASMAGVGAFTFYELYFTSDYVKGLHRVILQFTLSFISITVVLFHISGQYRRTIVIPRRFFTSTNKGIRQFNVKLVKVQSTWDEKYTDSVRFVSRTIAYCNIYCLKKFLWLKDDNAIVKFWKSVTIQSQPRIGAVDVKLVLNPRAFSAEIREGWEIYRDEFWAREGARRRKQAHELRPKSE',
    '>YAL008W otherstuff' => 'MTLAFNMQRLVFRNLNVGKRMFKNVPLWRFNVANKLGKPLTRSVGLGGAGIVAGGFYLMNRQPSKLIFNDSLGAAVKQQGPLEPTVGNSTAITEERRNKISSHKQMFLGSLFGVVLGVTVAKISILFMYVGITSMLLCEWLRYKGWIRINLKNIKSVIVLKDVDLKKLLIDGLLGTEYMGFKVFFTLSFVLASLNANK',
    '>YAL007C otherstuff' => 'MIKSTIALPSFFIVLILALVNSVAASSSYAPVAISLPAFSKECLYYDMVTEDDSLAVGYQVLTGGNFEIDFDITAPDGSVITSEKQKKYSDFLLKSFGVGKYTFCFSNNYGTALKKVEITLEKEKTLTDEHEADVNNDDIIANNAVEEIDRNLNKITKTLNYLRAREWRNMSTVNSTESRLTWLSILIIIIIAVISIAQVLLIQFLFTGRQKNYV',
    '>YAL005C otherstuff' => 'MSKAVGIDLGTTYSCVAHFANDRVDIIANDQGNRTTPSFVAFTDTERLIGDAAKNQAAMNPSNTVFDAKRLIGRNFNDPEVQADMKHFPFKLIDVDGKPQIQVEFKGETKNFTPEQISSMVLGKMKETAESYLGAKVNDAVVTVPAYFNDSQRQATKDAGTIAGLNVLRIINEPTAAAIAYGLDKKGKEEHVLIFDLGGGTFDVSLLFIEDGIFEVKATAGDTHLGGEDFDNRLVNHFIQEFKRKNKKDLSTNQRALRRLRTACERAKRTLSSSAQTSVEIDSLFEGIDFYTSITRARFEELCADLFRSTLDPVEKVLRDAKLDKSQVDEIVLVGGSTRIPKVQKLVTDYFNGKEPNRSINPDEAVAYGAAVQAAILTGDESSKTQDLLLLDVAPLSLGIETAGGVMTKLIPRNSTISTKKFEIFSTYADNQPGVLIQVFEGERAKTKDNNLLGKFELSGIPPAPRGVPQIEVTFDVDSNGILNVSAVEKGTGKSNKITITNDKGRLSKEDIEKMVAEAEKFKEEDEKESQRIASKNQLESIAYSLKNTISEAGDKLEQADKDTVTKKAEETISWLDSNTTASKEEFDDKLKELQDIANPIMSKLYQAGGAPGGAAGGAPGGFPGGAPPAPEAEGPTVEEVD',
    '>YAL004W otherstuff' => 'MGVTSGGLNFKDTVFNEQQRDIESTTTQVENQDVFFLTLLVQTVSNGSGGRFVNNTQDIQTSNGTSILGSLSLRIVEVSWDSDDSVIDLGSQVRFGSFLHLTQDHGGDLFWGKVLGFTLKFNLNLRLTVNIDQLEWEVLHVSLHFWVVEVSTDQTLSVENGIRRIHSSLILSSITNQSFSVSESDKRWSGSVTLIVGNNVHTIISKVSNTRVCCT',
    '>YAL003W otherstuff' => 'MASTDFSKIETLKQLNASLADKSYIEGTAVSQADVTVFKAFQSAYPEFSRWFNHIASKADEFDSFPAASAAAAEEEEDDDVDLFGSDDEEADAEAEKLKAERIAAYNAKKAAKPAKPAAKSIVTLDVKPWDDETNLEEMVANVKAIEMEGLTWGAHQFIPIGFGIKKLQINCVVEDDKVSLDDLQQSIEEDEDHVQSTDIAAMQKL',
    '>YAL014C otherstuff' => 'MDVLKLGYELDQLSDLVEERTRLVSVLKLAPTSNDNVTLKRQLGSILELLQKCAPNDELISRYNTILDKIPDTAVDKELYRFQQQVARNTDEVSKESLKKVRFKNDDELTVMYKDDDEQDEESPLPSTHTPYKDEPLQSQLQSQSQPQPPQPMVSNQELFINQQQQLLEQDSHLGALSQSIGRTHDISLDLNNEIVSQNDSLLVDLENLIDNNGRNLNRASRSMHGFNNSRFKDNGNCVIILVLIVVLLLLLLVL',
    '>YAL013W otherstuff' => 'MSQQTPQESEQTTAKEQDLDQESVLSNIDFNTDLNHNLNLSEYCISSDAGTEKMDSDEEKSLANLPELKYAPKLSSLVKQETLTESLKRPHEDEKEAIDEAKKMKVPGENEDESKEEEKSQELEEAIDSKEKSTDARDEQGDEGDNEEENNEEDNENENEHTAPPALVMPSPIEMEEQRMTALKEITDIEYKFAQLRQKLYDNQLVRLQTELQMCLEGSHPELQVYYSKIAAIRDYKLHRAYQRQKYELSCINTETIATRTFIHQDFHKKVTDLRARLLNRTTQTWYDINKERRDMDIVIPDVNYHVPIKLDNKTLSCITGYASAAQLCYPGEPVAEDLACESIEYRYRANPVDKLEVIVDRMRLNNEISDLEGLRKYFHSFPGAPELNPLRDSEINDDFHQWAQCDRHTGPHTTSFCYS',
    '>YAL002W otherstuff' => 'MEQNGLDHDSRSSIDTTINDTQKTFLEFRSYTQLSEKLASSSSYTAPPLNEDGPKGVASAVSQGSESVVSWTTLTHVYSILGAYGGPTCLYPTATYFLMGTSKGCVLIFNYNEHLQTILVPTLSEDPSIHSIRSPVKSIVICSDGTHVAASYETGNICIWNLNVGYRVKPTSEPTNGMTPTPALPAVLHIDDHVNKEITGLDFFGARHTALIVSDRTGKVSLYNGYRRGFWQLVYNSKKILDVNSSKEKLIRSKLSPLISREKISTNLLSVLTTTHFALILLSPHVSLMFQETVEPSVQNSLVVNSSISWTQNCSRVAYSVNNKISVISISSSDFNVQSASHSPEFAESILSIQWIDQLLLGVLTISHQFLVLHPQHDFKILLRLDFLIHDLMIPPNKYFVISRRSFYLLTNYSFKIGKFVSWSDITLRHILKGDYLGALEFIESLLQPYCPLANLLKLDNNTEERTKQLMEPFYNLSLAALRFLIKKDNADYNRVYQLLMVVVRVLQQSSKKLDSIPSLDVFLEQGLEFFELKDNAVYFEVVANIVAQGSVTSISPVLFRSIIDYYAKEENLKVIEDLIIMLNPTTLDVDLAVKLCQKYNLFDLLIYIWNKIFDDYQTPVVDLIYRISNQSEKCVIFNGPQVPPETTIFDYVTYILTGRQYPQNLSISPSDKCSKIQRELSAFIFSGFSIKWPSNSNHKLYICENPEEEPAFPYFHLLLKSNPSRFLAMLNEVFEASLFNDDNDMVASVGEAELVSRQYVIDLLLDAMKDTGNSDNIRVLVAIFIATSISKYPQFIKVSNQALDCVVNTICSSRVQGIYEISQIALESLLPYYHSRTTENFILELKEKNFNKVLFHIYKSENKYASALSLILETKDIEKEYNTDIVSITDYILKKCPPGSLECGKVTEVIETNFDLLLSRIGIEKCVTIFSDFDYNLHQEILEVKNEETQQKYLDKLFSTPNINNKVDKRLRNLHIELNCKYKSKREMILWLNGTVLSNAESLQILDLLNQDSNFEAAAIIHERLESFNLAVRDLLSFIEQCLNEGKTNISTLLESLRRAFDDCNSAGTEKKSCWILLITFLITLYGKYPSHDERKDLCNKLLQEAFLGLVRSKSSSQKDSGGEFWEIMSSVLEHQDVILMKVQDLKQLLLNVFNTYKLERSLSELIQKIIEDSSQDLVQQYRKFLSEGWSIHTDDCEICGKKIWGAGLDPLLFLAWENVQRHQDMISVDLKTPLVIFKCHHGFHQTCLENLAQKPDEYSCLICQTESNPKIV',
    '>YAL012W otherstuff' => 'MTLQESDKFATKAIHAGEHVDVHGSVIEPISLSTTFKQSSPANPIGTYEYSRSQNPNRENLERAVAALENAQYGLAFSSGSATTATILQSLPQGSHAVSIGDVYGGTHRYFTKVANAHGVETSFTNDLLNDLPQLIKENTKLVWIETPTNPTLKVTDIQKVADLIKKHAAGQDVILVVDNTFLSPYISNPLNFGADIVVHSATKYINGHSDVVLGVLATNNKPLYERLQFLQNAIGAIPSPFDAWLTHRGLKTLHLRVRQAALSANKIAEFLAADKENVVAVNYPGLKTHPNYDVVLKQHRDALGGGMISFRIKGGAEAASKFASSTRLFTLAESLGGIESLLEVPAVMTHGGIPKEAREASGVFDDLVRISVGIEDTDDLLEDIKQALKQATN',
  }
  # Iterating purely for the side effect of appending, so `each` is used
  # rather than `map` (no result array is needed).
  data.each do |header, aaseq|
    FastaObj << Fasta::Prot.new(header, aaseq)
  end

end
25
+
26
+
@@ -0,0 +1,141 @@
1
+
2
+
3
+
4
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
5
+
6
+ require 'validator/prot_from_pep'
7
+
8
+ klass = Validator::ProtFromPep
9
+
10
# Exercises Validator::ProtFromPep on seven stand-in proteins (:a..:g) and
# eleven stand-in peptides built from OpenStruct objects that are linked to
# each other through their `peps` / `prots` lists.
describe klass, "on fake, simple prots and peps" do
  before(:each) do
    # create some proteins and peptides linked up
    prots = ('a'..'g').to_a.inject( { } ) do |hash,let|
      prot = OpenStruct.new
      prot.peps = []
      hash[let.to_sym] = prot
      hash
    end
    # Resulting links (derived from the peptide list below):
    # prots: a.peps = 0,1,4
    #        b.peps = 1
    #        c.peps = 2,5,6
    #        d.peps = 2,7
    #        e.peps = 2
    #        f.peps = 3,4
    #        g.peps = 3,4,8,9,10

    peps = [
      [:a],            # 0
      [:a, :b],        # 1
      [:c, :d, :e],    # 2
      [:f, :g],        # 3
      [:a, :f, :g],    # 4
      [:c],            # 5
      [:c],            # 6
      [:d],            # 7
      [:g],            # 8
      [:g],            # 9
      [:g]             # 10
    ].map do |belongs_to|
      pep = OpenStruct.new
      # link both directions: protein -> peptide and peptide -> protein
      pep.prots = belongs_to.map {|v| prots[v].peps << pep ; prots[v]}
      pep
    end
    @peps = peps
    @prots = prots

    # Previously recorded [mean, stdev] results for num_false = 1..10
    @normal_frozen = [[0.971428571428572, 0.0586273344048647], [0.95, 0.0838775640874857], [0.907142857142857, 0.116103957269609], [0.878571428571428, 0.133328857783819], [0.814285714285714, 0.147299354691691], [0.735714285714286, 0.186982368192933], [0.65, 0.18812775328873], [0.535714285714286, 0.206630166671598], [0.414285714285714, 0.178909454503803], [0.228571428571429, 0.117254668809732]]
    # Previously recorded worst-case results for num_false = 1..10
    @worstcase_frozen = [0.857142857142857, 0.714285714285714, 0.571428571428571, 0.571428571428571, 0.428571428571429, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.142857142857143, 0.142857142857143]
  end

  it 'calculates normal precision edge cases' do
    val = klass.new
    all_wrong = @peps.size
    val.normal_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == [0.0,0.0]
    val.normal_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0

    val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == [0.0,0.0]
    val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0

    all_right = 0
    val.normal_prothit_precision( @peps, all_right, :num_its => 10).should == [1.0,0.0]
    val.normal_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
  end

  it 'calculates normal precision that behaves properly' do
    val = klass.new
    prev_mean = 1.0
    (1...(@peps.size)).to_a.zip( @normal_frozen ) do |num_false, expected|
      (mean, stdev) = val.normal_prothit_precision( @peps, num_false, :num_its => 20)
      # precision must drop as the number of false hits grows
      (mean < prev_mean).should be_true
      (stdev < 0.4 && stdev > 0.0001).should be_true
      mean.should be_close(expected[0], 0.000000001)
      stdev.should be_close(expected[1], 0.000000001)
      val.normal_prothit_precision( @peps, num_false, :num_its => 1).should be_close(mean, 0.25)
      # carry the mean forward so the next iteration is compared against this
      # one (the frozen means are strictly decreasing, so this is consistent
      # with the be_close checks above)
      prev_mean = mean
    end
  end

  it 'calculates worstcase edge cases' do
    val = klass.new
    all_wrong = @peps.size
    val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == 0.0
    val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0

    val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == 0.0
    val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0

    all_right = 0
    val.worstcase_prothit_precision( @peps, all_right, :num_its => 10).should == 1.0
    val.worstcase_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
  end

  it 'calculates worstcase precision that behaves properly' do
    val = klass.new
    prev_worst = 1.0
    (1...(@peps.size)).to_a.zip( @worstcase_frozen ) do |num_false, expected|
      worst = val.worstcase_prothit_precision( @peps, num_false, :num_its => 20)
      # worst-case precision must never rise as the number of false hits grows
      (worst <= prev_worst).should be_true
      worst.should be_close(expected, 0.0000000001)
      # carry forward for the next comparison (frozen values are non-increasing)
      prev_worst = worst
    end
  end

  it 'calculates prothit precision (worstcase + normal)' do
    val = klass.new
    (1...(@peps.size)).to_a.zip( @normal_frozen, @worstcase_frozen ) do |num_false, normal_expected, worstcase_expected|
      (worst, norm_mean, norm_stdev) = val.prothit_precision( @peps, num_false, :num_its_normal => 20, :num_its_worstcase => 10)
      worst.should be_close(worstcase_expected, 0.0000000001)
      norm_mean.should be_close(normal_expected[0], 0.0000000001)
      norm_stdev.should be_close(normal_expected[1], 0.0000000001)
    end
  end

  it 'gives 1.0 precision for no pephits' do
    val = klass.new
    val.prothit_precision( [], 0).should == [1.0, 1.0, 0.0]
  end

end
112
+
113
# Checks worstcase_prothit_precision_by_numbers, which is driven only by a
# list of peptide counts per protein and a number of wrong peptide hits.
describe klass, "calculating worstcase prothit precision by numbers" do
  it "calculates precision correctly in easy cases" do
    peps_per_prot = [4,4,3,2,2]

    # no prots completely wrong
    with_one_wrong = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 1)
    with_one_wrong.should == 1

    # only one protein partially correct
    with_fourteen_wrong = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 14)
    with_fourteen_wrong.should == 0.2
  end

  it 'works correctly on other cases' do
    # expected precision, indexed by the number of wrong peptide hits
    expected = [
      1.0,                                    # 0
      5.0/6, 5.0/6,                           # 1-2
      4.0/6, 4.0/6,                           # 3-4
      3.0/6, 3.0/6, 3.0/6,                    # 5-7
      2.0/6, 2.0/6, 2.0/6, 2.0/6,             # 8-11
      1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6,      # 12-16
      0.0                                     # 17
    ]
    # random order of the per-protein counts on every run
    num_peps_per_prot = [5,4,3,2,2,1].sort_by { rand }
    total_peps = num_peps_per_prot.inject(0) { |sum, count| sum + count }
    val = klass.new
    (0..total_peps).zip(expected).each do |num_wrong, exp|
      val.worstcase_prothit_precision_by_numbers(num_peps_per_prot, num_wrong).should == exp
    end
  end

end
140
+
141
+
@@ -0,0 +1,145 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
3
+
4
+ require 'validator/transmem'
5
+ require 'spec_id/digestor'
6
+ require File.dirname(__FILE__) + '/fasta_helper'
7
+ require 'spec_id'
8
+
9
+ klass = Validator::Transmem::Protein
10
+
11
describe klass, "on small mock set" do
  # Returns norm / (norm + trans) as a Float.
  def calc_precision(norm, trans)
    norm.to_f / (norm + trans)
  end

  before(:each) do
    @toppred_file = Tfiles + '/toppred.small.out'
    @peps = (0..7).to_a.map do |n|
      pep = SpecID::GenericPep.new
      pep.aaseq = n.to_s
      pep
    end
    # certain: 3 0 0 0 2 3 2 1
    references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C YAL008W YAL009W YAL010C NOTEXISTING1 NOTEXISTING2)
    # index: 0 1 2 3 4 5 6 7
    @prots = references.map do |ref|
      prot = SpecID::GenericProt.new
      prot.reference = ref
      prot
    end

    # TM (? = both)
    # @prots[8] doesn't have a key in the guy (nil)
    # SHOULD NOT change the results
    # Indices into @prots for each peptide, in @peps order.
    prot_indices = [
      [0, 5, 8],  # y
      [1, 5, 8],  # ?
      [3, 4, 8],  # ?
      [2, 8],     # n
      [5, 8],     # y
      [4, 8],     # y
      [8],        # nil pep
      [8, 9]      # nil pep
    ]
    @peps.zip(prot_indices) do |pep, indices|
      pep.prots = indices.map {|i| @prots[i] }
    end

    @validator = klass.new(@toppred_file)
  end

  it_should_behave_like 'a validator'

  it 'gives correct precision with false ratio (across all option combinations)' do
    expected = [[2,4], [0,6], [0,6], [-2,8]].map {|pair| calc_precision(*pair) }
    # [correct_wins, soluble_fraction], in the same order as the expected values
    option_combos = [[true, true], [true, false], [false, true], [false, false]]
    option_combos.zip(expected) do |(correct_wins, soluble_fraction), exp|
      val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => soluble_fraction, :correct_wins => correct_wins)
      val.false_to_total_ratio = 0.5
      val.pephit_precision(@peps).should == exp
    end
  end

  it 'calculates a correct false to total ratio' do
    val = klass.new(@toppred_file)
    fasta_obj = FastaHelper::FastaObj
    sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
    sequest_params_obj.opts['first_database_name'] = 'not_real'
    val.set_false_to_total_ratio( Digestor.digest(fasta_obj, sequest_params_obj) )

    num_tps_soluble_peps = 777
    num_fps_insoluble_peps = 741
    expected_ratio = num_tps_soluble_peps.to_f / (num_tps_soluble_peps + num_fps_insoluble_peps)
    val.false_to_total_ratio.should == expected_ratio
  end

  it 'can grant transmem status to proteins for speed' do
    val = klass.new(@toppred_file)
    fasta_obj = FastaHelper::FastaObj
    sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
    hash = val.create_transmem_status_hash( Digestor.digest(fasta_obj.prots, sequest_params_obj))

    # every protein must have an entry in the hash
    fasta_obj.prots.each do |prot|
      hash.key?(prot).should be_true
    end

    frozen = [true, true, false, true, false, false, true, false, true, false, true, true, true]
    fasta_obj.prots.map {|prot| hash[prot] }.should == frozen
  end

  it 'can calculate precision incrementally' do
    val = klass.new(@toppred_file, :min_num_tms => 2)
    # usually we'd update the false_to_total_ratio, but not bothering for test
    # here we HAVE to set the status hash before hand... (we could redo this
    # section)
    val.transmem_status_hash = val.create_transmem_status_hash(@peps)

    # manually done:
    precisions = [0.0, 1.0/2, 2.0/3, 3.0/4, 3.0/5, 3.0/6, 3.0/6, 3.0/6]
    # frozen:
    calc_bkgs = [1.0, 0.5, 0.333333333333333, 0.25, 0.4, 0.5, 0.5, 0.5]
    # frozen:
    false_to_total_ratios = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

    @peps.zip(precisions, calc_bkgs, false_to_total_ratios) do |pep, exp_prec, exp_bkg, exp_ratio|
      val.increment_pephits_precision(pep).should == exp_prec
      val.calculated_background.should be_close(exp_bkg, 0.00000000000001)
      val.false_to_total_ratio.should == exp_ratio
    end
  end

  it 'creates correct reference hash' do
    val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => true, :correct_wins => true)
    val.transmem_by_ti_key.should == {"YAL001C"=>false, "YAL011W"=>false, "YAL009W"=>false, "YAL010C"=>false, "YAL008W"=>true, "YAL007C"=>false, "YAL004W"=>false, "YAL005C"=>false, "YAL003W"=>false, "YAL002W"=>true, "YAL013W"=>false, "YAL014C"=>false, "YAL012W"=>false}
  end
end
111
+
112
+
113
+ #################################################
114
+ # REFERENCE for small mock set:
115
+ #################################################
116
+ # for mintm >= 3 (T = TP, F = FP, sf = soluble_fraction)
117
+ # sf=false sf=true
118
+ # TM cw fw cw fw
119
+ # 0 y T T F F
120
+ # 1 ? T F T F
121
+ # 2 n F F T T
122
+ # 3 n F F T T
123
+ # 4 y T T F F
124
+ # 5 n F F T T
125
+ #
126
+ # [tps, fps]
127
+ # cw=true( sf=true [4,2], sf=false [3,3] )
128
+ # cw=false( sf=true [3,3], sf=false [2,4] )
129
+
130
+ # for mintm >= 2 (T = TP, F = FP, sf = soluble_fraction)
131
+ # sf=false sf=true
132
+ # TM cw fw cw fw
133
+ # 0 y T T F F
134
+ # 1 ? T F T F
135
+ # 2 ? T F T F
136
+ # 3 n F F T T
137
+ # 4 y T T F F
138
+ # 5 y T T F F
139
+ #
140
+ # [tps, fps]
141
+ # cw=true( sf=true [3,3], sf=false [5,1] )
142
+ # cw=false( sf=true [1,5], sf=false [3,3] )
143
+ #
144
+ # sf=true( cw=true [3,3], cw=false[1,5] )
145
+ # sf=false( cw=true [5,1], cw=false[3,3] )
@@ -0,0 +1,58 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require File.expand_path(File.dirname(__FILE__) + '/../validator_helper')
3
+
4
+ require 'validator/true_pos'
5
+ require 'fasta'
6
+ require 'spec_id'
7
+
8
+ klass = Validator::TruePos
9
describe klass, 'reporting precision on peptides' do

  before(:each) do
    # three-protein FASTA database, one newline-terminated line per entry
    @myfasta_string = [
      '>gi|1245235|ProteinX',
      'ABCDEFGHIJKLMNOP',
      '>gi|987654|ProteinY',
      'AAAAAABBBBBBBBBBBB',
      '>gi|1111111|ProteinZ',
      'FFFFFFFFFGGGGGGZZZZ'
    ].map {|line| line + "\n" }.join

    @peps = (0..5).to_a.map do |n|
      pep = SpecID::GenericPep.new
      pep.aaseq = n.to_s
      pep
    end
    prots = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ someOthergi AnotherGi YetAnotherReference).map do |ref|
      prot = SpecID::GenericProt.new
      prot.reference = ref
      prot
    end

    # Indices into prots for each peptide, in @peps order.
    prot_indices = [
      [0, 5],  # TP (only in tp wins)
      [1, 2],  # TP always
      [3, 4],  # FP
      [2],     # TP
      [5],     # FP
      [4]      # FP
    ]
    @peps.zip(prot_indices) do |pep, indices|
      pep.prots = indices.map {|i| prots[i] }
    end

    @myfasta_obj = Fasta.new.load(StringIO.new(@myfasta_string))
    @validator = klass.new(@myfasta_obj)
  end

  it_should_behave_like 'a validator'

  it 'gives correct precision (across all options)' do
    # [correct_wins, [true positives, false positives]]
    cases = [
      [true,  [@peps.values_at(0, 1, 3), @peps.values_at(2, 4, 5)]],
      [false, [@peps.values_at(1, 3),    @peps.values_at(0, 2, 4, 5)]]
    ]

    cases.each do |correct_wins, partition|
      val = klass.new(@myfasta_obj, correct_wins)
      observed = val.pephit_precision(@peps)
      observed.should == ValidatorHelper.precision_from_partition_array(partition)
    end
  end

end
56
+
57
+
58
+
@@ -0,0 +1,33 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
# Helpers that compute the precision a validator spec should expect from a
# hand-partitioned list of hits.
class ValidatorHelper
  # ar is a two-element array [true_positives, false_positives]; only the
  # size of each element is used.
  #
  # Returns num_tp / (num_tp + num_fp) as a Float, or 1.0 when both
  # partitions are empty (instead of the NaN that 0.0 / 0 would give), which
  # matches the "1.0 for zero peptides" behavior required of validators.
  def self.precision_from_partition_array(ar)
    (num_tp, num_fp) = ar.map {|v| v.size }
    total = num_tp + num_fp
    return 1.0 if total == 0
    num_tp.to_f / total
  end
end

module ValidatorHelper::Decoy
  # ar is a two-element array [maybe_true_hits, decoy_hits]; only the size of
  # each element is used.  The number of decoy hits is taken as the number of
  # false positives among the maybe-true hits.
  #
  # Returns (num_maybe_true - num_decoy) / num_maybe_true as a Float, or 1.0
  # when both partitions are empty (instead of NaN).
  def self.precision_from_partition_array(ar)
    (num_maybe_true, num_decoy) = ar.map {|v| v.size }
    return 1.0 if num_maybe_true == 0 && num_decoy == 0
    num_tp = num_maybe_true - num_decoy
    num_fp = num_maybe_true - num_tp
    num_tp.to_f / (num_tp + num_fp)
  end
end
18
+
19
# Shared examples for validators; the including group is expected to set
# @validator in its own before block.
describe 'a validator', :shared => true do
  before(:each) do
    @empty_peps = []
  end

  it 'gives 1.0 for zero peptides (w/ pephit_precision)' do
    precision = @validator.pephit_precision(@empty_peps)
    precision.should == 1.0
  end

  it 'gives 1.0 for zero peptides (w/ increment_pephits_precision)' do
    precision = @validator.increment_pephits_precision(@empty_peps)
    precision.should == 1.0
  end
end
32
+
33
+
data/specs/xml_spec.rb ADDED
@@ -0,0 +1,12 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
+ require 'xml'
4
+
5
describe XML, 'converting duration to seconds' do
  it 'converts hours/mins/seconds in combinations' do
    expected_seconds = [0.234, 624, 7392.2]
    # The block parameters get their own names so they do not shadow (or, on
    # Ruby 1.8, overwrite) the local holding the expected values.
    %w(PT0.234S PT10M24S PT2H3M12.2S).zip(expected_seconds) do |duration, seconds|
      XML.duration_to_seconds(duration).should == seconds
    end
  end
end