mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,146 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
3
+
4
+ require 'validator/bias'
5
+
6
+ require File.dirname(__FILE__) + '/fasta_helper'
7
+ require 'spec_id'
8
+
9
+ klass = Validator::Bias
10
+
11
+ describe klass, "on small mock set" do
12
+ before(:each) do
13
+ @peps = (0..6).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
14
+ references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C NOT_EXISTING1 NOT_EXISTING2 NOT_EXISTING3 NOT_EXISTING4)
15
+ # index: 0 1 2 3 4 5 6 7
16
+ # index: 8
17
+ @prots = references.map do |ref|
18
+ v = SpecID::GenericProt.new
19
+ v.reference = ref + " something else that we don't care about"
20
+ v
21
+ end
22
+
23
+ # e=t (:proteins_expected) we expect to see the fasta proteins in our hit list
24
+ # cw=t (:correct_wins) a single peptide hit from one of these proteins constitutes a true
25
+ # positive
26
+ # cw=f all peptide hits must come from one of these proteins to be a true
27
+ # positive
28
+ #
29
+ # e=f we do not expect to see the fasta obj proteins in our hit list
30
+ # cw=t a single peptide hit from *outside* this list constitutes a true
31
+ # positive
32
+ # cw=f a single peptide hit from our fasta object constitutes a false
33
+ # positive
34
+ #
35
+
36
+ @peps[0].prots = [@prots[0], @prots[5], @prots[8]]
37
+ @peps[1].prots = [@prots[1], @prots[5], @prots[8]]
38
+ @peps[2].prots = [@prots[3], @prots[4], @prots[1]]
39
+ @peps[3].prots = [@prots[7], @prots[8]]
40
+ @peps[4].prots = [@prots[5], @prots[8]]
41
+ @peps[5].prots = [@prots[8]]
42
+ @peps[6].prots = [@prots[5], @prots[6]]
43
+
44
+ #################################################
45
+ # REFERENCE for small mock set:
46
+ #################################################
47
+ # pep 1inFst? allinFst? cw=t,e=t cw=t,e=f cw=f,e=f cw=f,e=t
48
+ # 0 y n t t f f
49
+ # 1 y n t t f f
50
+ # 2 y y t f f t
51
+ # 3 n n f t t f
52
+ # 4 n n f t t f
53
+ # 5 n n f t t f
54
+ # 6 n n f t t f
55
+ # PR: 3/7 6/7 4/7 1/7
56
+ # tp:fp 3:4 6:1 4:3 1:6
57
+
58
+ @fasta_obj = FastaHelper::FastaObj
59
+ @validator = klass.new(@fasta_obj)
60
+ end
61
+
62
+ it_should_behave_like 'a validator'
63
+
64
+ it 'creates correct reference hash' do
65
+ expected = {"YAL001C"=>true, "YAL011W"=>true, "YAL010C"=>true,
66
+ "YAL009W"=>true, "YAL008W"=>true, "YAL007C"=>true, "YAL005C"=>true,
67
+ "YAL004W"=>true, "YAL003W"=>true, "YAL014C"=>true, "YAL013W"=>true,
68
+ "YAL002W"=>true, "YAL012W"=>true
69
+ }
70
+ val = klass.new(@fasta_obj)
71
+ val.short_reference_hash.should == expected
72
+ end
73
+
74
+ it 'gives correct precision and partitions (across all option combinations)' do
75
+ answ = [[3,4], [6,1], [1,6], [4,3]]
76
+ # cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
77
+ [true, false].each do |correct_wins|
78
+ [true, false].each do |fasta_expected|
79
+ val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins)
80
+ tp, fp = answ.shift
81
+ exp = calc_precision(tp, fp)
82
+ val.pephit_precision(@peps).should == exp
83
+ act_tp, act_fp = val.partition(@peps)
84
+ act_tp.size.should == tp
85
+ act_fp.size.should == fp
86
+ end
87
+ end
88
+ end
89
+
90
+ it 'correctly incorporates background' do
91
+ answ = [[3,4], [6,1], [1,6], [4,3]]
92
+ # cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
93
+ background = 0.24
94
+ [true, false].each do |correct_wins|
95
+ [true, false].each do |fasta_expected|
96
+ val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background)
97
+ peps_size = @peps.size
98
+ exp_tp, exp_fp = answ.shift
99
+ exp = calc_precision(exp_tp, exp_fp)
100
+ val.pephit_precision(@peps).should_not == exp
101
+ actual_precision = val.pephit_precision(@peps)
102
+ act_tp, act_fp = val.partition(@peps)
103
+ act_tp.size.should == exp_tp
104
+ act_fp.size.should == exp_fp
105
+ exp_fp_correctd = exp_fp.to_f - (peps_size.to_f * background)
106
+ expected_precision = calc_precision(peps_size.to_f - exp_fp_correctd, exp_fp_correctd)
107
+ # internally, the number of false hits is kept from dropping below
108
+ # zero; here we cap the expected precision at 1.0 instead (same effect)
109
+ expected_precision = 1.0 if expected_precision > 1.0
110
+ actual_precision.should == expected_precision
111
+ end
112
+ end
113
+ end
114
+
115
+ it 'correctly incorporates background' do
116
+ answ = [[3,4], [6,1], [1,6], [4,3]]
117
+ # cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
118
+ background = 0.24
119
+ [true, false].each do |correct_wins|
120
+ [true, false].each do |fasta_expected|
121
+ val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background)
122
+ peps_size = @peps.size
123
+ exp_tp, exp_fp = answ.shift
124
+ exp = calc_precision(exp_tp, exp_fp)
125
+ val.pephit_precision(@peps).should_not == exp
126
+ actual_precision = val.pephit_precision(@peps)
127
+ act_tp, act_fp = val.partition(@peps)
128
+ act_tp.size.should == exp_tp
129
+ act_fp.size.should == exp_fp
130
+ exp_fp_correctd = exp_fp.to_f - (peps_size.to_f * background)
131
+ expected_precision = calc_precision(peps_size.to_f - exp_fp_correctd, exp_fp_correctd)
132
+ # internally, the number of false hits is kept from dropping below
133
+ # zero; here we cap the expected precision at 1.0 instead (same effect)
134
+ expected_precision = 1.0 if expected_precision > 1.0
135
+ actual_precision.should == expected_precision
136
+ end
137
+ end
138
+ end
139
+
140
+ it_should 'work with false_to_total_ratio!'
141
+
142
+ def calc_precision(tp, fp)
143
+ prec = tp.to_f / (tp + fp)
144
+ end
145
+ end
146
+
@@ -0,0 +1,51 @@
1
+
2
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
+ require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
4
+
5
+ require 'validator/decoy'
6
+ require 'spec_id'
7
+
8
+ klass = Validator::Decoy
9
+
10
+ describe klass, 'reporting precision on peptides from cat prots' do
11
+
12
+ before(:each) do
13
+ peps = (0..5).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
14
+ prots = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ FALSE_someOthergi FALSE_AnotherGi FALSE_YetAnotherReference).map do |ref|
15
+ v = SpecID::GenericProt.new
16
+ v.reference = ref
17
+ v
18
+ end
19
+ peps[0].prots = [prots[0], prots[5]] # TP (only in tp wins)
20
+ peps[1].prots = [prots[1], prots[2]] # TP always
21
+ peps[2].prots = [prots[3], prots[4]] # FP
22
+ peps[3].prots = [prots[2]] # TP
23
+ peps[4].prots = [prots[5]] # FP
24
+ peps[5].prots = [prots[4]] # FP
25
+ @peps = peps
26
+ @validator = klass.new(/FAKE/)
27
+ end
28
+
29
+ it_should_behave_like 'a validator'
30
+
31
+ it 'gives correct precision (across all option combinations)' do
32
+ answ_arr = [
33
+ [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
34
+ [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]],
35
+ [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
36
+ [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]
37
+ ]
38
+ protein_matches = [/^FALSE_/, /^FALSE_/, 'Protein', 'Protein']
39
+
40
+ [true, false].each do |incorrect_on_match|
41
+ [true, false].each do |correct_wins|
42
+ val = klass.new(protein_matches.shift, incorrect_on_match, correct_wins)
43
+ answ = val.pephit_precision(@peps)
44
+ exp = ValidatorHelper::Decoy.precision_from_partition_array(answ_arr.shift)
45
+ answ.should == exp
46
+ end
47
+ end
48
+ end
49
+
50
+ end
51
+
@@ -0,0 +1,26 @@
1
+
2
+
3
+ module FastaHelper
4
+ FastaObj = Fasta.new
5
+ data = {
6
+ '>YAL011W otherstuff' => 'MPAVLRTRSKESSIEQKPASRTRTRSRRGKRGRDDDDDDDDEESDDAYDEVGNDYDEYASRAKLATNRPFEIVAGLPASVELPNYNSSLTHPQSIKNSGVLYDSLVSSRRTWVQGEMFELYWRRPKKIVSESTPAATESPTSGTIPLIRDKMQKMCDCVMSGGPHTFKVRLFILKNDKIEQKWQDEQELKKKEKELKRKNDAEAKRLRMEERKRQQMQKKIAKEQKLQLQKENKAKQKLEQEALKLKRKEEMKKLKEQNKNKQGSPSSSMHDPRMIMNLNLMAQEDPKLNTLMETVAKGLANNSQLEEFKKFIEIAKKRSLEENPVNKRPSVTTTRPAPPSKAKDVAEDHRLNSITLVKSSKTAATEPEPKKADDENAEKQQSKEAKTTAESTQVDVKKEEEDVKEKGVKSEDTQKKEDNQVVPKRKRRKNAIKEDKDMQLTAFQQKYVQGAEIILEYLEFTHSRYYLPKKSVVEFLEDTDEIIISWIVIHNSKEIEKFKTKKIKAKLKADQKLNKEDAKPGSDVEKEVSFNPLFEADCPTPLYTPMTMKLSGIHKRFNQIIRNSVSPMEEVVKEMEKILQIGTRLSGYNLWYQLDGYDDEALSESLRFELNEWEHAMRSRRHKR',
7
+ '>YAL001C otherstuff' => 'MVLTIYPDELVQIVSDKIASNKGKITLNQLWDISGKYFDLSDKKVKQFVLSCVILKKDIEVYCDGAITTKNVTDIIGDANHSYSVGITEDSLWTLLTGYTKKESTIGNSAFELLLEVAKSGEKGINTMDLAQVTGQDPRSVTGRIKKINHLLTSSQLIYKGHVVKQLKLKKFSHDGVDSNPYINIRDHLATIVEVVKRSKNGIRQIIDLKRELKFDKEKRLSKAFIAAIAWLDEKEYLKKVLVVSPKNPAIKIRCVKYVKDIPDSKGSPSFEYDSNSADEDSVSDSKAAFEDEDLVEGLDNFNATDLLQNQGLVMEEKEDAVKNEVLLNRFYPLQNQTYDIADKSGLKGISTMDVVNRITGKEFQRAFTKSSEYYLESVDKQKENTGGYRLFRIYDFEGKKKFFRLFTAQNFQKLTNAEDEISVPKGFDELGKSRTDLKTLNEDNFVALNNTVRFTTDSDGQDIFFWHGELKIPPNSKKTPNKNKRKRQVKNSTNASVAGNISNPKRIKLEQHVSTAQEPKSAEDSPSSNGGTVVKGKVVNFGGFSARSLRSLQRQRAILKVMNTIGGVAYLREQFYESVSKYMGSTTTLDKKTVRGDVDLMVESEKLGARTEPVSGRKIIFLPTVGEDAIQRYILKEKDSKKATFTDVIHDTEIYFFDQTEKNRFHRGKKSVERIRKFQNRQKNAKIKASDDAISKKSTSVNVSDGKIKRRDKKVSAGRTTVVVENTKEDKTVYHAGTKDGVQALIRAVVVTKSIKNEIMWDKITKLFPNNSLDNLKKKWTARRVRMGHSGWRAYVDKWKKMLVLAIKSEKISLRDVEELDLIKLLDIWTSFDEKEIKRPLFLYKNYEENRKKFTLVRDDTLTHSGNDLAMSSMIQREISSLKKTYTRKISASTKDLSKSQSDDYIRTVIRSILIESPSTTRNEIEALKNVGNESIDNVIMDMAKEKQIYLHGSKLECTDTLPDILENRGNYKDFGVAFQYRCKVNELLEAGNAIVINQEPSDISSWVLIDLISGELLNMDVIPMVRNVRPLTYTSRRFEIRTLTPPLIIYANSQTKLNTARKSAVKVPLGKPFSRLWVNGSGSIRPNIWKQVVTMVVNEIIFHPGITLSRLQSRCREVLSLHEISEICKWLLERQVLITTDFDGYWVNHNWYSIYEST',
8
+ '>YAL010C otherstuff' => 'MLPYMDQVLRAFYQSTHWSTQNSYEDITATSRTLLDFRIPSAIHLQISNKSTPNTFNSLDFSTRSRINGSLSYLYSDAQQLEKFMRNSTDIPLQDATETYRQLQPNLNFSVSSANTLSSDNTTVDNDKKLLHDSKFVKKSLYYGRMYYPSSDLEAMIIKRLSPQTQFMLKGVSSFKESLNVLTCYFQRDSHRNLQEWIFSTSDLLCGYRVLHNFLTTPSKFNTSLYNNSSLSLGAEFWLGLVSLSPGCSTTLRYYTHSTNTGRPLTLTLSWQPLFGHISSTYSAKTGTNSTFCAKYDFNLYSIESNLSFGCEFWQKKHHLLETNKNNNDKLEPISDELVDINPNSRATKLLHENVPDLNSAVNDIPSTLDIPVHKQKLLNDLTYAFSSSLRKIDEERSTIEKFDNKINSSIFTSVWKLSTSLRDKTLKLLWEGKWRGFLISAGTELVFTRGFQESLSDDEKNDNAISISATDTENGNIPVFPAKFGIQFQYST',
9
+ '>YAL009W otherstuff' => 'MEPESIGDVGNHAQDDSASIVSGPRRRSTSKTSSAKNIRNSSNISPASMIFRNLLILEDDLRRQAHEQKILKWQFTLFLASMAGVGAFTFYELYFTSDYVKGLHRVILQFTLSFISITVVLFHISGQYRRTIVIPRRFFTSTNKGIRQFNVKLVKVQSTWDEKYTDSVRFVSRTIAYCNIYCLKKFLWLKDDNAIVKFWKSVTIQSQPRIGAVDVKLVLNPRAFSAEIREGWEIYRDEFWAREGARRRKQAHELRPKSE',
10
+ '>YAL008W otherstuff' => 'MTLAFNMQRLVFRNLNVGKRMFKNVPLWRFNVANKLGKPLTRSVGLGGAGIVAGGFYLMNRQPSKLIFNDSLGAAVKQQGPLEPTVGNSTAITEERRNKISSHKQMFLGSLFGVVLGVTVAKISILFMYVGITSMLLCEWLRYKGWIRINLKNIKSVIVLKDVDLKKLLIDGLLGTEYMGFKVFFTLSFVLASLNANK',
11
+ '>YAL007C otherstuff' => 'MIKSTIALPSFFIVLILALVNSVAASSSYAPVAISLPAFSKECLYYDMVTEDDSLAVGYQVLTGGNFEIDFDITAPDGSVITSEKQKKYSDFLLKSFGVGKYTFCFSNNYGTALKKVEITLEKEKTLTDEHEADVNNDDIIANNAVEEIDRNLNKITKTLNYLRAREWRNMSTVNSTESRLTWLSILIIIIIAVISIAQVLLIQFLFTGRQKNYV',
12
+ '>YAL005C otherstuff' => 'MSKAVGIDLGTTYSCVAHFANDRVDIIANDQGNRTTPSFVAFTDTERLIGDAAKNQAAMNPSNTVFDAKRLIGRNFNDPEVQADMKHFPFKLIDVDGKPQIQVEFKGETKNFTPEQISSMVLGKMKETAESYLGAKVNDAVVTVPAYFNDSQRQATKDAGTIAGLNVLRIINEPTAAAIAYGLDKKGKEEHVLIFDLGGGTFDVSLLFIEDGIFEVKATAGDTHLGGEDFDNRLVNHFIQEFKRKNKKDLSTNQRALRRLRTACERAKRTLSSSAQTSVEIDSLFEGIDFYTSITRARFEELCADLFRSTLDPVEKVLRDAKLDKSQVDEIVLVGGSTRIPKVQKLVTDYFNGKEPNRSINPDEAVAYGAAVQAAILTGDESSKTQDLLLLDVAPLSLGIETAGGVMTKLIPRNSTISTKKFEIFSTYADNQPGVLIQVFEGERAKTKDNNLLGKFELSGIPPAPRGVPQIEVTFDVDSNGILNVSAVEKGTGKSNKITITNDKGRLSKEDIEKMVAEAEKFKEEDEKESQRIASKNQLESIAYSLKNTISEAGDKLEQADKDTVTKKAEETISWLDSNTTASKEEFDDKLKELQDIANPIMSKLYQAGGAPGGAAGGAPGGFPGGAPPAPEAEGPTVEEVD',
13
+ '>YAL004W otherstuff' => 'MGVTSGGLNFKDTVFNEQQRDIESTTTQVENQDVFFLTLLVQTVSNGSGGRFVNNTQDIQTSNGTSILGSLSLRIVEVSWDSDDSVIDLGSQVRFGSFLHLTQDHGGDLFWGKVLGFTLKFNLNLRLTVNIDQLEWEVLHVSLHFWVVEVSTDQTLSVENGIRRIHSSLILSSITNQSFSVSESDKRWSGSVTLIVGNNVHTIISKVSNTRVCCT',
14
+ '>YAL003W otherstuff' => 'MASTDFSKIETLKQLNASLADKSYIEGTAVSQADVTVFKAFQSAYPEFSRWFNHIASKADEFDSFPAASAAAAEEEEDDDVDLFGSDDEEADAEAEKLKAERIAAYNAKKAAKPAKPAAKSIVTLDVKPWDDETNLEEMVANVKAIEMEGLTWGAHQFIPIGFGIKKLQINCVVEDDKVSLDDLQQSIEEDEDHVQSTDIAAMQKL',
15
+ '>YAL014C otherstuff' => 'MDVLKLGYELDQLSDLVEERTRLVSVLKLAPTSNDNVTLKRQLGSILELLQKCAPNDELISRYNTILDKIPDTAVDKELYRFQQQVARNTDEVSKESLKKVRFKNDDELTVMYKDDDEQDEESPLPSTHTPYKDEPLQSQLQSQSQPQPPQPMVSNQELFINQQQQLLEQDSHLGALSQSIGRTHDISLDLNNEIVSQNDSLLVDLENLIDNNGRNLNRASRSMHGFNNSRFKDNGNCVIILVLIVVLLLLLLVL',
16
+ '>YAL013W otherstuff' => 'MSQQTPQESEQTTAKEQDLDQESVLSNIDFNTDLNHNLNLSEYCISSDAGTEKMDSDEEKSLANLPELKYAPKLSSLVKQETLTESLKRPHEDEKEAIDEAKKMKVPGENEDESKEEEKSQELEEAIDSKEKSTDARDEQGDEGDNEEENNEEDNENENEHTAPPALVMPSPIEMEEQRMTALKEITDIEYKFAQLRQKLYDNQLVRLQTELQMCLEGSHPELQVYYSKIAAIRDYKLHRAYQRQKYELSCINTETIATRTFIHQDFHKKVTDLRARLLNRTTQTWYDINKERRDMDIVIPDVNYHVPIKLDNKTLSCITGYASAAQLCYPGEPVAEDLACESIEYRYRANPVDKLEVIVDRMRLNNEISDLEGLRKYFHSFPGAPELNPLRDSEINDDFHQWAQCDRHTGPHTTSFCYS',
17
+ '>YAL002W otherstuff' => 'MEQNGLDHDSRSSIDTTINDTQKTFLEFRSYTQLSEKLASSSSYTAPPLNEDGPKGVASAVSQGSESVVSWTTLTHVYSILGAYGGPTCLYPTATYFLMGTSKGCVLIFNYNEHLQTILVPTLSEDPSIHSIRSPVKSIVICSDGTHVAASYETGNICIWNLNVGYRVKPTSEPTNGMTPTPALPAVLHIDDHVNKEITGLDFFGARHTALIVSDRTGKVSLYNGYRRGFWQLVYNSKKILDVNSSKEKLIRSKLSPLISREKISTNLLSVLTTTHFALILLSPHVSLMFQETVEPSVQNSLVVNSSISWTQNCSRVAYSVNNKISVISISSSDFNVQSASHSPEFAESILSIQWIDQLLLGVLTISHQFLVLHPQHDFKILLRLDFLIHDLMIPPNKYFVISRRSFYLLTNYSFKIGKFVSWSDITLRHILKGDYLGALEFIESLLQPYCPLANLLKLDNNTEERTKQLMEPFYNLSLAALRFLIKKDNADYNRVYQLLMVVVRVLQQSSKKLDSIPSLDVFLEQGLEFFELKDNAVYFEVVANIVAQGSVTSISPVLFRSIIDYYAKEENLKVIEDLIIMLNPTTLDVDLAVKLCQKYNLFDLLIYIWNKIFDDYQTPVVDLIYRISNQSEKCVIFNGPQVPPETTIFDYVTYILTGRQYPQNLSISPSDKCSKIQRELSAFIFSGFSIKWPSNSNHKLYICENPEEEPAFPYFHLLLKSNPSRFLAMLNEVFEASLFNDDNDMVASVGEAELVSRQYVIDLLLDAMKDTGNSDNIRVLVAIFIATSISKYPQFIKVSNQALDCVVNTICSSRVQGIYEISQIALESLLPYYHSRTTENFILELKEKNFNKVLFHIYKSENKYASALSLILETKDIEKEYNTDIVSITDYILKKCPPGSLECGKVTEVIETNFDLLLSRIGIEKCVTIFSDFDYNLHQEILEVKNEETQQKYLDKLFSTPNINNKVDKRLRNLHIELNCKYKSKREMILWLNGTVLSNAESLQILDLLNQDSNFEAAAIIHERLESFNLAVRDLLSFIEQCLNEGKTNISTLLESLRRAFDDCNSAGTEKKSCWILLITFLITLYGKYPSHDERKDLCNKLLQEAFLGLVRSKSSSQKDSGGEFWEIMSSVLEHQDVILMKVQDLKQLLLNVFNTYKLERSLSELIQKIIEDSSQDLVQQYRKFLSEGWSIHTDDCEICGKKIWGAGLDPLLFLAWENVQRHQDMISVDLKTPLVIFKCHHGFHQTCLENLAQKPDEYSCLICQTESNPKIV',
18
+ '>YAL012W otherstuff' => 'MTLQESDKFATKAIHAGEHVDVHGSVIEPISLSTTFKQSSPANPIGTYEYSRSQNPNRENLERAVAALENAQYGLAFSSGSATTATILQSLPQGSHAVSIGDVYGGTHRYFTKVANAHGVETSFTNDLLNDLPQLIKENTKLVWIETPTNPTLKVTDIQKVADLIKKHAAGQDVILVVDNTFLSPYISNPLNFGADIVVHSATKYINGHSDVVLGVLATNNKPLYERLQFLQNAIGAIPSPFDAWLTHRGLKTLHLRVRQAALSANKIAEFLAADKENVVAVNYPGLKTHPNYDVVLKQHRDALGGGMISFRIKGGAEAASKFASSTRLFTLAESLGGIESLLEVPAVMTHGGIPKEAREASGVFDDLVRISVGIEDTDDLLEDIKQALKQATN',
19
+ }
20
+ data.map do |header,aaseq|
21
+ FastaObj << Fasta::Prot.new(header, aaseq)
22
+ end
23
+
24
+ end
25
+
26
+
@@ -0,0 +1,141 @@
1
+
2
+
3
+
4
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
5
+
6
+ require 'validator/prot_from_pep'
7
+
8
+ klass = Validator::ProtFromPep
9
+
10
+ describe klass, "on fake, simple prots and peps" do
11
+ before(:each) do
12
+ # create some proteins and peptides linked up
13
+ prots = ('a'..'g').to_a.inject( { } ) do |hash,let|
14
+ prot = OpenStruct.new
15
+ prot.peps = []
16
+ hash[let.to_sym] = prot
17
+ hash
18
+ end
19
+ # prots: a.peps = 0,1,4
20
+ # b.peps = 1
21
+ # c.peps = 2
22
+ # d.peps = 2,5,6
23
+ # e.peps = 2
24
+ # f.peps = 3,4
25
+ # g.peps = 3,4,8,9,10
26
+
27
+ # 0 1 2 3 4 5 6 7
28
+ peps = [[:a], [:a,:b], [:c,:d,:e], [:f,:g], [:a,:f,:g], [:c], [:c], [:d],
29
+ # 8 9 10
30
+ [:g], [:g], [:g]].map do |belongs_to|
31
+ pep = OpenStruct.new
32
+ pep.prots = belongs_to.map {|v| prots[v].peps << pep ; prots[v]}
33
+ pep
34
+ end
35
+ @peps = peps
36
+ @prots = prots
37
+
38
+ @normal_frozen = [[0.971428571428572, 0.0586273344048647], [0.95, 0.0838775640874857], [0.907142857142857, 0.116103957269609], [0.878571428571428, 0.133328857783819], [0.814285714285714, 0.147299354691691], [0.735714285714286, 0.186982368192933], [0.65, 0.18812775328873], [0.535714285714286, 0.206630166671598], [0.414285714285714, 0.178909454503803], [0.228571428571429, 0.117254668809732]]
39
+ @worstcase_frozen = [0.857142857142857, 0.714285714285714, 0.571428571428571, 0.571428571428571, 0.428571428571429, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.142857142857143, 0.142857142857143]
40
+ end
41
+
42
+ it 'calculates normal precision edge cases' do
43
+ val = klass.new
44
+ all_wrong = @peps.size
45
+ val.normal_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == [0.0,0.0]
46
+ val.normal_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0
47
+
48
+ val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == [0.0,0.0]
49
+ val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0
50
+
51
+ all_right = 0
52
+ val.normal_prothit_precision( @peps, all_right, :num_its => 10).should == [1.0,0.0]
53
+ val.normal_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
54
+ end
55
+
56
+ it 'calculates normal precision that behaves properly' do
57
+ val = klass.new
58
+ prev_mean = 1.0
59
+ (1...(@peps.size)).to_a.zip( @normal_frozen ) do |num_false, expected|
60
+ (mean, stdev) = val.normal_prothit_precision( @peps, num_false, :num_its => 20)
61
+ (mean < prev_mean).should be_true
62
+ (stdev < 0.4 and stdev > 0.0001).should be_true
63
+ mean.should be_close(expected[0], 0.000000001)
64
+ stdev.should be_close(expected[1], 0.000000001)
65
+ val.normal_prothit_precision( @peps, num_false, :num_its => 1).should be_close(mean, 0.25)
66
+ end
67
+ end
68
+
69
+ it 'calculates worstcase edge cases' do
70
+ val = klass.new
71
+ all_wrong = @peps.size
72
+ val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == 0.0
73
+ val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0
74
+
75
+ val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == 0.0
76
+ val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0
77
+
78
+ all_right = 0
79
+ val.worstcase_prothit_precision( @peps, all_right, :num_its => 10).should == 1.0
80
+ val.worstcase_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
81
+ end
82
+
83
+ it 'calculates worstcase precision that behaves properly' do
84
+
85
+ val = klass.new
86
+ prev_worst = 1.0
87
+ worsts = []
88
+ (1...(@peps.size)).to_a.zip( @worstcase_frozen ) do |num_false, expected|
89
+ worst = val.worstcase_prothit_precision( @peps, num_false, :num_its => 20)
90
+ (worst <= prev_worst).should be_true
91
+ worst.should be_close(expected, 0.0000000001)
92
+ end
93
+
94
+ end
95
+
96
+ it 'calculates prothit precision (worstcase + normal)' do
97
+ val = klass.new
98
+ (1...(@peps.size)).to_a.zip( @normal_frozen, @worstcase_frozen ) do |num_false, normal_expected, worstcase_expected|
99
+ (worst, norm_mean, norm_stdev) = val.prothit_precision( @peps, num_false, :num_its_normal => 20, :num_its_worstcase => 10)
100
+ worst.should be_close(worstcase_expected, 0.0000000001)
101
+ norm_mean.should be_close(normal_expected[0], 0.0000000001)
102
+ norm_stdev.should be_close(normal_expected[1], 0.0000000001)
103
+ end
104
+ end
105
+
106
+ it 'gives 1.0 precision for no pephits' do
107
+ val = klass.new
108
+ val.prothit_precision( [], 0).should == [1.0, 1.0, 0.0]
109
+ end
110
+
111
+ end
112
+
113
+ describe klass, "calculating worstcase prothit precision by numbers" do
114
+ it "calculates precision correctly in easy cases" do
115
+ peps_per_prot = [4,4,3,2,2]
116
+ # no prots completely wrong
117
+ precision = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 1)
118
+ precision.should == 1
119
+
120
+ # only one protein partially correct
121
+ precision = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 14)
122
+ precision.should == 0.2
123
+ end
124
+
125
+ it 'works correctly on other cases' do
126
+ # 0 1 2 3 4 5 6 7 8
127
+ expected = [1.0, 5.0/6, 5.0/6, 4.0/6, 4.0/6, 3.0/6, 3.0/6, 3.0/6, 2.0/6,
128
+ # 9 10 11 12 13 14 15 16 17
129
+ 2.0/6, 2.0/6, 2.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 0.0]
130
+ num_peps_per_prot = [5,4,3,2,2,1].sort_by { rand }
131
+ total_peps = num_peps_per_prot.inject(0) {|memo,obj| obj + memo }
132
+ val = klass.new
133
+ (0..total_peps).to_a.zip(expected) do |num_wrong, exp|
134
+ val.worstcase_prothit_precision_by_numbers(num_peps_per_prot, num_wrong).should == exp
135
+ end
136
+ end
137
+
138
+
139
+ end
140
+
141
+
@@ -0,0 +1,145 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
3
+
4
+ require 'validator/transmem'
5
+ require 'spec_id/digestor'
6
+ require File.dirname(__FILE__) + '/fasta_helper'
7
+ require 'spec_id'
8
+
9
+ klass = Validator::Transmem::Protein
10
+
11
# Specs for the transmembrane validator (`klass`) run against the small
# toppred fixture and a hand-built set of mock peptide and protein hits.
describe klass, "on small mock set" do

  # Returns norm / (norm + trans) as a Float.
  def calc_precision(norm, trans)
    norm.to_f / (norm + trans)
  end

  before(:each) do
    @toppred_file = Tfiles + '/toppred.small.out'
    @peps = (0..7).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
    # reference:  YAL002W YAL001C YAL003W YAL004W YAL007C YAL008W YAL009W YAL010C
    # certain:    3       0       0       0       2       3       2       1
    # index:      0       1       2       3       4       5       6       7
    references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C YAL008W YAL009W YAL010C NOTEXISTING1 NOTEXISTING2)
    @prots = references.map do |ref|
      v = SpecID::GenericProt.new
      v.reference = ref
      v
    end

    # Trailing comments give the TM status of each peptide (? = both).
    # @prots[8] has no key in the validator's lookup (nil); adding it to a
    # peptide SHOULD NOT change the results.
    @peps[0].prots = [@prots[0], @prots[5], @prots[8]]  # y
    @peps[1].prots = [@prots[1], @prots[5], @prots[8]]  # ?
    @peps[2].prots = [@prots[3], @prots[4], @prots[8]]  # ?
    @peps[3].prots = [@prots[2], @prots[8]]  # n
    @peps[4].prots = [@prots[5], @prots[8]]  # y
    @peps[5].prots = [@prots[4], @prots[8]]  # y
    @peps[6].prots = [@prots[8]]  # nil pep
    @peps[7].prots = [@prots[8], @prots[9]]  # nil pep

    @validator = klass.new(@toppred_file)
  end

  it_should_behave_like 'a validator'

  it 'gives correct precision with false ratio (across all option combinations)' do
    # one expected precision per (correct_wins, soluble_fraction) combination,
    # consumed in the order the nested loops below visit them
    answ = [[2,4], [0,6], [0,6], [-2,8]].map {|v| calc_precision(*v) }
    [true, false].each do |correct_wins|
      [true, false].each do |soluble_fraction|
        val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => soluble_fraction, :correct_wins => correct_wins)
        val.false_to_total_ratio = 0.5
        val.pephit_precision(@peps).should == answ.shift
      end
    end
  end

  it 'calculates a correct false to total ratio' do
    val = klass.new(@toppred_file)
    fasta_obj = FastaHelper::FastaObj
    sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
    sequest_params_obj.opts['first_database_name'] = 'not_real'
    val.set_false_to_total_ratio( Digestor.digest(fasta_obj, sequest_params_obj) )
    ratio = val.false_to_total_ratio
    num_tps_soluble_peps = 777
    num_fps_insoluble_peps = 741
    expected_ratio = num_tps_soluble_peps.to_f / (num_tps_soluble_peps + num_fps_insoluble_peps)
    ratio.should == expected_ratio
  end

  it 'can grant transmem status to proteins for speed' do
    val = klass.new(@toppred_file)
    fasta_obj = FastaHelper::FastaObj
    sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
    hash = val.create_transmem_status_hash( Digestor.digest(fasta_obj.prots, sequest_params_obj))
    # every protein must have an entry in the status hash
    fasta_obj.prots.each do |prot|
      hash.key?(prot).should be_true
    end
    frozen = [true, true, false, true, false, false, true, false, true, false, true, true, true]
    fasta_obj.prots.map {|prot| hash[prot] }.should == frozen
  end

  it 'can calculate precision incrementally' do
    val = klass.new(@toppred_file, :min_num_tms => 2)
    # usually we'd update the false_to_total_ratio, but not bothering for this
    # test; here we HAVE to set the status hash beforehand (this section
    # could be redone)
    val.transmem_status_hash = val.create_transmem_status_hash(@peps)

    # manually done:
    precisions = [0.0, 1.0/2, 2.0/3, 3.0/4, 3.0/5, 3.0/6, 3.0/6, 3.0/6]

    # frozen:
    calc_bkgs = [1.0, 0.5, 0.333333333333333, 0.25, 0.4, 0.5, 0.5, 0.5]
    # frozen:
    false_to_total_ratios = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

    @peps.zip(precisions, calc_bkgs, false_to_total_ratios) do |pep, exp_prec, calc_bkg, false_to_total_ratio|
      val.increment_pephits_precision(pep).should == exp_prec
      val.calculated_background.should be_close(calc_bkg, 0.00000000000001)
      val.false_to_total_ratio.should == false_to_total_ratio
    end
  end

  it 'creates correct reference hash' do
    val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => true, :correct_wins => true)
    val.transmem_by_ti_key.should == {"YAL001C"=>false, "YAL011W"=>false, "YAL009W"=>false, "YAL010C"=>false, "YAL008W"=>true, "YAL007C"=>false, "YAL004W"=>false, "YAL005C"=>false, "YAL003W"=>false, "YAL002W"=>true, "YAL013W"=>false, "YAL014C"=>false, "YAL012W"=>false}
  end

end
111
+
112
+
113
+ #################################################
114
+ # REFERENCE for small mock set:
115
+ #################################################
116
+ # for mintm >= 3 (T = TP, F = FP, sf = soluble_fraction)
117
+ # sf=false sf=true
118
+ # TM cw fw cw fw
119
+ # 0 y T T F F
120
+ # 1 ? T F T F
121
+ # 2 n F F T T
122
+ # 3 n F F T T
123
+ # 4 y T T F F
124
+ # 5 n F F T T
125
+ #
126
+ # [tps, fps]
127
+ # cw=true( sf=true [4,2], sf=false [3,3] )
128
+ # cw=false( sf=true [3,3], sf=false [2,4] )
129
+
130
+ # for mintm >= 2 (T = TP, F = FP, sf = soluble_fraction)
131
+ # sf=false sf=true
132
+ # TM cw fw cw fw
133
+ # 0 y T T F F
134
+ # 1 ? T F T F
135
+ # 2 ? T F T F
136
+ # 3 n F F T T
137
+ # 4 y T T F F
138
+ # 5 y T T F F
139
+ #
140
+ # [tps, fps]
141
+ # cw=true( sf=true [3,3], sf=false [5,1] )
142
+ # cw=false( sf=true [1,5], sf=false [3,3] )
143
+ #
144
+ # sf=true( cw=true [3,3], cw=false[1,5] )
145
+ # sf=false( cw=true [5,1], cw=false[3,3] )
@@ -0,0 +1,58 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ require File.expand_path(File.dirname(__FILE__) + '/../validator_helper')
3
+
4
+ require 'validator/true_pos'
5
+ require 'fasta'
6
+ require 'spec_id'
7
+
8
+ klass = Validator::TruePos
9
# Specs for the true-positive validator (`klass`): peptides whose proteins
# appear in the given fasta are counted as true positives.
describe klass, 'reporting precision on peptides' do

  before(:each) do
    @myfasta_string =<<END
>gi|1245235|ProteinX
ABCDEFGHIJKLMNOP
>gi|987654|ProteinY
AAAAAABBBBBBBBBBBB
>gi|1111111|ProteinZ
FFFFFFFFFGGGGGGZZZZ
END

    # six mock peptides whose sequences are just "0".."5"
    @peps = (0..5).to_a.map do |n|
      pep = SpecID::GenericPep.new
      pep.aaseq = n.to_s
      pep
    end

    # the first three references are in the fasta above, the last three are not
    references = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ someOthergi AnotherGi YetAnotherReference)
    prots = references.map do |ref|
      prot = SpecID::GenericProt.new
      prot.reference = ref
      prot
    end

    # [peptide index, indices of the proteins it maps to]
    [
      [0, [0, 5]],  # TP (only in tp wins)
      [1, [1, 2]],  # TP always
      [2, [3, 4]],  # FP
      [3, [2]],     # TP
      [4, [5]],     # FP
      [5, [4]]      # FP
    ].each do |pep_index, prot_indices|
      @peps[pep_index].prots = prot_indices.map {|i| prots[i] }
    end

    @myfasta_obj = Fasta.new.load(StringIO.new(@myfasta_string))

    @validator = klass.new(@myfasta_obj)
  end

  it_should_behave_like 'a validator'

  it 'gives correct precision (across all options)' do
    # expected [true positives, false positives] partition, one per value of
    # correct_wins, in the order the values are tried below
    partitions = [
      [@peps.values_at(0, 1, 3), @peps.values_at(2, 4, 5)],
      [@peps.values_at(1, 3), @peps.values_at(0, 2, 4, 5)]
    ]

    [true, false].zip(partitions) do |correct_wins, partition|
      validator = klass.new(@myfasta_obj, correct_wins)
      actual = validator.pephit_precision(@peps)
      expected = ValidatorHelper.precision_from_partition_array(partition)
      actual.should == expected
    end
  end

end
56
+
57
+
58
+
@@ -0,0 +1,33 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
# Helpers for computing the precision a validator spec should expect from a
# hand-partitioned list of hits.
class ValidatorHelper
  # ar - two-element array [true_positives, false_positives]; each element
  #      only needs to respond to #size.
  #
  # Returns tp / (tp + fp) as a Float, or 1.0 when both groups are empty
  # (the plain division would give NaN there, while the validators are
  # expected to report 1.0 for zero peptides).
  def self.precision_from_partition_array(ar)
    (num_tp, num_fp) = ar.map {|v| v.size}
    total = num_tp + num_fp
    return 1.0 if total == 0
    num_tp.to_f / total
  end
end

# Variant for decoy-style validation, where the first group may still
# contain false hits and the second group holds the decoy hits.
module ValidatorHelper::Decoy
  # ar - two-element array [maybe_true_hits, decoy_hits]; each element only
  #      needs to respond to #size.
  #
  # The number of decoy hits is used as the number of false positives among
  # the maybe-true hits, so the result is
  # (maybe_true - decoy) / maybe_true as a Float.
  # Returns 1.0 when both groups are empty instead of NaN.
  def self.precision_from_partition_array(ar)
    (num_maybe_true, num_decoy) = ar.map {|v| v.size}
    return 1.0 if num_maybe_true == 0 && num_decoy == 0
    num_tp = num_maybe_true - num_decoy
    num_fp = num_maybe_true - num_tp
    num_tp.to_f / (num_tp + num_fp)
  end
end
18
+
19
# Shared examples for validators. The including example group must set
# @validator in its own before block.
describe 'a validator', :shared => true do
  before(:each) { @empty_peps = [] }

  it 'gives 1.0 for zero peptides (w/ pephit_precision)' do
    precision = @validator.pephit_precision(@empty_peps)
    precision.should == 1.0
  end

  it 'gives 1.0 for zero peptides (w/ increment_pephits_precision)' do
    precision = @validator.increment_pephits_precision(@empty_peps)
    precision.should == 1.0
  end
end
32
+
33
+
data/specs/xml_spec.rb ADDED
@@ -0,0 +1,12 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
+
3
+ require 'xml'
4
+
5
# Specs for XML.duration_to_seconds, which is given duration strings such as
# "PT2H3M12.2S" and is expected to return the equivalent number of seconds.
describe XML, 'converting duration to seconds' do
  it 'converts hours/mins/seconds in combinations' do
    durations = %w(PT0.234S PT10M24S PT2H3M12.2S)
    expected_seconds = [0.234, 624, 7392.2]
    # Block parameters are named differently from the outer locals: a block
    # parameter that reuses an outer local's name overwrites that local on
    # Ruby 1.8 and shadows it on later versions.
    durations.zip(expected_seconds) do |duration, seconds|
      XML.duration_to_seconds(duration).should == seconds
    end
  end
end