mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,122 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
- require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
3
-
4
- require 'validator/bias'
5
-
6
- require File.dirname(__FILE__) + '/fasta_helper'
7
- require 'spec_id'
8
-
9
- klass = Validator::Bias
10
-
11
- describe klass, "on small mock set" do
12
- before(:each) do
13
- @peps = (0..6).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
14
- references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C NOT_EXISTING1 NOT_EXISTING2 NOT_EXISTING3 NOT_EXISTING4)
15
- # index: 0 1 2 3 4 5 6 7
16
- # index: 8
17
- @prots = references.map do |ref|
18
- v = SpecID::GenericProt.new
19
- v.reference = ref + " something else that we don't care about"
20
- v
21
- end
22
-
23
- # e=t we expect to see the fasta proteins in our hit list
24
- # cw=t a single peptide hit from one of these proteins constitutes a true
25
- # positive
26
- # cw=f all peptide hits must come from one of these proteins to be a true
27
- # positive
28
- #
29
- # e=f we do not expect to see the fasta obj proteins in our hit list
30
- # cw=t a single peptide hit from *outside* this list constitutes a true
31
- # positive
32
- # cw=f a single peptide hit from our fasta object constitutes a false
33
- # positive
34
- #
35
-
36
- @peps[0].prots = [@prots[0], @prots[5], @prots[8]]
37
- @peps[1].prots = [@prots[1], @prots[5], @prots[8]]
38
- @peps[2].prots = [@prots[3], @prots[4], @prots[1]]
39
- @peps[3].prots = [@prots[7], @prots[8]]
40
- @peps[4].prots = [@prots[5], @prots[8]]
41
- @peps[5].prots = [@prots[8]]
42
- @peps[6].prots = [@prots[5], @prots[6]]
43
-
44
- #################################################
45
- # REFERENCE for small mock set:
46
- #################################################
47
- # pep 1inFst? allinFst? cw=t,e=t cw=t,e=f cw=f,e=f cw=f,e=t
48
- # 0 y n t t f f
49
- # 1 y n t t f f
50
- # 2 y y t f f t
51
- # 3 n n f t t f
52
- # 4 n n f t t f
53
- # 5 n n f t t f
54
- # 6 n n f t t f
55
- # PR: 3/7 6/7 4/7 1/7
56
- # tp:fp 3:4 6:1 4:3 1:6
57
-
58
- @fasta_obj = FastaHelper::FastaObj
59
- @validator = klass.new(@fasta_obj)
60
- @validator.false_to_total_ratio = 0.22 # arbitrary
61
- end
62
-
63
- it_should_behave_like 'a validator'
64
-
65
- it 'creates correct reference hash' do
66
- expected = {"YAL001C"=>true, "YAL011W"=>true, "YAL010C"=>true,
67
- "YAL009W"=>true, "YAL008W"=>true, "YAL007C"=>true, "YAL005C"=>true,
68
- "YAL004W"=>true, "YAL003W"=>true, "YAL014C"=>true, "YAL013W"=>true,
69
- "YAL002W"=>true, "YAL012W"=>true
70
- }
71
- val = klass.new(@fasta_obj)
72
- val.short_reference_hash.should == expected
73
- end
74
-
75
- it 'gives correct precision and partitions (across all option combinations)' do
76
- answ = [[3,4], [6,1], [1,6], [4,3]]
77
- # cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
78
- [true, false].each do |correct_wins|
79
- [true, false].each do |fasta_expected|
80
- val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :false_to_total_ratio => 1.0)
81
- tp, fp = answ.shift
82
- exp = calc_precision(tp, fp)
83
- val.pephit_precision(@peps).should == exp
84
- act_tp, act_fp = val.partition(@peps)
85
- act_tp.size.should == tp
86
- act_fp.size.should == fp
87
- end
88
- end
89
- end
90
-
91
- it 'correctly incorporates background' do
92
- answ = [[3,4], [6,1], [1,6], [4,3]]
93
- # cw=t,e=t; cw=t,e=f; cw=f,e=t; cw=f,e=f
94
- background = 0.24
95
- [true, false].each do |correct_wins|
96
- [true, false].each do |fasta_expected|
97
- val = klass.new(@fasta_obj, :proteins_expected => fasta_expected, :correct_wins => correct_wins, :background => background, :false_to_total_ratio => 1.0)
98
- peps_size = @peps.size
99
- exp_tp, exp_fp = answ.shift
100
- exp = calc_precision(exp_tp, exp_fp)
101
- val.pephit_precision(@peps).should_not == exp
102
- actual_precision = val.pephit_precision(@peps)
103
- act_tp, act_fp = val.partition(@peps)
104
- act_tp.size.should == exp_tp
105
- act_fp.size.should == exp_fp
106
- exp_fp_correctd = exp_fp.to_f - (peps_size.to_f * background)
107
- expected_precision = calc_precision(peps_size.to_f - exp_fp_correctd, exp_fp_correctd)
108
- # internally, the num of false hits is controlled so as not to bottom
109
- # out below zero, here we control the precision (same effect)
110
- expected_precision = 1.0 if expected_precision > 1.0
111
- actual_precision.should == expected_precision
112
- end
113
- end
114
- end
115
-
116
- it_should 'work with false_to_total_ratio!'
117
-
118
- def calc_precision(tp, fp)
119
- prec = tp.to_f / (tp + fp)
120
- end
121
- end
122
-
@@ -1,51 +0,0 @@
1
-
2
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
3
- require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
4
-
5
- require 'validator/decoy'
6
- require 'spec_id'
7
-
8
- klass = Validator::Decoy
9
-
10
- describe klass, 'reporting precision on peptides from cat prots' do
11
-
12
- before(:each) do
13
- peps = (0..5).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
14
- prots = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ FALSE_someOthergi FALSE_AnotherGi FALSE_YetAnotherReference).map do |ref|
15
- v = SpecID::GenericProt.new
16
- v.reference = ref
17
- v
18
- end
19
- peps[0].prots = [prots[0], prots[5]] # TP (only in tp wins)
20
- peps[1].prots = [prots[1], prots[2]] # TP always
21
- peps[2].prots = [prots[3], prots[4]] # FP
22
- peps[3].prots = [prots[2]] # TP
23
- peps[4].prots = [prots[5]] # FP
24
- peps[5].prots = [prots[4]] # FP
25
- @peps = peps
26
- @validator = klass.new(:constraint => /FAKE/)
27
- end
28
-
29
- it_should_behave_like 'a validator'
30
-
31
- it 'gives correct precision (across all option combinations)' do
32
- answ_arr = [
33
- [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
34
- [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]],
35
- [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
36
- [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]
37
- ]
38
- protein_matches = [/^FALSE_/, /^FALSE_/, 'Protein', 'Protein']
39
-
40
- [true, false].each do |incorrect_on_match|
41
- [true, false].each do |correct_wins|
42
- val = klass.new(:constraint => protein_matches.shift, :decoy_on_match => incorrect_on_match, :correct_wins => correct_wins)
43
- answ = val.pephit_precision(@peps)
44
- exp = ValidatorHelper::Decoy.precision_from_partition_array(answ_arr.shift)
45
- answ.should == exp
46
- end
47
- end
48
- end
49
-
50
- end
51
-
@@ -1,26 +0,0 @@
1
-
2
-
3
- module FastaHelper
4
- FastaObj = Fasta.new
5
- data = {
6
- '>YAL011W otherstuff' => 'MPAVLRTRSKESSIEQKPASRTRTRSRRGKRGRDDDDDDDDEESDDAYDEVGNDYDEYASRAKLATNRPFEIVAGLPASVELPNYNSSLTHPQSIKNSGVLYDSLVSSRRTWVQGEMFELYWRRPKKIVSESTPAATESPTSGTIPLIRDKMQKMCDCVMSGGPHTFKVRLFILKNDKIEQKWQDEQELKKKEKELKRKNDAEAKRLRMEERKRQQMQKKIAKEQKLQLQKENKAKQKLEQEALKLKRKEEMKKLKEQNKNKQGSPSSSMHDPRMIMNLNLMAQEDPKLNTLMETVAKGLANNSQLEEFKKFIEIAKKRSLEENPVNKRPSVTTTRPAPPSKAKDVAEDHRLNSITLVKSSKTAATEPEPKKADDENAEKQQSKEAKTTAESTQVDVKKEEEDVKEKGVKSEDTQKKEDNQVVPKRKRRKNAIKEDKDMQLTAFQQKYVQGAEIILEYLEFTHSRYYLPKKSVVEFLEDTDEIIISWIVIHNSKEIEKFKTKKIKAKLKADQKLNKEDAKPGSDVEKEVSFNPLFEADCPTPLYTPMTMKLSGIHKRFNQIIRNSVSPMEEVVKEMEKILQIGTRLSGYNLWYQLDGYDDEALSESLRFELNEWEHAMRSRRHKR',
7
- '>YAL001C otherstuff' => 'MVLTIYPDELVQIVSDKIASNKGKITLNQLWDISGKYFDLSDKKVKQFVLSCVILKKDIEVYCDGAITTKNVTDIIGDANHSYSVGITEDSLWTLLTGYTKKESTIGNSAFELLLEVAKSGEKGINTMDLAQVTGQDPRSVTGRIKKINHLLTSSQLIYKGHVVKQLKLKKFSHDGVDSNPYINIRDHLATIVEVVKRSKNGIRQIIDLKRELKFDKEKRLSKAFIAAIAWLDEKEYLKKVLVVSPKNPAIKIRCVKYVKDIPDSKGSPSFEYDSNSADEDSVSDSKAAFEDEDLVEGLDNFNATDLLQNQGLVMEEKEDAVKNEVLLNRFYPLQNQTYDIADKSGLKGISTMDVVNRITGKEFQRAFTKSSEYYLESVDKQKENTGGYRLFRIYDFEGKKKFFRLFTAQNFQKLTNAEDEISVPKGFDELGKSRTDLKTLNEDNFVALNNTVRFTTDSDGQDIFFWHGELKIPPNSKKTPNKNKRKRQVKNSTNASVAGNISNPKRIKLEQHVSTAQEPKSAEDSPSSNGGTVVKGKVVNFGGFSARSLRSLQRQRAILKVMNTIGGVAYLREQFYESVSKYMGSTTTLDKKTVRGDVDLMVESEKLGARTEPVSGRKIIFLPTVGEDAIQRYILKEKDSKKATFTDVIHDTEIYFFDQTEKNRFHRGKKSVERIRKFQNRQKNAKIKASDDAISKKSTSVNVSDGKIKRRDKKVSAGRTTVVVENTKEDKTVYHAGTKDGVQALIRAVVVTKSIKNEIMWDKITKLFPNNSLDNLKKKWTARRVRMGHSGWRAYVDKWKKMLVLAIKSEKISLRDVEELDLIKLLDIWTSFDEKEIKRPLFLYKNYEENRKKFTLVRDDTLTHSGNDLAMSSMIQREISSLKKTYTRKISASTKDLSKSQSDDYIRTVIRSILIESPSTTRNEIEALKNVGNESIDNVIMDMAKEKQIYLHGSKLECTDTLPDILENRGNYKDFGVAFQYRCKVNELLEAGNAIVINQEPSDISSWVLIDLISGELLNMDVIPMVRNVRPLTYTSRRFEIRTLTPPLIIYANSQTKLNTARKSAVKVPLGKPFSRLWVNGSGSIRPNIWKQVVTMVVNEIIFHPGITLSRLQSRCREVLSLHEISEICKWLLERQVLITTDFDGYWVNHNWYSIYEST',
8
- '>YAL010C otherstuff' => 'MLPYMDQVLRAFYQSTHWSTQNSYEDITATSRTLLDFRIPSAIHLQISNKSTPNTFNSLDFSTRSRINGSLSYLYSDAQQLEKFMRNSTDIPLQDATETYRQLQPNLNFSVSSANTLSSDNTTVDNDKKLLHDSKFVKKSLYYGRMYYPSSDLEAMIIKRLSPQTQFMLKGVSSFKESLNVLTCYFQRDSHRNLQEWIFSTSDLLCGYRVLHNFLTTPSKFNTSLYNNSSLSLGAEFWLGLVSLSPGCSTTLRYYTHSTNTGRPLTLTLSWQPLFGHISSTYSAKTGTNSTFCAKYDFNLYSIESNLSFGCEFWQKKHHLLETNKNNNDKLEPISDELVDINPNSRATKLLHENVPDLNSAVNDIPSTLDIPVHKQKLLNDLTYAFSSSLRKIDEERSTIEKFDNKINSSIFTSVWKLSTSLRDKTLKLLWEGKWRGFLISAGTELVFTRGFQESLSDDEKNDNAISISATDTENGNIPVFPAKFGIQFQYST',
9
- '>YAL009W otherstuff' => 'MEPESIGDVGNHAQDDSASIVSGPRRRSTSKTSSAKNIRNSSNISPASMIFRNLLILEDDLRRQAHEQKILKWQFTLFLASMAGVGAFTFYELYFTSDYVKGLHRVILQFTLSFISITVVLFHISGQYRRTIVIPRRFFTSTNKGIRQFNVKLVKVQSTWDEKYTDSVRFVSRTIAYCNIYCLKKFLWLKDDNAIVKFWKSVTIQSQPRIGAVDVKLVLNPRAFSAEIREGWEIYRDEFWAREGARRRKQAHELRPKSE',
10
- '>YAL008W otherstuff' => 'MTLAFNMQRLVFRNLNVGKRMFKNVPLWRFNVANKLGKPLTRSVGLGGAGIVAGGFYLMNRQPSKLIFNDSLGAAVKQQGPLEPTVGNSTAITEERRNKISSHKQMFLGSLFGVVLGVTVAKISILFMYVGITSMLLCEWLRYKGWIRINLKNIKSVIVLKDVDLKKLLIDGLLGTEYMGFKVFFTLSFVLASLNANK',
11
- '>YAL007C otherstuff' => 'MIKSTIALPSFFIVLILALVNSVAASSSYAPVAISLPAFSKECLYYDMVTEDDSLAVGYQVLTGGNFEIDFDITAPDGSVITSEKQKKYSDFLLKSFGVGKYTFCFSNNYGTALKKVEITLEKEKTLTDEHEADVNNDDIIANNAVEEIDRNLNKITKTLNYLRAREWRNMSTVNSTESRLTWLSILIIIIIAVISIAQVLLIQFLFTGRQKNYV',
12
- '>YAL005C otherstuff' => 'MSKAVGIDLGTTYSCVAHFANDRVDIIANDQGNRTTPSFVAFTDTERLIGDAAKNQAAMNPSNTVFDAKRLIGRNFNDPEVQADMKHFPFKLIDVDGKPQIQVEFKGETKNFTPEQISSMVLGKMKETAESYLGAKVNDAVVTVPAYFNDSQRQATKDAGTIAGLNVLRIINEPTAAAIAYGLDKKGKEEHVLIFDLGGGTFDVSLLFIEDGIFEVKATAGDTHLGGEDFDNRLVNHFIQEFKRKNKKDLSTNQRALRRLRTACERAKRTLSSSAQTSVEIDSLFEGIDFYTSITRARFEELCADLFRSTLDPVEKVLRDAKLDKSQVDEIVLVGGSTRIPKVQKLVTDYFNGKEPNRSINPDEAVAYGAAVQAAILTGDESSKTQDLLLLDVAPLSLGIETAGGVMTKLIPRNSTISTKKFEIFSTYADNQPGVLIQVFEGERAKTKDNNLLGKFELSGIPPAPRGVPQIEVTFDVDSNGILNVSAVEKGTGKSNKITITNDKGRLSKEDIEKMVAEAEKFKEEDEKESQRIASKNQLESIAYSLKNTISEAGDKLEQADKDTVTKKAEETISWLDSNTTASKEEFDDKLKELQDIANPIMSKLYQAGGAPGGAAGGAPGGFPGGAPPAPEAEGPTVEEVD',
13
- '>YAL004W otherstuff' => 'MGVTSGGLNFKDTVFNEQQRDIESTTTQVENQDVFFLTLLVQTVSNGSGGRFVNNTQDIQTSNGTSILGSLSLRIVEVSWDSDDSVIDLGSQVRFGSFLHLTQDHGGDLFWGKVLGFTLKFNLNLRLTVNIDQLEWEVLHVSLHFWVVEVSTDQTLSVENGIRRIHSSLILSSITNQSFSVSESDKRWSGSVTLIVGNNVHTIISKVSNTRVCCT',
14
- '>YAL003W otherstuff' => 'MASTDFSKIETLKQLNASLADKSYIEGTAVSQADVTVFKAFQSAYPEFSRWFNHIASKADEFDSFPAASAAAAEEEEDDDVDLFGSDDEEADAEAEKLKAERIAAYNAKKAAKPAKPAAKSIVTLDVKPWDDETNLEEMVANVKAIEMEGLTWGAHQFIPIGFGIKKLQINCVVEDDKVSLDDLQQSIEEDEDHVQSTDIAAMQKL',
15
- '>YAL014C otherstuff' => 'MDVLKLGYELDQLSDLVEERTRLVSVLKLAPTSNDNVTLKRQLGSILELLQKCAPNDELISRYNTILDKIPDTAVDKELYRFQQQVARNTDEVSKESLKKVRFKNDDELTVMYKDDDEQDEESPLPSTHTPYKDEPLQSQLQSQSQPQPPQPMVSNQELFINQQQQLLEQDSHLGALSQSIGRTHDISLDLNNEIVSQNDSLLVDLENLIDNNGRNLNRASRSMHGFNNSRFKDNGNCVIILVLIVVLLLLLLVL',
16
- '>YAL013W otherstuff' => 'MSQQTPQESEQTTAKEQDLDQESVLSNIDFNTDLNHNLNLSEYCISSDAGTEKMDSDEEKSLANLPELKYAPKLSSLVKQETLTESLKRPHEDEKEAIDEAKKMKVPGENEDESKEEEKSQELEEAIDSKEKSTDARDEQGDEGDNEEENNEEDNENENEHTAPPALVMPSPIEMEEQRMTALKEITDIEYKFAQLRQKLYDNQLVRLQTELQMCLEGSHPELQVYYSKIAAIRDYKLHRAYQRQKYELSCINTETIATRTFIHQDFHKKVTDLRARLLNRTTQTWYDINKERRDMDIVIPDVNYHVPIKLDNKTLSCITGYASAAQLCYPGEPVAEDLACESIEYRYRANPVDKLEVIVDRMRLNNEISDLEGLRKYFHSFPGAPELNPLRDSEINDDFHQWAQCDRHTGPHTTSFCYS',
17
- '>YAL002W otherstuff' => 'MEQNGLDHDSRSSIDTTINDTQKTFLEFRSYTQLSEKLASSSSYTAPPLNEDGPKGVASAVSQGSESVVSWTTLTHVYSILGAYGGPTCLYPTATYFLMGTSKGCVLIFNYNEHLQTILVPTLSEDPSIHSIRSPVKSIVICSDGTHVAASYETGNICIWNLNVGYRVKPTSEPTNGMTPTPALPAVLHIDDHVNKEITGLDFFGARHTALIVSDRTGKVSLYNGYRRGFWQLVYNSKKILDVNSSKEKLIRSKLSPLISREKISTNLLSVLTTTHFALILLSPHVSLMFQETVEPSVQNSLVVNSSISWTQNCSRVAYSVNNKISVISISSSDFNVQSASHSPEFAESILSIQWIDQLLLGVLTISHQFLVLHPQHDFKILLRLDFLIHDLMIPPNKYFVISRRSFYLLTNYSFKIGKFVSWSDITLRHILKGDYLGALEFIESLLQPYCPLANLLKLDNNTEERTKQLMEPFYNLSLAALRFLIKKDNADYNRVYQLLMVVVRVLQQSSKKLDSIPSLDVFLEQGLEFFELKDNAVYFEVVANIVAQGSVTSISPVLFRSIIDYYAKEENLKVIEDLIIMLNPTTLDVDLAVKLCQKYNLFDLLIYIWNKIFDDYQTPVVDLIYRISNQSEKCVIFNGPQVPPETTIFDYVTYILTGRQYPQNLSISPSDKCSKIQRELSAFIFSGFSIKWPSNSNHKLYICENPEEEPAFPYFHLLLKSNPSRFLAMLNEVFEASLFNDDNDMVASVGEAELVSRQYVIDLLLDAMKDTGNSDNIRVLVAIFIATSISKYPQFIKVSNQALDCVVNTICSSRVQGIYEISQIALESLLPYYHSRTTENFILELKEKNFNKVLFHIYKSENKYASALSLILETKDIEKEYNTDIVSITDYILKKCPPGSLECGKVTEVIETNFDLLLSRIGIEKCVTIFSDFDYNLHQEILEVKNEETQQKYLDKLFSTPNINNKVDKRLRNLHIELNCKYKSKREMILWLNGTVLSNAESLQILDLLNQDSNFEAAAIIHERLESFNLAVRDLLSFIEQCLNEGKTNISTLLESLRRAFDDCNSAGTEKKSCWILLITFLITLYGKYPSHDERKDLCNKLLQEAFLGLVRSKSSSQKDSGGEFWEIMSSVLEHQDVILMKVQDLKQLLLNVFNTYKLERSLSELIQKIIEDSSQDLVQQYRKFLSEGWSIHTDDCEICGKKIWGAGLDPLLFLAWENVQRHQDMISVDLKTPLVIFKCHHGFHQTCLENLAQKPDEYSCLICQTESNPKIV',
18
- '>YAL012W otherstuff' => 'MTLQESDKFATKAIHAGEHVDVHGSVIEPISLSTTFKQSSPANPIGTYEYSRSQNPNRENLERAVAALENAQYGLAFSSGSATTATILQSLPQGSHAVSIGDVYGGTHRYFTKVANAHGVETSFTNDLLNDLPQLIKENTKLVWIETPTNPTLKVTDIQKVADLIKKHAAGQDVILVVDNTFLSPYISNPLNFGADIVVHSATKYINGHSDVVLGVLATNNKPLYERLQFLQNAIGAIPSPFDAWLTHRGLKTLHLRVRQAALSANKIAEFLAADKENVVAVNYPGLKTHPNYDVVLKQHRDALGGGMISFRIKGGAEAASKFASSTRLFTLAESLGGIESLLEVPAVMTHGGIPKEAREASGVFDDLVRISVGIEDTDDLLEDIKQALKQATN',
19
- }
20
- data.map do |header,aaseq|
21
- FastaObj << Fasta::Prot.new(header, aaseq)
22
- end
23
-
24
- end
25
-
26
-
@@ -1,141 +0,0 @@
1
-
2
-
3
-
4
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
5
-
6
- require 'validator/prot_from_pep'
7
-
8
- klass = Validator::ProtFromPep
9
-
10
- describe klass, "on fake, simple prots and peps" do
11
- before(:each) do
12
- # create some proteins and peptides linked up
13
- prots = ('a'..'g').to_a.inject( { } ) do |hash,let|
14
- prot = OpenStruct.new
15
- prot.peps = []
16
- hash[let.to_sym] = prot
17
- hash
18
- end
19
- # prots: a.peps = 0,1,4
20
- # b.peps = 1
21
- # c.peps = 2,5,6
21
- # d.peps = 2,7
23
- # e.peps = 2
24
- # f.peps = 3,4
25
- # g.peps = 3,4,8,9,10
26
-
27
- # 0 1 2 3 4 5 6 7
28
- peps = [[:a], [:a,:b], [:c,:d,:e], [:f,:g], [:a,:f,:g], [:c], [:c], [:d],
29
- # 8 9 10
30
- [:g], [:g], [:g]].map do |belongs_to|
31
- pep = OpenStruct.new
32
- pep.prots = belongs_to.map {|v| prots[v].peps << pep ; prots[v]}
33
- pep
34
- end
35
- @peps = peps
36
- @prots = prots
37
-
38
- @normal_frozen = [[0.971428571428572, 0.0586273344048647], [0.95, 0.0838775640874857], [0.907142857142857, 0.116103957269609], [0.878571428571428, 0.133328857783819], [0.814285714285714, 0.147299354691691], [0.735714285714286, 0.186982368192933], [0.65, 0.18812775328873], [0.535714285714286, 0.206630166671598], [0.414285714285714, 0.178909454503803], [0.228571428571429, 0.117254668809732]]
39
- @worstcase_frozen = [0.857142857142857, 0.714285714285714, 0.571428571428571, 0.571428571428571, 0.428571428571429, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.142857142857143, 0.142857142857143]
40
- end
41
-
42
- it 'calculates normal precision edge cases' do
43
- val = klass.new
44
- all_wrong = @peps.size
45
- val.normal_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == [0.0,0.0]
46
- val.normal_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0
47
-
48
- val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == [0.0,0.0]
49
- val.normal_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0
50
-
51
- all_right = 0
52
- val.normal_prothit_precision( @peps, all_right, :num_its => 10).should == [1.0,0.0]
53
- val.normal_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
54
- end
55
-
56
- it 'calculates normal precision that behaves properly' do
57
- val = klass.new
58
- prev_mean = 1.0
59
- (1...(@peps.size)).to_a.zip( @normal_frozen ) do |num_false, expected|
60
- (mean, stdev) = val.normal_prothit_precision( @peps, num_false, :num_its => 20)
61
- (mean < prev_mean).should be_true
62
- (stdev < 0.4 and stdev > 0.0001).should be_true
63
- mean.should be_close(expected[0], 0.000000001)
64
- stdev.should be_close(expected[1], 0.000000001)
65
- val.normal_prothit_precision( @peps, num_false, :num_its => 1).should be_close(mean, 0.25)
66
- end
67
- end
68
-
69
- it 'calculates worstcase edge cases' do
70
- val = klass.new
71
- all_wrong = @peps.size
72
- val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 10 ).should == 0.0
73
- val.worstcase_prothit_precision( @peps, all_wrong, :num_its => 1).should == 0.0
74
-
75
- val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 10).should == 0.0
76
- val.worstcase_prothit_precision( @peps, all_wrong+10, :num_its => 1).should == 0.0
77
-
78
- all_right = 0
79
- val.worstcase_prothit_precision( @peps, all_right, :num_its => 10).should == 1.0
80
- val.worstcase_prothit_precision( @peps, all_right, :num_its => 1).should == 1.0
81
- end
82
-
83
- it 'calculates worstcase precision that behaves properly' do
84
-
85
- val = klass.new
86
- prev_worst = 1.0
87
- worsts = []
88
- (1...(@peps.size)).to_a.zip( @worstcase_frozen ) do |num_false, expected|
89
- worst = val.worstcase_prothit_precision( @peps, num_false, :num_its => 20)
90
- (worst <= prev_worst).should be_true
91
- worst.should be_close(expected, 0.0000000001)
92
- end
93
-
94
- end
95
-
96
- it 'calculates prothit precision (worstcase + normal)' do
97
- val = klass.new
98
- (1...(@peps.size)).to_a.zip( @normal_frozen, @worstcase_frozen ) do |num_false, normal_expected, worstcase_expected|
99
- (worst, norm_mean, norm_stdev) = val.prothit_precision( @peps, num_false, :num_its_normal => 20, :num_its_worstcase => 10)
100
- worst.should be_close(worstcase_expected, 0.0000000001)
101
- norm_mean.should be_close(normal_expected[0], 0.0000000001)
102
- norm_stdev.should be_close(normal_expected[1], 0.0000000001)
103
- end
104
- end
105
-
106
- it 'gives 1.0 precision for no pephits' do
107
- val = klass.new
108
- val.prothit_precision( [], 0).should == [1.0, 1.0, 0.0]
109
- end
110
-
111
- end
112
-
113
- describe klass, "calculating worstcase prothit precision by numbers" do
114
- it "calculates precision correctly in easy cases" do
115
- peps_per_prot = [4,4,3,2,2]
116
- # no prots completely wrong
117
- precision = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 1)
118
- precision.should == 1
119
-
120
- # only one protein partially correct
121
- precision = klass.new.worstcase_prothit_precision_by_numbers(peps_per_prot, 14)
122
- precision.should == 0.2
123
- end
124
-
125
- it 'works correctly on other cases' do
126
- # 0 1 2 3 4 5 6 7 8
127
- expected = [1.0, 5.0/6, 5.0/6, 4.0/6, 4.0/6, 3.0/6, 3.0/6, 3.0/6, 2.0/6,
128
- # 9 10 11 12 13 14 15 16 17
129
- 2.0/6, 2.0/6, 2.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 0.0]
130
- num_peps_per_prot = [5,4,3,2,2,1].sort_by { rand }
131
- total_peps = num_peps_per_prot.inject(0) {|memo,obj| obj + memo }
132
- val = klass.new
133
- (0..total_peps).to_a.zip(expected) do |num_wrong, exp|
134
- val.worstcase_prothit_precision_by_numbers(num_peps_per_prot, num_wrong).should == exp
135
- end
136
- end
137
-
138
-
139
- end
140
-
141
-
@@ -1,146 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
- require File.expand_path( File.dirname(__FILE__) + '/../validator_helper' )
3
-
4
- require 'validator/transmem'
5
- require 'spec_id/digestor'
6
- require File.dirname(__FILE__) + '/fasta_helper'
7
- require 'spec_id'
8
-
9
- klass = Validator::Transmem::Protein
10
-
11
- describe klass, "on small mock set" do
12
- before(:each) do
13
- @toppred_file = Tfiles + '/toppred.small.out'
14
- @peps = (0..7).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
15
- # certain: 3 0 0 0 2 3 2 1
16
- references = %w(YAL002W YAL001C YAL003W YAL004W YAL007C YAL008W YAL009W YAL010C NOTEXISTING1 NOTEXISTING2)
17
- # index: 0 1 2 3 4 5 6 7
18
- @prots = references.map do |ref|
19
- v = SpecID::GenericProt.new
20
- v.reference = ref
21
- v
22
- end
23
-
24
- # TM (? = both)
25
- # @prots[8] doesn't have a key in the guy (nil)
26
- # SHOULD NOT change the results
27
- @peps[0].prots = [@prots[0], @prots[5], @prots[8]] # y
28
- @peps[1].prots = [@prots[1], @prots[5], @prots[8]] # ?
29
- @peps[2].prots = [@prots[3], @prots[4], @prots[8]] # ?
30
- @peps[3].prots = [@prots[2], @prots[8]] # n
31
- @peps[4].prots = [@prots[5], @prots[8]] # y
32
- @peps[5].prots = [@prots[4], @prots[8]] # y
33
- @peps[6].prots = [@prots[8]] # nil pep
34
- @peps[7].prots = [@prots[8], @prots[9]] # nil pep
35
-
36
- @validator = klass.new(@toppred_file)
37
- @validator.false_to_total_ratio = 1.0
38
- end
39
-
40
- it_should_behave_like 'a validator'
41
-
42
- it 'gives correct precision with false ratio (across all option combinations)' do
43
- answ = [[2,4], [0,6], [0,6], [-2,8]].map {|v| calc_precision(*v) }
44
- [true, false].each do |correct_wins|
45
- [true, false].each do |soluble_fraction|
46
- val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => soluble_fraction, :correct_wins => correct_wins)
47
- val.false_to_total_ratio = 0.5
48
- val.pephit_precision(@peps).should == answ.shift
49
- #p val.pephit_precision(@peps)
50
- end
51
- end
52
- end
53
-
54
- it 'calculates a correct false to total ratio' do
55
- val = klass.new(@toppred_file)
56
- fasta_obj = FastaHelper::FastaObj
57
- sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
58
- sequest_params_obj.opts['first_database_name'] = 'not_real'
59
- val.set_false_to_total_ratio( Digestor.digest(fasta_obj, sequest_params_obj) )
60
- ratio = val.false_to_total_ratio
61
- num_tps_soluble_peps = 777
62
- num_fps_insoluble_peps = 741
63
- expected_ratio = num_tps_soluble_peps.to_f / (num_tps_soluble_peps + num_fps_insoluble_peps)
64
- ratio.should == expected_ratio
65
- end
66
-
67
- it 'can grant transmem status to proteins for speed' do
68
- val = klass.new(@toppred_file)
69
- fasta_obj = FastaHelper::FastaObj
70
- sequest_params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
71
- hash = val.create_transmem_status_hash( Digestor.digest(fasta_obj.prots, sequest_params_obj))
72
- fasta_obj.prots.each do |prot|
73
- hash.key?(prot).should be_true
74
- end
75
- frozen = [true, true, false, true, false, false, true, false, true, false, true, true, true]
76
- fasta_obj.prots.map {|prot| hash[prot] }.should == frozen
77
- end
78
-
79
- def calc_precision(norm, trans)
80
- prec = norm.to_f / (norm + trans)
81
- end
82
-
83
- it 'can calculate precision incrementally' do
84
- val = klass.new(@toppred_file, :min_num_tms => 2, :false_to_total_ratio => 1.0)
85
- # usually we'd update the false_to_total_ratio, but not bothering for test
86
- # here we HAVE to set the status hash before hand... (we could redo this
87
- # section)
88
- val.transmem_status_hash = val.create_transmem_status_hash(@peps)
89
-
90
- # manually done:
91
- precisions = [0.0, 1.0/2, 2.0/3, 3.0/4, 3.0/5, 3.0/6, 3.0/6, 3.0/6]
92
-
93
- #frozen:
94
- calc_bkgs = [1.0, 0.5, 0.333333333333333, 0.25, 0.4, 0.5, 0.5, 0.5]
95
- #frozen:
96
- false_to_total_ratios = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
97
-
98
- @peps.zip(precisions, calc_bkgs, false_to_total_ratios) do |pep, exp_prec, calc_bkg, false_to_total_ratio|
99
- val.increment_pephits_precision(pep).should == exp_prec
100
- val.calculated_background.should be_close(calc_bkg, 0.00000000000001)
101
- val.false_to_total_ratio.should == false_to_total_ratio
102
- end
103
- end
104
-
105
- it 'creates correct reference hash' do
106
- val = klass.new(@toppred_file, :min_num_tms => 3, :soluble_fraction => true, :correct_wins => true)
107
- val.transmem_by_ti_key.should == {"YAL001C"=>false, "YAL011W"=>false, "YAL009W"=>false, "YAL010C"=>false, "YAL008W"=>true, "YAL007C"=>false, "YAL004W"=>false, "YAL005C"=>false, "YAL003W"=>false, "YAL002W"=>true, "YAL013W"=>false, "YAL014C"=>false, "YAL012W"=>false}
108
- end
109
-
110
-
111
- end
112
-
113
-
114
- #################################################
115
- # REFERENCE for small mock set:
116
- #################################################
117
- # for mintm >= 3 (T = TP, F = FP, sf = soluble_fraction)
118
- # sf=false sf=true
119
- # TM cw fw cw fw
120
- # 0 y T T F F
121
- # 1 ? T F T F
122
- # 2 n F F T T
123
- # 3 n F F T T
124
- # 4 y T T F F
125
- # 5 n F F T T
126
- #
127
- # [tps, fps]
128
- # cw=true( sf=true [4,2], sf=false [3,3] )
129
- # cw=false( sf=true [3,3], sf=false [2,4] )
130
-
131
- # for mintm >= 2 (T = TP, F = FP, sf = soluble_fraction)
132
- # sf=false sf=true
133
- # TM cw fw cw fw
134
- # 0 y T T F F
135
- # 1 ? T F T F
136
- # 2 ? T F T F
137
- # 3 n F F T T
138
- # 4 y T T F F
139
- # 5 y T T F F
140
- #
141
- # [tps, fps]
142
- # cw=true( sf=true [3,3], sf=false [5,1] )
143
- # cw=false( sf=true [1,5], sf=false [3,3] )
144
- #
145
- # sf=true( cw=true [3,3], cw=false[1,5] )
146
- # sf=false( cw=true [5,1], cw=false[3,3] )
@@ -1,58 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
- require File.expand_path(File.dirname(__FILE__) + '/../validator_helper')
3
-
4
- require 'validator/true_pos'
5
- require 'fasta'
6
- require 'spec_id'
7
-
8
- klass = Validator::TruePos
9
- describe klass, 'reporting precision on peptides' do
10
-
11
- before(:each) do
12
- @myfasta_string =<<END
13
- >gi|1245235|ProteinX
14
- ABCDEFGHIJKLMNOP
15
- >gi|987654|ProteinY
16
- AAAAAABBBBBBBBBBBB
17
- >gi|1111111|ProteinZ
18
- FFFFFFFFFGGGGGGZZZZ
19
- END
20
-
21
- @peps = (0..5).to_a.map {|n| v = SpecID::GenericPep.new; v.aaseq = n.to_s ; v }
22
- prots = %w(gi|1245235|ProteinX gi|987654|ProteinY gi|1111111|ProteinZ someOthergi AnotherGi YetAnotherReference).map do |ref|
23
- v = SpecID::GenericProt.new
24
- v.reference = ref
25
- v
26
- end
27
- @peps[0].prots = [prots[0], prots[5]] # TP (only in tp wins)
28
- @peps[1].prots = [prots[1], prots[2]] # TP always
29
- @peps[2].prots = [prots[3], prots[4]] # FP
30
- @peps[3].prots = [prots[2]] # TP
31
- @peps[4].prots = [prots[5]] # FP
32
- @peps[5].prots = [prots[4]] # FP
33
- @myfasta_obj = Fasta.new.load(StringIO.new(@myfasta_string))
34
-
35
- @validator = klass.new(@myfasta_obj)
36
- end
37
-
38
- it_should_behave_like 'a validator'
39
-
40
- it 'gives correct precision (across all options)' do
41
- answ_ar = [
42
- [[@peps[0], @peps[1], @peps[3]], [@peps[2], @peps[4], @peps[5]]],
43
- [[@peps[1], @peps[3]], [@peps[0], @peps[2], @peps[4], @peps[5]]]
44
- ]
45
-
46
- [true, false].each do |correct_wins|
47
- val = klass.new(@myfasta_obj, correct_wins)
48
- answ = val.pephit_precision(@peps)
49
- exp = ValidatorHelper.precision_from_partition_array(answ_ar.shift)
50
- answ.should == exp
51
- end
52
-
53
- end
54
-
55
- end
56
-
57
-
58
-
@@ -1,33 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- class ValidatorHelper
4
- def self.precision_from_partition_array(ar)
5
- (num_tp, num_fp) = ar.map {|v| v.size}
6
- num_tp.to_f / (num_tp + num_fp)
7
- end
8
- end
9
-
10
- module ValidatorHelper::Decoy
11
- def self.precision_from_partition_array(ar)
12
- (num_maybe_true, num_decoy) = ar.map {|v| v.size}
13
- num_tp = num_maybe_true - num_decoy
14
- num_fp = num_maybe_true - num_tp
15
- num_tp.to_f / (num_tp + num_fp)
16
- end
17
- end
18
-
19
- describe 'a validator', :shared => true do
20
- before(:each) do
21
- @empty_peps = []
22
- end
23
- it 'gives 1.0 for zero peptides (w/ pephit_precision)' do
24
- @validator.pephit_precision(@empty_peps).should == 1.0
25
-
26
- end
27
- it 'gives 1.0 for zero peptides (w/ increment_pephits_precision)' do
28
- @validator.increment_pephits_precision(@empty_peps).should == 1.0
29
- end
30
-
31
- end
32
-
33
-
data/specs/xml_spec.rb DELETED
@@ -1,12 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- require 'xml'
4
-
5
- describe XML, 'converting duration to seconds' do
6
- it 'converts hours/mins/seconds in combinations' do
7
- answ = [0.234, 624, 7392.2]
8
- %w(PT0.234S PT10M24S PT2H3M12.2S).zip(answ) do |string, answ|
9
- XML.duration_to_seconds(string).should == answ
10
- end
11
- end
12
- end