mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,202 @@
1
+ ---
2
+ pephits_precision:
3
+ - validator: decoy
4
+ value: 0.992932862190813
5
+ - validator: badAA
6
+ value: 0.178006237270664
7
+ - validator: badAA
8
+ value: -0.0247654296463377
9
+ - validator: badAA
10
+ value: 0.301413862599215
11
+ - validator: bias
12
+ value: 0.19471183136347
13
+ - validator: bias
14
+ value: -4.79308077902868
15
+ - validator: bias
16
+ value: 0.402836536134372
17
+ - validator: tmm
18
+ value: 0.437921755087444
19
+ - validator: tmm
20
+ value: -0.267185328932326
21
+ - validator: tmm
22
+ value: -0.155831020815818
23
+ - validator: tmm
24
+ value: -0.0201924414730414
25
+ - validator: tps
26
+ value: 0.226148409893993
27
+ pephits: 283
28
+ prothits_precision:
29
+ - validator: decoy
30
+ values:
31
+ :normal_stdev: 0.00659628206397061
32
+ :normal: 0.99622641509434
33
+ :worst: 0.971698113207547
34
+ - validator: badAA
35
+ values:
36
+ :normal_stdev: 0.0305695315962635
37
+ :normal: 0.334905660377358
38
+ :worst: 0.0188679245283019
39
+ - validator: badAA
40
+ values:
41
+ :normal_stdev: 0.0
42
+ :normal: 0.0
43
+ :worst: 0.0
44
+ - validator: badAA
45
+ values:
46
+ :normal_stdev: 0.0263288499231859
47
+ :normal: 0.497169811320755
48
+ :worst: 0.0377358490566038
49
+ - validator: bias
50
+ values:
51
+ :normal_stdev: 0.0329963984643387
52
+ :normal: 0.355660377358491
53
+ :worst: 0.0188679245283019
54
+ - validator: bias
55
+ values:
56
+ :normal_stdev: 0.0
57
+ :normal: 0.0
58
+ :worst: 0.0
59
+ - validator: bias
60
+ values:
61
+ :normal_stdev: 0.0266832747617573
62
+ :normal: 0.613207547169811
63
+ :worst: 0.0943396226415094
64
+ - validator: tmm
65
+ values:
66
+ :normal_stdev: 0.0281442148871296
67
+ :normal: 0.65377358490566
68
+ :worst: 0.122641509433962
69
+ - validator: tmm
70
+ values:
71
+ :normal_stdev: 0.0
72
+ :normal: 0.0
73
+ :worst: 0.0
74
+ - validator: tmm
75
+ values:
76
+ :normal_stdev: 0.0
77
+ :normal: 0.0
78
+ :worst: 0.0
79
+ - validator: tmm
80
+ values:
81
+ :normal_stdev: 0.0
82
+ :normal: 0.0
83
+ :worst: 0.0
84
+ - validator: tps
85
+ values:
86
+ :normal_stdev: 0.0317750140353944
87
+ :normal: 0.402830188679245
88
+ :worst: 0.0283018867924528
89
+ params:
90
+ validators:
91
+ - :type: decoy
92
+ :class: Validator::Decoy
93
+ :constraint: /^DECOY_/
94
+ :decoy_on_match: true
95
+ :correct_wins: true
96
+ - :calculated_background: 0.127208480565371
97
+ :type: badAA
98
+ :class: Validator::AA
99
+ :background: 0.001
100
+ :frequency: 0.0147528119278054
101
+ :false_to_total_ratio: 1.0
102
+ - :calculated_background: 0.402826855123675
103
+ :type: badAA
104
+ :class: Validator::AA
105
+ :background: 0.0
106
+ :frequency: 0.0463510332199843
107
+ :false_to_total_ratio: 1.0
108
+ - :calculated_background: 0.127208480565371
109
+ :type: badAA
110
+ :class: Validator::AA
111
+ :background: 0.001
112
+ :frequency:
113
+ :false_to_total_ratio: 0.180662732637313
114
+ - :calculated_background: 0.773851590106007
115
+ :type: bias
116
+ :class: Validator::Bias
117
+ :proteins_expected: true
118
+ :correct_wins: true
119
+ :background: 0.0
120
+ :file: /work/john/mspire/specs/../test_files/validator_hits_separate/bias_bioworks_small_HS.fasta
121
+ :false_to_total_ratio: 0.960962324103495
122
+ - :calculated_background: 0.226148409893993
123
+ :type: bias
124
+ :class: Validator::Bias
125
+ :proteins_expected: false
126
+ :correct_wins: true
127
+ :background: 0.0
128
+ :file: /work/john/mspire/specs/../test_files/validator_hits_separate/bias_bioworks_small_HS.fasta
129
+ :false_to_total_ratio: 0.0390376758965048
130
+ - :calculated_background: 0.773851590106007
131
+ :type: bias
132
+ :class: Validator::Bias
133
+ :proteins_expected: true
134
+ :correct_wins: true
135
+ :background: 0.2
136
+ :file: /work/john/mspire/specs/../test_files/validator_hits_separate/bias_bioworks_small_HS.fasta
137
+ :false_to_total_ratio: 0.960962324103495
138
+ - :calculated_background: 0.359430604982206
139
+ :type: tmm
140
+ :soluble_fraction: true
141
+ :class: Validator::Transmem::Protein
142
+ :no_include_tm_peps: 0.8
143
+ :correct_wins: true
144
+ :background: 0.2
145
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.phobius
146
+ :false_to_total_ratio: 0.283644859813084
147
+ :min_num_tms: 1
148
+ - :calculated_background: 0.359430604982206
149
+ :type: tmm
150
+ :soluble_fraction: true
151
+ :class: Validator::Transmem::Protein
152
+ :no_include_tm_peps: 0.8
153
+ :correct_wins: true
154
+ :background: 0.0
155
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.phobius
156
+ :false_to_total_ratio: 0.283644859813084
157
+ :min_num_tms: 1
158
+ - :calculated_background: 0.293286219081272
159
+ :type: tmm
160
+ :soluble_fraction: true
161
+ :class: Validator::Transmem::Protein
162
+ :no_include_tm_peps: false
163
+ :correct_wins: true
164
+ :background: 0.0
165
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.toppred.out
166
+ :false_to_total_ratio: 0.253744893327281
167
+ :min_num_tms: 3
168
+ - :calculated_background: 0.472924187725632
169
+ :type: tmm
170
+ :soluble_fraction: true
171
+ :class: Validator::Transmem::Protein
172
+ :no_include_tm_peps: 0.8
173
+ :correct_wins: true
174
+ :background: 0.0
175
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.toppred.out
176
+ :false_to_total_ratio: 0.463563704748472
177
+ :min_num_tms: 1
178
+ - :type: tps
179
+ :class: Validator::TruePos
180
+ ties: true
181
+ digestion:
182
+ - /work/john/mspire/specs/../test_files/bioworks_small.fasta
183
+ - /work/john/mspire/specs/../test_files/bioworks_small.params
184
+ prefilter: false
185
+ output:
186
+ - text_table => /work/john/mspire/specs/../test_files/table_output.tmp
187
+ - yaml => /work/john/mspire/specs/../test_files/filter_and_validate.tmp
188
+ sequest:
189
+ include_deltacnstar: true
190
+ xcorr1: 0.0
191
+ ppm: 1000000.0
192
+ xcorr2: 0.0
193
+ deltacn: 0.01
194
+ xcorr3: 0.0
195
+ top_hit_by: xcorr
196
+ decoy_on_match: true
197
+ postfilter: top_per_scan
198
+ include_ties_in_top_hit_postfilter: false
199
+ hits_together: true
200
+ proteins: true
201
+ include_ties_in_top_hit_prefilter: true
202
+ prothits: 106
@@ -0,0 +1,124 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ require 'spec_id/precision/filter'
4
+ # Specs for filter_and_validate.rb: most examples call SpecID::Precision::Filter#filter_and_validate_cmdline in-process; the --hits_separate example shells out via @cmd.
5
+ describe 'filter_and_validate.rb on small bioworks file' do
6
+ before(:all) do
7
+ @progname = 'filter_and_validate.rb'
8
+ # scratch file the examples write yaml/text_table output to; removed in after(:all)
9
+ @outfile = Tfiles + '/filter_and_validate.tmp'
10
+
11
+ # direct call with an array
12
+ @direct_call = Proc.new {|ar| SpecID::Precision::Filter.new.filter_and_validate_cmdline(ar) }
13
+ # direct call with a string
14
+ @direct_call_st = Proc.new {|st| @direct_call.call(st.split(/\s+/)) }
15
+ @st_to_yaml = Proc.new do |st| # runs the args with "-o yaml:<@outfile>" appended and returns the parsed yaml
16
+ to_call = st + " -o yaml:#{@outfile} "
17
+ @direct_call.call(to_call.split(/\s+/))
18
+ YAML.load_file(@outfile)
19
+ end
20
+ # baseline arguments: -1/-2/-3/-d cutoffs (presumably xcorr1-3 and deltacn, cf. the sequest params in the frozen yaml helper) plus the small bioworks file
21
+ @args = ["-1 0.6 -2 0.8 -3 0.9 -d 0.2", (Tfiles + '/bioworks_small.xml ')].join(' ')
22
+ @interactive_file = Tfiles + '/interactive.tmp'
23
+ File.open(@interactive_file,'w') do |fh|
24
+ string = ["0.6 0.8 0.9 0.2 5000", "dcns:f", "0.6 0.8 dcns:t", "pf:s", "pf:ac", "pf:a"].join("\n")
25
+ fh.puts string
26
+ end
27
+ # uses DECOY_ prefix on two
28
+ @fake_bioworks_file = Tfiles + '/validator_hits_separate/bioworks_small_HS.xml'
29
+ @small_bias_fasta_file = Tfiles + '/validator_hits_separate/bias_bioworks_small_HS.fasta'
30
+ @small_fasta_file = Tfiles + '/bioworks_small.fasta'
31
+ @params_file = Tfiles + '/bioworks_small.params'
32
+ @toppred_file = Tfiles + '/bioworks_small.toppred.out'
33
+ @phobius_file = Tfiles + '/bioworks_small.phobius'
34
+ @table_output_file = Tfiles + '/table_output.tmp'
35
+ end
36
+ # delete whichever temp files the examples left behind
37
+ after(:all) do
38
+ [@outfile, @interactive_file, @table_output_file].each do |file|
39
+ File.unlink(file) if File.exist?(file)
40
+ end
41
+ end
42
+
43
+ # this ensures that the actual commandline version gives usage.
44
+ it_should_behave_like "a cmdline program"
45
+
46
+ it 'filters a file and outputs to table or yaml' do
47
+ @direct_call_st.call( @args + " -o text_table:#{@outfile}")
48
+ IO.read(@outfile).should =~ /66/
49
+ struct = @st_to_yaml.call( @args )
50
+ struct['pephits'].should == 66
51
+ end
52
+
53
+ it 'responds to --no_deltacnstar' do
54
+ reply_without = @st_to_yaml.call( @args + " --no_deltacnstar" )
55
+ reply_without['pephits'].should == 34
56
+ end
57
+
58
+ it 'works with interactive input (includes dcnstar and postfilter)' do
59
+ @direct_call_st.call( "-o text_table:#{@outfile} -i #{@interactive_file} " + Tfiles + '/bioworks_small.xml ' )
60
+ reply = IO.read(@outfile)
61
+ # each "peps" row must report the next expected count, in order. NOTE(review): fewer rows than exp still pass.
62
+ exp = %w(73 40 73 73 33 33)
63
+ reply.scan(/^peps\s+(\d+)/) do |v|
64
+ Regexp.last_match[1].should == exp.shift
65
+ end
66
+ end
67
+
68
+ it 'responds to ppm filter' do
69
+ reply_without = @st_to_yaml.call( @args + " -p 280" )
70
+ reply_without['pephits'].should == 11
71
+ end
72
+
73
+ it 'responds to --hits_separate' do
74
+ # this file has two decoy peps that score better than the real peps at
75
+ # those scans
76
+ ht_file = Tfiles + '/test_together.tmp.yaml'
77
+ hs_file = Tfiles + '/test_separate.tmp.yaml'
78
+ [ht_file, hs_file].zip(['', ' --hits_separate']).each do |output_file, flag|
79
+ run_normal = @cmd + " --bias #{@small_bias_fasta_file} --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} #{@fake_bioworks_file} -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 -o yaml:#{output_file} #{flag}"
80
+ `#{run_normal}`
81
+ end
82
+ structs = [ht_file, hs_file].map do |file|
83
+ file.should exist
84
+ struct = YAML.load_file(file)
85
+ File.unlink file
86
+ struct
87
+ end
88
+
89
+ comparisons = %w(precision calc_bkg hits_together_param)
90
+ comps = structs.map do |st|
91
+ # note that calculated_background may need to be a string if we get our
92
+ # act together...
93
+ [ st['pephits_precision'][0]['value'], st['params']['validators'][0][:calculated_background], st['params']['hits_together'] ]
94
+ end
95
+ comparisons.zip( *comps ) do |tp, ht, hs|
96
+ ht.should_not == hs
97
+ end
98
+
99
+ end
100
+
101
+ it 'raises error on > 1 decoy validator' do
102
+ lambda { @st_to_yaml.call( @args + " --decoy /hello/ --decoy path/to/file" ) }.should raise_error(ArgumentError)
103
+ end
104
+
105
+ it 'handles multiple validators of the same kind (except, of course, decoy)' do
106
+ # the parsed yaml result must equal the frozen helper file, and the text table must show the matching rows
107
+ struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa C,true,0.001 --bad_aa E,true --bad_aa C,false,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
108
+ frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
109
+ struct.should == frozen
110
+
111
+ text_table = IO.read(@table_output_file)
112
+
113
+ # frozen
114
+ headings_re = Regexp.new( %w(num decoy badAA badAA badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
115
+ data_re = Regexp.new( %w(peps 283 0.993 0.178 -0.025 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
116
+ prot_re = Regexp.new( %w(106 0.972 0.019 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
117
+ text_table.should =~ headings_re
118
+ text_table.should =~ data_re
119
+ text_table.should =~ prot_re
120
+ end
121
+
122
+ end
123
+
124
+
@@ -0,0 +1,34 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ # Specs for the ms_to_lmat.rb script: run it on a twenty-scan mzXML file and compare the file it writes with a stored answer file.
4
+ describe 'ms_to_lmat.rb' do
5
+
6
+ before(:all) do
7
+ @progname = 'ms_to_lmat.rb'
8
+ @mzxml = Tfiles + "/opd1/twenty_scans.mzXML"
9
+ @ans_lmata = Tfiles + "/opd1/twenty_scans_answ.lmata"
10
+ @ans_lmat = Tfiles + "/opd1/twenty_scans_answ.lmat"
11
+ end
12
+ # NOTE(review): @cmd (used below) is never assigned in this file; presumably the shared behavior derives it from @progname -- confirm in spec_helper
13
+ it_should_behave_like "a cmdline program"
14
+ # output is expected at the input path with .mzXML swapped for .lmata; it is deleted after a successful comparison
15
+ it 'creates the correct lmata (ascii) file' do
16
+ cmd = "#{@cmd} #{@mzxml} --ascii"
17
+ `#{cmd}`
18
+ newfile = @mzxml.sub(".mzXML", ".lmata")
19
+ newfile.should exist
20
+ IO.read(newfile).should == IO.read(@ans_lmata)
21
+ File.unlink(newfile)
22
+ end
23
+
24
+ # same check without --ascii: output is expected at the input path with .mzXML swapped for .lmat
25
+ it 'creates the correct lmat (binary) file' do
26
+ cmd = "#{@cmd} #{@mzxml}"
27
+ `#{cmd}`
28
+ newfile = @mzxml.sub(".mzXML", ".lmat")
29
+ newfile.should exist
30
+ IO.read(newfile).should == IO.read(@ans_lmat)
31
+ File.unlink(newfile)
32
+ end
33
+ end
34
+
@@ -0,0 +1,62 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ require 'spec_id/precision/prob'
4
+
5
+ describe 'filter_and_validate.rb on small bioworks file' do
6
+ before(:all) do
7
+ @progname = 'prob_validate.rb'
8
+
9
+ @outfile = Tfiles + '/prob_and_validate.tmp'
10
+
11
+ # direct call with an array
12
+ @direct_call = Proc.new {|ar| SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ar) }
13
+ # direct call with a string
14
+ @direct_call_st = Proc.new {|st| @direct_call.call(st.split(/\s+/)) }
15
+ @st_to_yaml = Proc.new do |st|
16
+ to_call = st + " -o yaml:#{@outfile} "
17
+ @direct_call.call(to_call.split(/\s+/))
18
+ YAML.load_file(@outfile)
19
+ end
20
+
21
+ file = Tfiles + '/opd1/000_020_3prots-prot.mod_initprob.xml'
22
+ @args = [file].join(' ')
23
+ # uses DECOY_ prefix on two
24
+ @fake_bioworks_file = Tfiles + '/validator_hits_separate/bioworks_small_HS.xml'
25
+ @small_bias_fasta_file = Tfiles + '/validator_hits_separate/bias_bioworks_small_HS.fasta'
26
+ @small_fasta_file = Tfiles + '/bioworks_small.fasta'
27
+ @params_file = Tfiles + '/bioworks_small.params'
28
+ @toppred_file = Tfiles + '/bioworks_small.toppred.out'
29
+ @phobius_file = Tfiles + '/bioworks_small.phobius'
30
+ end
31
+
32
+ after(:all) do
33
+ [@outfile].each do |file|
34
+ File.unlink(file) if File.exist?(file)
35
+ end
36
+ end
37
+
38
+ # this ensures that the actual commandline version gives usage.
39
+ it_should_behave_like "a cmdline program"
40
+
41
+ it 'outputs to yaml' do
42
+ reply = @st_to_yaml.call( @args )
43
+ keys = [:probabilities, :params, :pephits_precision, :charges, :aaseqs, :count].map {|v| v.to_s }.sort
44
+ reply.keys.map {|v| v.to_s}.sort.should == keys
45
+ end
46
+
47
+ it 'responds to --prob init' do
48
+ normal = @st_to_yaml.call( @args + " --prob" )
49
+ normal[:pephits_precision].first[:values].should == [1.0, 1.0, 0.996655518394649, 0.918918918918919]
50
+ #normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
51
+ #normal.should == normal_nsp
52
+ init = @st_to_yaml.call( @args + " --prob init" )
53
+ init.should_not == normal
54
+ init[:pephits_precision].first[:values].should == [1.0, 0.974358974358974, 0.981324278438031, 0.890429958391123]
55
+ with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
56
+ # frozen
57
+ with_sort_by[:pephits_precision].first[:values].should == [1.0, 0.994974874371859, 0.996655518394649, 0.918918918918919]
58
+ end
59
+
60
+ end
61
+
62
+
@@ -0,0 +1,10 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+ # Placeholder spec for protein_summary.rb: only the shared 'a cmdline program' behavior. Declared with xdescribe -- presumably disabled; confirm where xdescribe is defined.
3
+ xdescribe 'protein_summary.rb' do
4
+
5
+ before(:all) do
6
+ @progname = 'protein_summary.rb'
7
+ end
8
+ it_should_behave_like 'a cmdline program'
9
+
10
+ end