mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
@@ -0,0 +1,202 @@
1
+ ---
2
+ pephits_precision:
3
+ - validator: decoy
4
+ value: 0.992932862190813
5
+ - validator: badAA
6
+ value: 0.178006237270664
7
+ - validator: badAA
8
+ value: -0.0247654296463377
9
+ - validator: badAA
10
+ value: 0.301413862599215
11
+ - validator: bias
12
+ value: 0.19471183136347
13
+ - validator: bias
14
+ value: -4.79308077902868
15
+ - validator: bias
16
+ value: 0.402836536134372
17
+ - validator: tmm
18
+ value: 0.437921755087444
19
+ - validator: tmm
20
+ value: -0.267185328932326
21
+ - validator: tmm
22
+ value: -0.155831020815818
23
+ - validator: tmm
24
+ value: -0.0201924414730414
25
+ - validator: tps
26
+ value: 0.226148409893993
27
+ pephits: 283
28
+ prothits_precision:
29
+ - validator: decoy
30
+ values:
31
+ :normal_stdev: 0.00659628206397061
32
+ :normal: 0.99622641509434
33
+ :worst: 0.971698113207547
34
+ - validator: badAA
35
+ values:
36
+ :normal_stdev: 0.0305695315962635
37
+ :normal: 0.334905660377358
38
+ :worst: 0.0188679245283019
39
+ - validator: badAA
40
+ values:
41
+ :normal_stdev: 0.0
42
+ :normal: 0.0
43
+ :worst: 0.0
44
+ - validator: badAA
45
+ values:
46
+ :normal_stdev: 0.0263288499231859
47
+ :normal: 0.497169811320755
48
+ :worst: 0.0377358490566038
49
+ - validator: bias
50
+ values:
51
+ :normal_stdev: 0.0329963984643387
52
+ :normal: 0.355660377358491
53
+ :worst: 0.0188679245283019
54
+ - validator: bias
55
+ values:
56
+ :normal_stdev: 0.0
57
+ :normal: 0.0
58
+ :worst: 0.0
59
+ - validator: bias
60
+ values:
61
+ :normal_stdev: 0.0266832747617573
62
+ :normal: 0.613207547169811
63
+ :worst: 0.0943396226415094
64
+ - validator: tmm
65
+ values:
66
+ :normal_stdev: 0.0281442148871296
67
+ :normal: 0.65377358490566
68
+ :worst: 0.122641509433962
69
+ - validator: tmm
70
+ values:
71
+ :normal_stdev: 0.0
72
+ :normal: 0.0
73
+ :worst: 0.0
74
+ - validator: tmm
75
+ values:
76
+ :normal_stdev: 0.0
77
+ :normal: 0.0
78
+ :worst: 0.0
79
+ - validator: tmm
80
+ values:
81
+ :normal_stdev: 0.0
82
+ :normal: 0.0
83
+ :worst: 0.0
84
+ - validator: tps
85
+ values:
86
+ :normal_stdev: 0.0317750140353944
87
+ :normal: 0.402830188679245
88
+ :worst: 0.0283018867924528
89
+ params:
90
+ validators:
91
+ - :type: decoy
92
+ :class: Validator::Decoy
93
+ :constraint: /^DECOY_/
94
+ :decoy_on_match: true
95
+ :correct_wins: true
96
+ - :calculated_background: 0.127208480565371
97
+ :type: badAA
98
+ :class: Validator::AA
99
+ :background: 0.001
100
+ :frequency: 0.0147528119278054
101
+ :false_to_total_ratio: 1.0
102
+ - :calculated_background: 0.402826855123675
103
+ :type: badAA
104
+ :class: Validator::AA
105
+ :background: 0.0
106
+ :frequency: 0.0463510332199843
107
+ :false_to_total_ratio: 1.0
108
+ - :calculated_background: 0.127208480565371
109
+ :type: badAA
110
+ :class: Validator::AA
111
+ :background: 0.001
112
+ :frequency:
113
+ :false_to_total_ratio: 0.180662732637313
114
+ - :calculated_background: 0.773851590106007
115
+ :type: bias
116
+ :class: Validator::Bias
117
+ :proteins_expected: true
118
+ :correct_wins: true
119
+ :background: 0.0
120
+ :file: /work/john/mspire/specs/../test_files/validator_hits_separate/bias_bioworks_small_HS.fasta
121
+ :false_to_total_ratio: 0.960962324103495
122
+ - :calculated_background: 0.226148409893993
123
+ :type: bias
124
+ :class: Validator::Bias
125
+ :proteins_expected: false
126
+ :correct_wins: true
127
+ :background: 0.0
128
+ :file: /work/john/mspire/specs/../test_files/validator_hits_separate/bias_bioworks_small_HS.fasta
129
+ :false_to_total_ratio: 0.0390376758965048
130
+ - :calculated_background: 0.773851590106007
131
+ :type: bias
132
+ :class: Validator::Bias
133
+ :proteins_expected: true
134
+ :correct_wins: true
135
+ :background: 0.2
136
+ :file: /work/john/mspire/specs/../test_files/validator_hits_separate/bias_bioworks_small_HS.fasta
137
+ :false_to_total_ratio: 0.960962324103495
138
+ - :calculated_background: 0.359430604982206
139
+ :type: tmm
140
+ :soluble_fraction: true
141
+ :class: Validator::Transmem::Protein
142
+ :no_include_tm_peps: 0.8
143
+ :correct_wins: true
144
+ :background: 0.2
145
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.phobius
146
+ :false_to_total_ratio: 0.283644859813084
147
+ :min_num_tms: 1
148
+ - :calculated_background: 0.359430604982206
149
+ :type: tmm
150
+ :soluble_fraction: true
151
+ :class: Validator::Transmem::Protein
152
+ :no_include_tm_peps: 0.8
153
+ :correct_wins: true
154
+ :background: 0.0
155
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.phobius
156
+ :false_to_total_ratio: 0.283644859813084
157
+ :min_num_tms: 1
158
+ - :calculated_background: 0.293286219081272
159
+ :type: tmm
160
+ :soluble_fraction: true
161
+ :class: Validator::Transmem::Protein
162
+ :no_include_tm_peps: false
163
+ :correct_wins: true
164
+ :background: 0.0
165
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.toppred.out
166
+ :false_to_total_ratio: 0.253744893327281
167
+ :min_num_tms: 3
168
+ - :calculated_background: 0.472924187725632
169
+ :type: tmm
170
+ :soluble_fraction: true
171
+ :class: Validator::Transmem::Protein
172
+ :no_include_tm_peps: 0.8
173
+ :correct_wins: true
174
+ :background: 0.0
175
+ :transmem_file: /work/john/mspire/specs/../test_files/bioworks_small.toppred.out
176
+ :false_to_total_ratio: 0.463563704748472
177
+ :min_num_tms: 1
178
+ - :type: tps
179
+ :class: Validator::TruePos
180
+ ties: true
181
+ digestion:
182
+ - /work/john/mspire/specs/../test_files/bioworks_small.fasta
183
+ - /work/john/mspire/specs/../test_files/bioworks_small.params
184
+ prefilter: false
185
+ output:
186
+ - text_table => /work/john/mspire/specs/../test_files/table_output.tmp
187
+ - yaml => /work/john/mspire/specs/../test_files/filter_and_validate.tmp
188
+ sequest:
189
+ include_deltacnstar: true
190
+ xcorr1: 0.0
191
+ ppm: 1000000.0
192
+ xcorr2: 0.0
193
+ deltacn: 0.01
194
+ xcorr3: 0.0
195
+ top_hit_by: xcorr
196
+ decoy_on_match: true
197
+ postfilter: top_per_scan
198
+ include_ties_in_top_hit_postfilter: false
199
+ hits_together: true
200
+ proteins: true
201
+ include_ties_in_top_hit_prefilter: true
202
+ prothits: 106
@@ -0,0 +1,124 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ require 'spec_id/precision/filter'
4
+
5
+ describe 'filter_and_validate.rb on small bioworks file' do
6
+ before(:all) do  # one-time fixture setup; the instance variables below are read by the examples in this group
7
+ @progname = 'filter_and_validate.rb'  # presumably read by the "a cmdline program" shared behaviour -- confirm in spec_helper
8
+
9
+ @outfile = Tfiles + '/filter_and_validate.tmp'
10
+
11
+ # in-process call of the filter's command-line entry point with an argv-style array
12
+ @direct_call = Proc.new {|ar| SpecID::Precision::Filter.new.filter_and_validate_cmdline(ar) }
13
+ # same as @direct_call, but takes a single string that is split on whitespace
14
+ @direct_call_st = Proc.new {|st| @direct_call.call(st.split(/\s+/)) }
15
+ @st_to_yaml = Proc.new do |st|  # appends '-o yaml:<@outfile>' to st, runs it, and returns the parsed YAML
16
+ to_call = st + " -o yaml:#{@outfile} "
17
+ @direct_call.call(to_call.split(/\s+/))
18
+ YAML.load_file(@outfile)
19
+ end
20
+
21
+ @args = ["-1 0.6 -2 0.8 -3 0.9 -d 0.2", (Tfiles + '/bioworks_small.xml ')].join(' ')  # default options + input file used by most examples
22
+ @interactive_file = Tfiles + '/interactive.tmp'
23
+ File.open(@interactive_file,'w') do |fh|  # writes the six-line input fed to '-i' by the interactive example
24
+ string = ["0.6 0.8 0.9 0.2 5000", "dcns:f", "0.6 0.8 dcns:t", "pf:s", "pf:ac", "pf:a"].join("\n")
25
+ fh.puts string
26
+ end
27
+ # uses DECOY_ prefix on two (presumably two peptide hits -- see the --hits_separate example)
28
+ @fake_bioworks_file = Tfiles + '/validator_hits_separate/bioworks_small_HS.xml'
29
+ @small_bias_fasta_file = Tfiles + '/validator_hits_separate/bias_bioworks_small_HS.fasta'
30
+ @small_fasta_file = Tfiles + '/bioworks_small.fasta'
31
+ @params_file = Tfiles + '/bioworks_small.params'
32
+ @toppred_file = Tfiles + '/bioworks_small.toppred.out'
33
+ @phobius_file = Tfiles + '/bioworks_small.phobius'
34
+ @table_output_file = Tfiles + '/table_output.tmp'  # text_table target of the multiple-validators example
35
+ end
36
+
37
+ after(:all) do  # remove the temp files this group writes
38
+ [@outfile, @interactive_file, @table_output_file].each do |file|
39
+ File.unlink(file) if File.exist?(file)  # guard: a file is only present if the example that writes it ran
40
+ end
41
+ end
42
+
43
+ # this ensures that the actual commandline version gives usage.
44
+ it_should_behave_like "a cmdline program"
45
+
46
+ it 'filters a file and outputs to table or yaml' do
47
+ @direct_call_st.call( @args + " -o text_table:#{@outfile}")
48
+ IO.read(@outfile).should =~ /66/
49
+ struct = @st_to_yaml.call( @args )
50
+ struct['pephits'].should == 66
51
+ end
52
+
53
+ it 'responds to --no_deltacnstar' do
54
+ reply_without = @st_to_yaml.call( @args + " --no_deltacnstar" )
55
+ reply_without['pephits'].should == 34
56
+ end
57
+
58
+ it 'works with interactive input (includes dcnstar and postfilter)' do
59
+ @direct_call_st.call( "-o text_table:#{@outfile} -i #{@interactive_file} " + Tfiles + '/bioworks_small.xml ' )
60
+ reply = IO.read(@outfile)
61
+
62
+ exp = %w(73 40 73 73 33 33)
63
+ reply.scan(/^peps\s+(\d+)/) do |v|
64
+ Regexp.last_match[1] == exp.shift
65
+ end
66
+ end
67
+
68
+ it 'responds to ppm filter' do
69
+ reply_without = @st_to_yaml.call( @args + " -p 280" )
70
+ reply_without['pephits'].should == 11
71
+ end
72
+
73
+ it 'responds to --hits_separate' do
74
+ # this file has two decoy peps that score better than the real peps at
75
+ # those scans
76
+ ht_file = Tfiles + '/test_together.tmp.yaml'
77
+ hs_file = Tfiles + '/test_separate.tmp.yaml'
78
+ outputs = [ht_file, hs_file].zip(['', ' --hits_separate']).map do |output_file, flag|
79
+ run_normal = @cmd + " --bias #{@small_bias_fasta_file} --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} #{@fake_bioworks_file} -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 -o yaml:#{output_file} #{flag}"
80
+ `#{run_normal}`
81
+ end
82
+ structs = [ht_file, hs_file].map do |file|
83
+ file.should exist
84
+ struct = YAML.load_file(file)
85
+ File.unlink file
86
+ struct
87
+ end
88
+
89
+ comparisons = %w(precision calc_bkg hits_together_param)
90
+ comps = structs.map do |st|
91
+ # note that calculated_background may need to be a string if we get our
92
+ # act together...
93
+ [ st['pephits_precision'][0]['value'], st['params']['validators'][0][:calculated_background], st['params']['hits_together'] ]
94
+ end
95
+ comparisons.zip( *comps ) do |tp, ht, hs|
96
+ ht.should_not == hs
97
+ end
98
+
99
+ end
100
+
101
+ it 'raises error on > 1 decoy validator' do
102
+ lambda { @st_to_yaml.call( "#{@args} --decoy /hello/ --decoy path/to/file" ) }.should raise_error(ArgumentError)  # two --decoy options must be rejected
103
+ end
104
+
105
+ it 'handles multiple validators of the same kind (except, of course, decoy)' do
106
+
107
+ struct = @st_to_yaml.call( "#{@fake_bioworks_file} --proteins -1 0.0 -2 0.0 -3 0.0 -d 0.01 -p 1000000 --decoy /^DECOY_/ --digestion #{@small_fasta_file},#{@params_file} --bad_aa C,true,0.001 --bad_aa E,true --bad_aa C,false,0.001 --bias #{@small_bias_fasta_file},true --bias #{@small_bias_fasta_file},false --bias #{@small_bias_fasta_file},true,0.2 --tmm #{@phobius_file},1,true,0.8,0.2 --tmm #{@phobius_file} --tmm #{@toppred_file},3,true,false --tmm #{@toppred_file} --tps #{@small_bias_fasta_file} -o text_table:#{@table_output_file} " )
108
+ frozen = YAML.load_file( File.dirname(__FILE__) + "/filter_and_validate__multiple_vals_helper.yaml" )
109
+ struct.should == frozen
110
+
111
+ text_table = IO.read(@table_output_file)
112
+
113
+ # frozen
114
+ headings_re = Regexp.new( %w(num decoy badAA badAA badAA bias bias bias tmm tmm tmm tmm tps).join("\\s+") )
115
+ data_re = Regexp.new( %w(peps 283 0.993 0.178 -0.025 0.301 0.195 -4.793 0.403 0.438 -0.267 -0.156 -0.020 0.226).join("\\s+") )
116
+ prot_re = Regexp.new( %w(106 0.972 0.019 0.0 0.038 0.019 0.0 0.094 0.123 0.0 0.0 0.0 0.028).join("\\s+") )
117
+ text_table.should =~ headings_re
118
+ text_table.should =~ data_re
119
+ text_table.should =~ prot_re
120
+ end
121
+
122
+ end
123
+
124
+
@@ -0,0 +1,34 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+
4
+ describe 'ms_to_lmat.rb' do
5
+
6
+ before(:all) do  # paths shared by both examples in this group
7
+ @progname = 'ms_to_lmat.rb'  # presumably read by the "a cmdline program" shared behaviour -- confirm in spec_helper
8
+ @mzxml = Tfiles + "/opd1/twenty_scans.mzXML"  # input handed to the script
9
+ @ans_lmata = Tfiles + "/opd1/twenty_scans_answ.lmata"  # expected output for --ascii
10
+ @ans_lmat = Tfiles + "/opd1/twenty_scans_answ.lmat"  # expected output without --ascii
11
+ end
12
+
13
+ it_should_behave_like "a cmdline program"
14
+
15
+ it 'creates the correct lmata (ascii) file' do
16
+ cmd = "#{@cmd} #{@mzxml} --ascii"
17
+ `#{cmd}`
18
+ newfile = @mzxml.sub(".mzXML", ".lmata")
19
+ newfile.should exist
20
+ IO.read(newfile).should == IO.read(@ans_lmata)
21
+ File.unlink(newfile)
22
+ end
23
+
24
+
25
+ it 'creates the correct lmat (binary) file' do
26
+ cmd = "#{@cmd} #{@mzxml}"
27
+ `#{cmd}`
28
+ newfile = @mzxml.sub(".mzXML", ".lmat")
29
+ newfile.should exist
30
+ IO.read(newfile).should == IO.read(@ans_lmat)
31
+ File.unlink(newfile)
32
+ end
33
+ end
34
+
@@ -0,0 +1,62 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ require 'spec_id/precision/prob'
4
+
5
+ describe 'filter_and_validate.rb on small bioworks file' do
6
+ before(:all) do
7
+ @progname = 'prob_validate.rb'
8
+
9
+ @outfile = Tfiles + '/prob_and_validate.tmp'
10
+
11
+ # direct call with an array
12
+ @direct_call = Proc.new {|ar| SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ar) }
13
+ # direct call with a string
14
+ @direct_call_st = Proc.new {|st| @direct_call.call(st.split(/\s+/)) }
15
+ @st_to_yaml = Proc.new do |st|
16
+ to_call = st + " -o yaml:#{@outfile} "
17
+ @direct_call.call(to_call.split(/\s+/))
18
+ YAML.load_file(@outfile)
19
+ end
20
+
21
+ file = Tfiles + '/opd1/000_020_3prots-prot.mod_initprob.xml'
22
+ @args = [file].join(' ')
23
+ # uses DECOY_ prefix on two
24
+ @fake_bioworks_file = Tfiles + '/validator_hits_separate/bioworks_small_HS.xml'
25
+ @small_bias_fasta_file = Tfiles + '/validator_hits_separate/bias_bioworks_small_HS.fasta'
26
+ @small_fasta_file = Tfiles + '/bioworks_small.fasta'
27
+ @params_file = Tfiles + '/bioworks_small.params'
28
+ @toppred_file = Tfiles + '/bioworks_small.toppred.out'
29
+ @phobius_file = Tfiles + '/bioworks_small.phobius'
30
+ end
31
+
32
+ after(:all) do
33
+ [@outfile].each do |file|
34
+ File.unlink(file) if File.exist?(file)
35
+ end
36
+ end
37
+
38
+ # this ensures that the actual commandline version gives usage.
39
+ it_should_behave_like "a cmdline program"
40
+
41
+ it 'outputs to yaml' do
42
+ reply = @st_to_yaml.call( @args )
43
+ keys = [:probabilities, :params, :pephits_precision, :charges, :aaseqs, :count].map {|v| v.to_s }.sort
44
+ reply.keys.map {|v| v.to_s}.sort.should == keys
45
+ end
46
+
47
+ it 'responds to --prob init' do
48
+ normal = @st_to_yaml.call( @args + " --prob" )
49
+ normal[:pephits_precision].first[:values].should == [1.0, 1.0, 0.996655518394649, 0.918918918918919]
50
+ #normal_nsp = @st_to_yaml.call( @args + " --prob nsp" )
51
+ #normal.should == normal_nsp
52
+ init = @st_to_yaml.call( @args + " --prob init" )
53
+ init.should_not == normal
54
+ init[:pephits_precision].first[:values].should == [1.0, 0.974358974358974, 0.981324278438031, 0.890429958391123]
55
+ with_sort_by = @st_to_yaml.call( @args + " --prob nsp --sort_by_init" )
56
+ # frozen
57
+ with_sort_by[:pephits_precision].first[:values].should == [1.0, 0.994974874371859, 0.996655518394649, 0.918918918918919]
58
+ end
59
+
60
+ end
61
+
62
+
@@ -0,0 +1,10 @@
1
+ require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
+
3
+ xdescribe 'protein_summary.rb' do  # NOTE(review): xdescribe presumably disables this group; its definition is not visible here -- confirm in spec_helper
4
+
5
+ before(:all) do
6
+ @progname = 'protein_summary.rb'  # presumably read by the 'a cmdline program' shared behaviour -- confirm in spec_helper
7
+ end
8
+ it_should_behave_like 'a cmdline program'
9
+
10
+ end