mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/lib/validator.rb ADDED
@@ -0,0 +1,214 @@
1
+
2
# Base class for the validators required at the bottom of this file.  It keeps
# a running tally for incremental precision calculation and offers a few
# numeric helpers.  #increment_pephits_precision relies on #partition and
# #calc_precision, neither of which is defined in this class.
class Validator

  # Short label for each validator, keyed both by class name (String) and by
  # a symbol shorthand.
  Validator_to_string = {
    'Validator::AA' => 'badAA',
    'Validator::Decoy' => 'decoy',
    'Validator::Transmem::Protein' => 'tmm',
    'Validator::TruePos' => 'tps',
    'Validator::Bias' => 'bias',
    'Validator::Probability' => 'prob',
    :bad_aa => 'badAA',
    :decoy => 'decoy',
    :tmm => 'tmm',
    :tps => 'tps',
    :bias => 'bias',
    :prob => 'prob',
  }

  # Resets the running tallies used by #increment_pephits_precision.
  def initialize_increment
    @increment_tps = 0
    @increment_fps = 0
    @increment_total_submitted = 0
    @increment_initialized = true
  end

  # if adding pephits in groups at a time, the entire group does not need to be
  # queried, just the individual hit.  Use this OR pephits_precision (NOT
  # both).  The initial query to this method will begin a running tally that
  # is saved by the validator.
  # takes either an array or a single pephit (determined by if it is a
  # SpecID::Pep)
  # returns whatever #calc_precision returns for the tally so far
  def increment_pephits_precision(peps)
    # defined? avoids the "instance variable not initialized" warning without
    # having to switch the global $VERBOSE off and on again
    initialize_increment unless defined?(@increment_initialized) && @increment_initialized

    to_submit = peps.is_a?(SpecID::Pep) ? [peps] : peps
    @increment_total_submitted += to_submit.size
    (tps, fps) = partition(to_submit)
    @increment_tps += tps.size
    @increment_fps += fps.size
    (num_tps, num_fps) =
      if respond_to?(:calc_precision_prep) # for digestion based validators
        calc_precision_prep(@increment_tps, @increment_fps)
      else
        [@increment_tps, @increment_fps]
      end
    calc_precision(num_tps, num_fps)
  end

  # returns an adjusted false positive rate (a float not to drop below 0.0)
  # based on a background of 'false'-false positive hits to total hits.  Also
  # sets the @calculated_background attribute.  Accepts floats or ints
  def adjust_fps_for_background(num_tps, num_fps, background)
    num_fps = num_fps.to_f
    total_peps = num_tps + num_fps
    @calculated_background = num_fps / total_peps
    num_fps -= (total_peps.to_f * background)
    num_fps = 0.0 if num_fps < 0.0
    num_fps
  end

  # copied from libjtp: vec
  # returns [mean, std_dev]; the deviation is divided by (n - 1), or by 1
  # when there is only a single value
  def sample_stats(array)
    _len = array.size
    _sum = 0.0
    _sum_sq = 0.0
    array.each do |val|
      _sum += val
      _sum_sq += val * val
    end
    std_dev = _sum_sq - ((_sum * _sum)/_len)
    std_dev /= ( (_len > 1) ? (_len-1) : 1 )
    # on occasion, a very small negative number occurs
    if std_dev < 0.0
      std_dev = 0.0
    else
      std_dev = Math.sqrt(std_dev)
    end
    mean = _sum.to_f/_len
    [mean, std_dev]
  end

  # takes an array of validators and returns a fresh array where each has been
  # turned into a sensible hash (with symbols as the keys!).  Every hash also
  # gets :type (the short label from Validator_to_string) and :class (the
  # class name as a string).
  # raises ArgumentError for a validator class that is not handled here
  def self.sensible_validator_hashes(validators)
    validators.map do |val|
      hash = {}
      case val
      when Validator::TruePos
        # merge! (not merge): the result has to end up in +hash+ itself
        hash.merge!( {:correct_wins => val.correct_wins, :file => val.fasta.filename } )
      when Validator::AA
        %w(frequency false_to_total_ratio background calculated_background).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
      when Validator::Decoy
        %w(correct_wins decoy_on_match).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
        hash[:constraint] = val.constraint.inspect if val.constraint
      when Validator::Bias
        %w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
        hash[:file] = val.fasta.filename
      when Validator::Transmem::Protein
        %w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
      when Validator::Probability
        %w(prob_method).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
      else ; raise ArgumentError, "Don't know the validator class #{val}"
      end
      klass_as_s = val.class.to_s
      hash[:type] = Validator_to_string[klass_as_s]
      hash[:class] = klass_as_s
      hash
    end
  end

=begin
  ## THIS IS WITH STRINGS AS KEYS!
  # (disabled variant of sensible_validator_hashes, kept for reference)
  # takes an array of validators and returns a fresh array where each has been
  # turned into a sensible hash (with symbols as the keys!)
  def self.sensible_validator_hashes(validators)
    validators.map do |val|
      hash = {}
      case val
      when Validator::TruePos
        hash.merge( {'correct_wins' => val.correct_wins, 'file' => val.fasta.filename } )
      when Validator::AA
        %w(frequency false_to_total_ratio background calculated_background false_to_total_ratio).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
      when Validator::Decoy
        %w(correct_wins decoy_on_match).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
        hash['constraint'] = val.constraint.inspect if val.constraint
      when Validator::Bias
        %w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
        hash['file'] = val.fasta.filename
      when Validator::Transmem::Protein
        %w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
      when Validator::Probability
      else ; raise ArgumentError, "Don't know the validator class #{val}"
      end
      klass_as_s = val.class.to_s
      hash['type'] = Validator_to_string[klass_as_s]
      hash['class'] = klass_as_s
      hash
    end
  end
=end

end
171
+
172
module Precision::Calculator
  # Precision under the assumption that every hit in the first group is true
  # and every hit in the second group is false: true / (true + false).
  # Both counts are converted with to_f first.
  # When both counts are zero, 1.0 is returned.
  def calc_precision(num_true_hits, num_false_hits)
    true_hits = num_true_hits.to_f
    false_hits = num_false_hits.to_f
    return 1.0 if true_hits == 0.0 && false_hits == 0.0
    true_hits / (true_hits + false_hits)
  end
end
184
+
185
# will calculate precision for groups of proteins where the first group are
# normal hits (which may be true or false) and the second are decoy hits:
# (num_normal - num_decoy) / num_normal
# edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
module Precision::Calculator::Decoy
  # Both arguments are converted with to_f so that fractional amounts can be
  # passed in.  Returns a Float.
  # NOTE(review): the result is negative when num_decoy exceeds num_normal;
  # confirm whether callers expect it to be floored at 0.0.
  def calc_precision(num_normal, num_decoy)
    num_normal_f = num_normal.to_f
    num_decoy_f = num_decoy.to_f
    if num_normal_f == 0.0
      num_decoy_f > 0.0 ? 0.0 : 1.0
    else
      (num_normal_f - num_decoy_f) / num_normal_f
    end
  end
end
206
+
207
+ require 'validator/true_pos'
208
+ require 'validator/aa'
209
+ require 'validator/bias'
210
+ require 'validator/decoy'
211
+ require 'validator/transmem'
212
+ require 'validator/probability'
213
+ require 'validator/prot_from_pep'
214
+
data/lib/xml.rb ADDED
@@ -0,0 +1,38 @@
1
+
2
module XML
  # Matches a duration remainder that carries an hour or minute designator.
  HourMinuteMatch = /[MH]/

  # Converts a time-only duration string such as 'PT1.223434S' or
  # 'PT1H2M3.5S' into seconds.
  #
  # string:: a duration beginning with 'PT'
  # returns:: a Float number of seconds
  #
  # Durations without a seconds part ('PT5M', 'PT2H') are supported.
  # doesn't support year month, etc, yet: any string not starting with 'PT'
  # aborts the program.
  def self.duration_to_seconds(string)
    unless string[0,2] == 'PT'
      abort 'need to include support for other durations'
    end
    rest = string[2..-1]

    # usually it will be this 'PT1.223434S': drop the trailing designator
    return rest[0...-1].to_f if rest !~ HourMinuteMatch

    whole_secs = 0   # seconds contributed by hours and minutes
    seconds = 0.0    # the (possibly fractional) seconds field
    addit = ''       # digits collected since the last designator
    rest.split('').each do |let|
      case let
      when 'H'
        whole_secs += addit.to_i * 3600
        addit = ''
      when 'M'
        whole_secs += addit.to_i * 60
        addit = ''
      when 'S'
        seconds = addit.to_f
        addit = ''
      else
        addit << let
      end
    end
    whole_secs.to_f + seconds
  end
end
@@ -0,0 +1,105 @@
1
+
2
# Mixin and registry for parsers that can be backed by one of several XML
# libraries.  The module-level methods load whichever libraries are installed
# and pick a parser class from a namespace; the instance-level #parse
# dispatches to the method named by #method.
module XMLStyleParser
  @done_once = nil

  # Order in which parser implementations are preferred.
  Parser_precedence = %w(AXML LibXML XMLParser Regexp REXML)
  # currently AXML requires 'xmlparser' to be installed.... (may not always be
  # the case...)
  # Each pattern is tested against the entries of $LOADED_FEATURES.  Those
  # entries are absolute paths on current Rubies, so a match is accepted
  # either at the start of the entry or right after a '/'.
  File_required = {
    'AXML' => %r{(?:^|/)axml},
    'LibXML' => %r{(?:^|/)xml/libxml},
    'XMLParser' => %r{(?:^|/)xmlparser},
  }

  # the method that the parser will call on the given file at parse!
  attr_accessor :method

  # parses the given file by sending to @method
  # raises NoMethodError if the object does not respond to @method
  def parse(file, opts={})
    if respond_to? @method
      send(@method, file, opts)
    else
      raise NoMethodError, "Parser of class #{self.class} can't parse #{@method} yet"
    end
  end

  # XMLParser and xml/libxml are incompatible, so if xmlparser is available,
  # libxml will not be loaded (XMLParser#parse is clobbered by
  # XML::Parser#parse [don't ask me why])
  # Libraries that are not installed are skipped silently.  The work is done
  # only on the first call.
  def self.require_parsers
    unless @done_once
      have_xmlparser = false
      begin
        require 'xmlparser'
        puts "Loaded XMLParser" if $VERBOSE
        have_xmlparser = true
      rescue LoadError
      end

      begin
        require 'axml'
        puts "Loaded AXML" if $VERBOSE
      rescue LoadError
      end

      unless have_xmlparser
        begin
          require 'xml/libxml'
          puts "Loaded xml/libxml" if $VERBOSE
          ################################################################
          # IMPORTANT!
          # This magic line makes the parser behave like it ought to!!
          XML::Parser.default_keep_blanks = false
          ################################################################
        rescue LoadError
        end
      end
    end
    @done_once = true
  end

  # returns an array of strings depending on File_required (in the order of
  # Parser_precedence).  Parsers without an entry in File_required are always
  # included.
  def self.available_xml_parsers
    require_parsers
    parser_precedence = Parser_precedence.dup
    File_required.each do |name, pattern|
      loaded = $LOADED_FEATURES.any? {|req_file| req_file =~ pattern }
      parser_precedence.delete(name) unless loaded
    end
    parser_precedence
  end

  ## appends downcase to each parser type here and tries to require it
  # returns all those that were required without a load error
  def self.require_parse_files(base_dir)
    XMLStyleParser.available_xml_parsers.select do |v|
      to_require = base_dir + '/' + v.downcase
      begin
        require to_require
        true
      rescue LoadError
        false
      end
    end
  end

  # seeks a subclass that has the public_method @method
  # const:: the namespace whose constants (named as in Parser_precedence) are
  #         the candidate parser classes
  # method:: name of the parse method that is needed
  # returns the first candidate, in precedence order, that defines +method+
  # raises NoMethodError when no candidate does
  def self.choose_parser(const, method)
    # only consider parsers whose backing library is actually loaded
    candidates = available_xml_parsers.select {|v| const.const_defined?(v) }
    chosen = candidates.map {|v| const.const_get(v) }.find do |subclass|
      subclass.public_method_defined? method
    end
    return chosen if chosen
    raise NoMethodError, "No parser of class #{const} can parse :#{method}\n** Is 'axml' (or another xml parser) installed and working? **"
  end

end
@@ -0,0 +1,19 @@
1
+
2
+
3
# Mixin that instantiates a parser class found as a constant of the including
# object's class, feeds it some input and returns the parser's report.
module XMLParserWrapper
  # Reads +file+ completely and parses its contents.
  # file:: path of the file to read
  # const:: name of the parser class (looked up with self.class.const_get)
  # report_method:: method sent to the parser after parsing
  # returns whatever +report_method+ returns
  def parse_and_report(file, const, report_method=:report)
    # File.read rather than IO.read: IO.read runs a subprocess when the
    # name begins with '|'
    parse_and_report_string(File.read(file), const, report_method)
  end

  # Parses +string+ with a new instance of the parser class named +const+ and
  # returns the result of sending +report_method+ to that parser.
  def parse_and_report_string(string, const, report_method=:report)
    parser = self.class.const_get(const).new
    parser.parse(string)
    parser.send(report_method)
  end

  # Same as parse_and_report_string, but hands the parser +io+ instead of a
  # string.
  def parse_and_report_io(io, const, report_method=:report)
    parser = self.class.const_get(const).new
    parser.parse(io)
    parser.send(report_method)
  end
end
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
+ require 'roc'
5
+ require 'optparse'
6
+ require 'generator'
7
+
8
# Reads one or more tab delimited "prob, file:seq:charge, T/F" files, computes
# precision vs. number of hits for each and writes <base>.to_plot and
# <base>.csv.

decoy = false
base = "precision_vs_numhits"

opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} smriti.csv ..."
  op.separator ""
  op.separator "smriti.csv = (tab delimited) prob, file:seq:charge, T/F"
  op.separator ""
  op.on("--decoy", "'F' indicates this is a decoy") { decoy = true }
  op.on("-o", "--outfile <filename>", "base outfile name (#{base})") {|v| base = v }
end

opts.parse!

if ARGV.empty?
  puts opts
  exit
end

files = ARGV.to_a

# strips the final extension, however long it is ('run.csv' -> 'run'); a dot
# inside a directory name is left alone
strip_extension = lambda {|name| name.sub(/\.[^\.\/]+$/, '') }

# one [num_hits, precision] pair of arrays per input file
xys = files.map do |file|
  triplets = File.readlines(file).reject {|line| line =~ /^#/ }.map do |line|
    line.chomp.split("\t")
  end
  abort "no data rows in #{file}" if triplets.empty?

  # check that they're all OK:
  triplets.each do |trip|
    abort "bad triplet" if trip.size != 3
  end

  # figure out the ordering (and correct if necessary): the values are
  # negated when the first value is larger than the last one
  higher_better = triplets.first[0].to_f > triplets.last[0].to_f

  doublets = triplets.map do |trip|
    value = trip[0].to_f
    value *= -1 if higher_better
    [value, trip[2] == 'T']
  end

  roc = ROC.new
  (tps, fps) = roc.doublets_to_separate(doublets)

  (numhits, precision) =
    if decoy
      DecoyROC.new.pred_and_ppv(tps, fps)
    else
      roc.numhits_and_ppv(doublets)
    end

  # every curve starts at 0 hits with a precision of 1
  numhits.unshift(0)
  precision.unshift(1)
  [numhits, precision]
end

## PLOT TO to_plot
File.open(base + ".to_plot", 'w') do |fh|
  fh.puts "XYData"
  fh.puts base
  fh.puts "precision vs. num hits"
  fh.puts "num hits"
  fh.puts "precision"
  files.zip(xys) do |file, (x, y)|
    fh.puts strip_extension.call(file)
    fh.puts x.join(" ")
    fh.puts y.join(" ")
  end
end

File.open(base + ".csv", 'w') do |fh|
  columns = []
  files.zip(xys) do |file, (x, y)|
    label = strip_extension.call(file)
    columns << ["#Hits: #{label}", *x] << ["Precision: #{label}", *y]
  end
  # rows are written by index (short columns are padded with empty cells) so
  # that this section does not need SyncEnumerator from the 1.8-only
  # 'generator' library
  row_count = columns.map {|col| col.size }.max || 0
  (0...row_count).each do |i|
    fh.puts columns.map {|col| col[i] }.join("\t")
  end
end
95
+
96
+
97
+
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/ruby
2
+
3
require 'optparse'
require 'table'

# The library lives under lib/ms in this release; the older location is kept
# as a fallback.
begin
  require 'ms/gradient_program'
rescue LoadError
  require 'spec/gradient_program'
end

# NOTE(review): the class may be namespaced as MS::GradientProgram now --
# confirm against lib/ms/gradient_program.rb.  Either name is accepted here.
gradient_program_class = defined?(MS::GradientProgram) ? MS::GradientProgram : GradientProgram

# Prints every gradient program found in the given .meth files as a table.

delimiter = "\t"
table_format = false
opts = OptionParser.new do |op|
  op.banner = "#{File.basename(__FILE__)} [OPTIONS] <file>.meth"
  op.on("-d", "--delimiter <tab|space|format>", "delimiter (tab default)", "format = space delimited, formatted ascii table") do |v|
    case v
    when 'space' then delimiter = " "
    when 'tab' then delimiter = "\t"
    when 'format' then table_format = true
    else
      abort "don't recognize #{v}"
    end
  end
end

opts.parse!

if ARGV.empty?
  puts opts
  exit
end

# file name => gradient programs read from that file
sets_of_tables = {}
ARGV.each do |file|
  File.open(file) do |fh|
    sets_of_tables[file] = gradient_program_class.all_from_handle(fh)
  end
end

col_labels = ["time(min)", "%A", "%B", "%C", "%D", "ul/min"]
sets_of_tables.each do |file, tables|
  puts "FILE: #{file}"
  tables.each do |gp|
    puts "PUMP_TYPE: #{gp.pump_type}"
    # one row per time point: time, the percentages, then the flow rate
    data = gp.time_points.map do |tp|
      [tp.time, *(tp.percentages)] << tp.flow_rate
    end
    table = Table.new(data, nil, col_labels)
    if table_format
      puts table.to_formatted_string
    else
      puts table.to_s(delimiter)
    end
  end
end
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rexml/document'
4
+
5
if ARGV.empty?
  puts "usage: #{File.basename(__FILE__)} <file>-prot.xml ..."
  puts "outputs a .csv file"
  exit
end

# One <protein> element: its name, probability (pi) and total number of
# peptides (ni).
class Protein
  attr_accessor :name, :pi, :ni
  def initialize(name, pi, ni)
    @name, @pi, @ni = name, pi, ni
  end
end

# REXML stream listener that collects a Protein for every <protein> start tag
# and ignores every other parser event.
class Listener
  attr_accessor :proteins

  def initialize
    @proteins = []
  end

  def tag_start(name, attrs)
    if name == "protein"
      protein = Protein.new( attrs['protein_name'], attrs['probability'].to_f, attrs['total_number_peptides'].to_i)
      @proteins.push( protein )
    end
  end

  # all other stream events are deliberately no-ops
  def method_missing(*args) ; end

  # keeps respond_to? consistent with the catch-all method_missing above
  def respond_to_missing?(name, include_private = false) ; true ; end

end

# The output file is opened once, outside the loop, so that the proteins of
# every input file end up in output.csv (reopening it with 'w' for each input
# would keep only the last file's rows).
File.open("output.csv", 'w') do |out|
  ARGV.each do |file|
    listener = Listener.new
    # block form so that the input handle is closed again
    File.open(file) do |input|
      REXML::Document.parse_stream(input, listener)
    end
    # highest probability first, ties broken by peptide count and then name
    listener.proteins.sort_by {|prot| [prot.pi, prot.ni, prot.name] }.reverse.each do |protein|
      out.puts [protein.name, protein.pi, protein.ni].join("\t")
    end
  end
end
@@ -18,7 +18,7 @@ end
18
18
  ARGV.each do |file|
19
19
  puts "READING: " + file
20
20
  outfile = file + '.timeIndex'
21
- obj = Spec::MSRunIndex.new(file)
21
+ obj = MS::MSRunIndex.new(file)
22
22
  puts "WRITING: " + outfile
23
23
  obj.to_index_file(outfile)
24
24
  end
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'spec_id'
4
+ require 'fasta'
5
+ require 'optparse'
6
+
7
# Prints (tab delimited, sorted by probability) one line per
# file+aaseq+charge group: probability, file:aaseq:charge, T/F.

top_only = false
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} bioworks.xml <file>.fasta|prefix"
  op.separator "outputs stdout (tab del sorted by probability) probability, file:aaseq:charge T/F"
  op.separator "hashes on file+aaseq+charge"
  op.on("-t", "--top", "only top peptide (by prob) per scan+charge") do
    top_only = true
  end
end

opts.parse!

if ARGV.size < 2
  puts opts.to_s
  exit
end

specid_file = ARGV.shift
file_or_prefix = ARGV.shift

specid = SpecID.new(specid_file)

# an existing file is read as a fasta database; anything else is used as a
# plain string that is looked for inside the protein references
indicator =
  if File.exist? file_or_prefix
    Fasta.new.read_file(file_or_prefix)
  else
    file_or_prefix
  end

# returns an array containing the min prob peptides (in case of a tie)
def lowest_peps(ar)
  min_prob = ar.map {|pep| pep.probability.to_f }.min
  ar.select {|pep| pep.probability.to_f == min_prob }
end

peps = specid.peps
if top_only
  # keep only the lowest-probability peptide(s) of each base_name+first_scan
  # group; concat (not flatten) so that the peptide objects themselves are
  # never taken apart
  top_by_scan = []
  peps.hash_by(:base_name, :first_scan).each do |_key, group|
    top_by_scan.concat( lowest_peps(group) )
  end
  peps = top_by_scan
end

results = peps.hash_by(:base_name, :aaseq, :charge).map do |key, group|
  low_peps = lowest_peps(group)
  # protein references of every peptide that ties for the lowest probability
  all_prot_references = []
  low_peps.each do |pep|
    all_prot_references.concat( pep.prots.map {|prot| prot.reference } )
  end
  all_prot_references.uniq!
  is_true =
    if indicator.is_a? Fasta
      all_prot_references.any? do |ref|
        indicator.included_in_header?(ref)
      end
    else
      !(all_prot_references.all? {|ref| ref.include?( indicator )})
    end
  # every member of low_peps shares the same (lowest) probability
  [low_peps.first.probability.to_f, key, is_true]
end

# sort on probability and key only; true/false cannot be compared with <=>
results.sort_by {|prob, key, _is_true| [prob, key] }.each do |result|
  report = [result[0], result[1].join(':'), (result[2] ? 'T' : 'F')]
  puts report.join("\t")
end
75
+
76
+ =begin
77
+ # ORIGINAL CODE
78
+ peps = specid.peps
79
+ if $top
80
+ peps = peps.hash_by(:base_name, :first_scan).map do |k,v|
81
+ v.min {|a,b| a.probability.to_f <=> b.probability.to_f }
82
+ end
83
+ end
84
+
85
+ results = peps.hash_by(:base_name, :aaseq, :charge).map do |k,v|
86
+ min_pep = v.min {|a,b| a.probability.to_f <=> b.probability.to_f }
87
+ references = min_pep.prots.map {|v| v.reference }.uniq
88
+ is_true =
89
+ if indicator.is_a? Fasta
90
+ references.any? do |ref|
91
+ indicator.included_in_header?(ref)
92
+ end
93
+ else
94
+ !(references.all? {|ref| ref.include?( indicator )})
95
+ end
96
+ [min_pep.probability.to_f, k, is_true]
97
+ end
98
+
99
+ results.sort.each do |result|
100
+ report = [result[0], result[1].join(':'), (result[2] ? 'T' : 'F')]
101
+ puts report.join("\t")
102
+ end
103
+ =end