mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/lib/validator.rb ADDED
@@ -0,0 +1,214 @@
1
+
2
# Base class for peptide-hit validators.  Provides a running (incremental)
# precision tally, background adjustment of false-positive counts, simple
# sample statistics, and a class-level helper that summarizes validator
# objects as hashes.
#
# NOTE(review): this class calls +partition+ and +calc_precision+ but does not
# define them here; presumably subclasses or mixed-in modules supply them --
# confirm.
class Validator

  # Maps validator class names (as strings) and shorthand symbols to the short
  # string label for that validator type.  sensible_validator_hashes uses the
  # class-name keys to fill in :type.
  Validator_to_string = {
    'Validator::AA' => 'badAA',
    'Validator::Decoy' => 'decoy',
    'Validator::Transmem::Protein' => 'tmm',
    'Validator::TruePos' => 'tps',
    'Validator::Bias' => 'bias',
    'Validator::Probability' => 'prob',
    :bad_aa => 'badAA',
    :decoy => 'decoy',
    :tmm => 'tmm',
    :tps => 'tps',
    :bias => 'bias',
    :prob => 'prob',
  }

  # Resets the running tally used by increment_pephits_precision.
  def initialize_increment
    @increment_tps = 0
    @increment_fps = 0
    @increment_total_submitted = 0
    @increment_initialized = true
  end

  # if adding pephits in groups at a time, the entire group does not need to be
  # queried, just the individual hit.  Use this OR pephits_precision (NOT
  # both).  The initial query to this method will begin a running tally that
  # is saved by the validator.
  # takes either an array or a single pephit (determined by if it is a
  # SpecID::Pep)
  # returns whatever calc_precision returns for the running totals
  def increment_pephits_precision(peps)
    # defined? guards against the "instance variable not initialized" warning
    # without having to switch $VERBOSE off and back on
    unless defined?(@increment_initialized) && @increment_initialized
      initialize_increment
    end

    to_submit = peps.is_a?(SpecID::Pep) ? [peps] : peps
    @increment_total_submitted += to_submit.size

    (tps, fps) = partition(to_submit)
    @increment_tps += tps.size
    @increment_fps += fps.size

    (num_tps, num_fps) =
      if respond_to?(:calc_precision_prep) # for digestion based validators
        calc_precision_prep(@increment_tps, @increment_fps)
      else
        [@increment_tps, @increment_fps]
      end
    calc_precision(num_tps, num_fps)
  end


  # returns an adjusted false positive rate (a float not to drop below 0.0)
  # based on a background of 'false'-false positive hits to total hits.  Also
  # sets the @calculated_background attribute (num_fps / total hits, computed
  # before the adjustment).  Accepts floats or ints
  def adjust_fps_for_background(num_tps, num_fps, background)
    num_fps = num_fps.to_f
    total_peps = num_tps + num_fps
    @calculated_background = num_fps / total_peps
    num_fps -= (total_peps * background)
    num_fps = 0.0 if num_fps < 0.0
    num_fps
  end

  # copied from libjtp: vec
  # returns the mean and std_dev (n-1 denominator; a single value gives 0.0)
  def sample_stats(array)
    len = array.size
    sum = 0.0
    sum_sq = 0.0
    array.each do |val|
      sum += val
      sum_sq += val * val
    end
    variance = sum_sq - ((sum * sum) / len)
    variance /= ( (len > 1) ? (len - 1) : 1 )
    # on occasion, a very small negative number occurs
    std_dev = (variance < 0.0) ? 0.0 : Math.sqrt(variance)
    [sum / len, std_dev]
  end

  # takes an array of validators and returns a fresh array where each has been
  # turned into a sensible hash (with symbols as the keys!)
  # every hash also gets :type (the short label from Validator_to_string) and
  # :class (the class name as a string)
  # raises ArgumentError for a validator class it does not know
  def self.sensible_validator_hashes(validators)
    validators.map do |val|
      hash = {}
      case val
      when Validator::TruePos
        # merge! rather than merge: merge returns a new hash, so the values
        # were silently dropped from the result
        hash.merge!( {:correct_wins => val.correct_wins, :file => val.fasta.filename } )
      when Validator::AA
        %w(frequency false_to_total_ratio background calculated_background).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
      when Validator::Decoy
        %w(correct_wins decoy_on_match).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
        hash[:constraint] = val.constraint.inspect if val.constraint
      when Validator::Bias
        %w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
        hash[:file] = val.fasta.filename
      when Validator::Transmem::Protein
        %w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
      when Validator::Probability
        %w(prob_method).each do |cat|
          hash[cat.to_sym] = val.send(cat.to_sym)
        end
      else ; raise ArgumentError, "Don't know the validator class #{val}"
      end
      klass_as_s = val.class.to_s
      hash[:type] = Validator_to_string[klass_as_s]
      hash[:class] = klass_as_s
      hash
    end
  end

=begin
  ## THIS IS WITH STRINGS AS KEYS!
  # takes an array of validators and returns a fresh array where each has been
  # turned into a sensible hash (with symbols as the keys!)
  def self.sensible_validator_hashes(validators)
    validators.map do |val|
      hash = {}
      case val
      when Validator::TruePos
        hash.merge( {'correct_wins' => val.correct_wins, 'file' => val.fasta.filename } )
      when Validator::AA
        %w(frequency false_to_total_ratio background calculated_background false_to_total_ratio).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
      when Validator::Decoy
        %w(correct_wins decoy_on_match).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
        hash['constraint'] = val.constraint.inspect if val.constraint
      when Validator::Bias
        %w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
        hash['file'] = val.fasta.filename
      when Validator::Transmem::Protein
        %w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
          hash[cat] = val.send(cat.to_sym)
        end
      when Validator::Probability
      else ; raise ArgumentError, "Don't know the validator class #{val}"
      end
      klass_as_s = val.class.to_s
      hash['type'] = Validator_to_string[klass_as_s]
      hash['class'] = klass_as_s
      hash
    end
  end
=end

end
171
+
172
module Precision::Calculator
  # Precision under the assumption that every hit in the first group is a true
  # hit and every hit in the second group is a false hit:
  #   true / (true + false)
  # When both counts are zero the result is defined as 1.0.
  def calc_precision(num_true_hits, num_false_hits)
    true_hits = num_true_hits.to_f
    false_hits = num_false_hits.to_f
    return 1.0 if true_hits == 0.0 && false_hits == 0.0
    true_hits / (true_hits + false_hits)
  end
end
184
+
185
# Precision for decoy-style validation: the first count is the number of
# normal hits (each of which may be true or false), the second is the number
# of decoy hits.  Computed as (normal - decoy) / normal.
# edge case: with zero normal hits the answer is 0.0 if there are any decoy
# hits and 1.0 otherwise.
module Precision::Calculator::Decoy
  def calc_precision(num_normal, num_decoy)
    # work in floats in case fractional amounts are passed in
    normal = num_normal.to_f
    estimated_true = normal - num_decoy
    if normal == 0.0
      (num_decoy.to_f > 0.0) ? 0.0 : 1.0
    else
      estimated_true / normal
    end
  end
end
206
+
207
+ require 'validator/true_pos'
208
+ require 'validator/aa'
209
+ require 'validator/bias'
210
+ require 'validator/decoy'
211
+ require 'validator/transmem'
212
+ require 'validator/probability'
213
+ require 'validator/prot_from_pep'
214
+
data/lib/xml.rb ADDED
@@ -0,0 +1,38 @@
1
+
2
module XML
  # matches the hour / minute designators of a 'PT...' duration
  HourMinuteMatch = /[MH]/o

  # Converts a time-only duration string ('PT' followed by optional
  # <n>H, <n>M and <n>S components, e.g. 'PT1.223434S' or 'PT1H2M3.5S') into
  # seconds.
  #
  # returns a float object of seconds
  # doesn't support year month, etc, yet (aborts the process for any string
  # not starting with 'PT')
  def self.duration_to_seconds(string)
    case string[0,2]
    when 'PT'
      rest = string[2..-1]
      # usually it will be this 'PT1.223434S':
      if rest !~ HourMinuteMatch
        # drop the trailing designator and read the number
        rest[0...-1].to_f
      else
        addit = ''        # digits collected since the last designator
        total_secs = 0.0
        rest.split('').each do |let|
          case let
          when 'H'
            total_secs += addit.to_i * 3600
            addit = ''
          when 'M'
            total_secs += addit.to_i * 60
            addit = ''
          when 'S'
            total_secs += addit.to_f
            addit = ''
          else
            addit << let
          end
        end
        # always a Float, even when there is no 'S' component (e.g. 'PT1H'),
        # which previously produced nil
        total_secs
      end
    else
      abort 'need to include support for other durations'
    end
  end
end
@@ -0,0 +1,105 @@
1
+
2
# Mixin/utility module for parsers that can be backed by several XML
# libraries.  Instances dispatch parsing to the method named by @method; the
# module-level functions load whichever XML libraries are installed and pick
# a backend class by precedence.
module XMLStyleParser
  @done_once = nil

  # backend names, most preferred first
  Parser_precedence = %w(AXML LibXML XMLParser Regexp REXML)
  # currently AXML requires 'xmlparser' to be installed.... (may not always be
  # the case...)
  # backend name => pattern that must match an entry of $" (loaded features)
  # for that backend to count as available
  File_required = {'AXML' => /^axml/, 'LibXML' => /^xml\/libxml/, 'XMLParser' => /^xmlparser/}

  # the method that the parser will call on the given file at parse!
  attr_accessor :method

  # parses the given file by sending (file, opts) to @method
  # raises NoMethodError if this object does not respond to @method
  def parse(file, opts={})
    unless respond_to? @method
      raise NoMethodError, "Parser of class #{self.class} can't parse #{@method} yet"
    end
    send(@method, file, opts)
  end

  # Tries to require the optional XML libraries; runs the requires only once.
  # XMLParser and xml/libxml are incompatible, so if xmlparser is available,
  # libxml will not be loaded (XMLParser#parse is clobbered by
  # XML::Parser#parse [don't ask me why])
  def self.require_parsers
    unless @done_once
      have_xmlparser =
        begin
          require 'xmlparser'
          puts "Loaded XMLParser" if $VERBOSE
          true
        rescue LoadError
          false
        end

      begin
        require 'axml'
        puts "Loaded AXML" if $VERBOSE
      rescue LoadError
      end

      unless have_xmlparser
        begin
          require 'xml/libxml'
          puts "Loaded xml/libxml" if $VERBOSE
          ################################################################
          # IMPORTANT!
          # This magic line makes the parser behave like it ought to!!
          XML::Parser.default_keep_blanks = false
          ################################################################
        rescue LoadError
        end
      end
    end
    @done_once = true
  end

  # returns a fresh array of backend names in Parser_precedence order, leaving
  # out every name listed in File_required whose pattern matches none of the
  # loaded features
  def self.available_xml_parsers
    require_parsers
    Parser_precedence.select do |name|
      pattern = File_required[name]
      pattern.nil? || $".any? {|loaded| loaded.match(pattern) }
    end
  end

  ## appends downcase to each parser type here and tries to require it
  # (as base_dir + '/' + name)
  # returns all those that were required without a load error
  def self.require_parse_files(base_dir)
    XMLStyleParser.available_xml_parsers.select do |name|
      path = base_dir + '/' + name.downcase
      begin
        require path
        true
      rescue LoadError
        false
      end
    end
  end

  # seeks a constant under +const+ (named after an available backend) that
  # has the public instance method +method+; the first one by precedence wins
  # raises NoMethodError when there is none
  def self.choose_parser(const, method)
    names = available_xml_parsers.select {|name| const.const_defined?(name) }
    candidates = names.map {|name| const.const_get(name) }
    capable = candidates.select {|klass| klass.public_method_defined? method }
    if capable.empty?
      raise NoMethodError, "No parser of class #{const} can parse :#{method}\n** Is 'axml' (or another xml parser) installed and working? **"
    end
    capable.first
  end

end
@@ -0,0 +1,19 @@
1
+
2
+
3
# Mixin that builds a parser from a constant looked up on the including
# object's class, feeds it input, and returns the result of calling
# +report_method+ on it.
module XMLParserWrapper
  # reads the whole file and hands the contents to parse_and_report_string
  def parse_and_report(file, const, report_method=:report)
    contents = IO.read(file)
    parse_and_report_string(contents, const, report_method)
  end

  # parses the string with a new instance of the class named by +const+ and
  # returns what that instance's +report_method+ returns
  def parse_and_report_string(string, const, report_method=:report)
    handler_class = self.class.const_get(const)
    handler = handler_class.new
    handler.parse(string)
    handler.send(report_method)
  end

  # same as parse_and_report_string, but passes the io object to the parser
  def parse_and_report_io(io, const, report_method=:report)
    handler_class = self.class.const_get(const)
    handler = handler_class.new
    handler.parse(io)
    handler.send(report_method)
  end
end
@@ -0,0 +1,97 @@
1
#!/usr/bin/ruby -w

# Reads one or more tab-delimited files of (prob, file:seq:charge, T/F)
# triplets, computes precision against number of hits for each file, and
# writes two files named after the base outfile name: <base>.to_plot and
# <base>.csv.

require 'roc'
require 'optparse'
# NOTE(review): SyncEnumerator comes from 'generator', which is only shipped
# with older Ruby standard libraries -- confirm the targeted interpreter.
require 'generator'

$decoy = false
$base = "precision_vs_numhits"

opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} smriti.csv ..."
  op.separator ""
  op.separator "smriti.csv = (tab delimited) prob, file:seq:charge, T/F"
  op.separator ""
  op.on("--decoy", "'F' indicates this is a decoy") {|v| $decoy = true }
  op.on("-o", "--outfile <filename>", "base outfile name (#{$base})") {|v| $base = v}
end

opts.parse!

if ARGV.size <= 0
  puts opts
  exit
end

files = ARGV.to_a

# label for a data series: the filename without its extension.  The pattern
# needs the '+'; without it only a one-character extension was ever removed
# (so e.g. '.csv' stayed in the label).
series_label = lambda {|file| file.sub(/\.[^\.]+$/, '') }

# one [numhits, precision] pair of arrays per input file
xys = files.map do |file|
  triplets = IO.readlines(file).reject {|v| v =~ /^#/ }.map do |line|
    line.chomp.split("\t")
  end

  # check that they're all OK:
  triplets.each do |trip|
    abort "bad triplet" if trip.size != 3
  end

  # figure out the ordering (and correct if necessary):
  # if the first value is larger than the last, the values are negated below
  higher_better = triplets[0][0].to_f > triplets.last[0].to_f

  doublets = triplets.map do |trip|
    value = trip[0].to_f
    value *= -1 if higher_better
    [value, (trip[2] == 'T')]
  end

  roc = ROC.new

  (tps, fps) = roc.doublets_to_separate(doublets)

  (numhits, precision) =
    if $decoy
      DecoyROC.new.pred_and_ppv(tps, fps)
    else
      roc.numhits_and_ppv(doublets)
    end
  [numhits, precision]
end


## PLOT TO to_plot
File.open( $base + ".to_plot", 'w') do |fh|
  fh.puts "XYData"
  fh.puts $base
  fh.puts "precision vs. num hits"
  fh.puts "num hits"
  fh.puts "precision"
  files.zip(xys) do |file,xy|
    (x,y) = xy
    # start every curve at (0 hits, precision 1); note that this modifies the
    # arrays in xys, so the csv written below contains this point too
    x.unshift(0)
    y.unshift(1)
    fh.puts series_label.call(file)
    fh.puts x.join(" ")
    fh.puts y.join(" ")
  end
end

File.open( $base + ".csv", 'w') do |fh|
  columns = []
  files.zip(xys) do |file,xy|
    f = series_label.call(file)
    (x,y) = xy
    # column headers
    x.unshift("#Hits: #{f}")
    y.unshift("Precision: #{f}")
    columns << x << y
  end
  # walk all columns in parallel, one output row per index
  SyncEnumerator.new(*columns).each do |row|
    fh.puts row.join("\t")
  end
end
95
+
96
+
97
+
@@ -0,0 +1,56 @@
1
#!/usr/bin/ruby

# Prints the gradient program tables found in each given .meth file, either
# delimited (tab or space) or as a formatted ascii table.

require 'optparse'
require 'table'

# NOTE(review): elsewhere in this release the library code lives under 'ms/'
# (e.g. lib/ms/gradient_program.rb) -- confirm that this 'spec/' path still
# resolves.
require 'spec/gradient_program'

delimiter = "\t"
table_format = false
opts = OptionParser.new do |op|
  op.banner = "#{File.basename(__FILE__)} [OPTIONS] <file>.meth"
  op.on("-d", "--delimiter <tab|space|format>", "delimiter (tab default)", "format = space delimited, formatted ascii table") do |v|
    case v
    when 'space'
      delimiter = " "
    when 'tab'
      delimiter = "\t"
    when 'format'
      table_format = true
    else
      abort "don't recognize #{v}"
    end
  end
end

opts.parse!

if ARGV.empty?
  puts opts
  exit
end


# first pass: read the gradient programs of every file
sets_of_tables = {}
ARGV.each do |file|
  sets_of_tables[file] = File.open(file) {|fh| GradientProgram.all_from_handle(fh) }
end

# second pass: print one table per gradient program
sets_of_tables.each do |file, tables|
  puts "FILE: #{file}"
  tables.each do |gp|
    puts "PUMP_TYPE: #{gp.pump_type}"
    col_labels = ["time(min)", "%A", "%B", "%C", "%D", "ul/min"]
    # one row per time point: time, the percentages, then the flow rate
    data = gp.time_points.map do |tp|
      [tp.time, *(tp.percentages)] << tp.flow_rate
    end
    table = Table.new(data, nil, col_labels)
    puts( table_format ? table.to_formatted_string : table.to_s(delimiter) )
  end
end
@@ -0,0 +1,44 @@
1
#!/usr/bin/ruby

# Stream-parses each given <file>-prot.xml and writes, for every <protein>
# element, its name, probability and total number of peptides as a
# tab-delimited row of output.csv.

require 'rexml/document'

if ARGV.size == 0
  puts "usage: #{File.basename(__FILE__)} <file>-prot.xml ..."
  puts "outputs a .csv file"
  exit
end

# one <protein> element: name, pi (the 'probability' attribute as a float)
# and ni (the 'total_number_peptides' attribute as an integer)
class Protein
  attr_accessor :name, :pi, :ni
  def initialize(name, pi, ni)
    @name, @pi, @ni = name, pi, ni
  end
end

# REXML stream listener that collects a Protein for every <protein> start tag
class Listener
  attr_accessor :proteins

  def initialize
    @proteins = []
  end

  def tag_start(name, attrs)
    if name == "protein"
      protein = Protein.new( attrs['protein_name'], attrs['probability'].to_f, attrs['total_number_peptides'].to_i)
      @proteins.push( protein )
    end
  end

  # every other stream event is ignored
  def method_missing(*args) ; end

end

# The output file is opened once for all inputs: opening it with 'w' inside
# the loop truncated it for every file, so only the last file's rows survived.
File.open("output.csv", 'w') do |out|
  ARGV.each do |file|
    listener = Listener.new
    # block form so the input handle is closed when parsing is done
    File.open(file) do |xml|
      REXML::Document.parse_stream(xml, listener)
    end
    # highest probability first (ties: peptide count, then name)
    listener.proteins.sort_by {|prot| [prot.pi, prot.ni, prot.name] }.reverse.each do |protein|
      out.puts [protein.name, protein.pi, protein.ni].join("\t")
    end
  end
end
@@ -18,7 +18,7 @@ end
18
18
  ARGV.each do |file|
19
19
  puts "READING: " + file
20
20
  outfile = file + '.timeIndex'
21
- obj = Spec::MSRunIndex.new(file)
21
+ obj = MS::MSRunIndex.new(file)
22
22
  puts "WRITING: " + outfile
23
23
  obj.to_index_file(outfile)
24
24
  end
@@ -0,0 +1,103 @@
1
#!/usr/bin/ruby -w

# Prints, for every distinct file+aaseq+charge in a bioworks file, the lowest
# peptide probability, the key, and T/F (tab delimited, sorted).

require 'spec_id'
require 'fasta'
require 'optparse'

$top = false
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} bioworks.xml <file>.fasta|prefix"
  op.separator "outputs stdout (tab del sorted by probability) probability, file:aaseq:charge T/F"
  op.separator "hashes on file+aaseq+charge"
  op.on("-t", "--top", "only top peptide (by prob) per scan+charge") do
    $top = true
  end
end

opts.parse!

if ARGV.size < 2
  puts opts.to_s
  exit
end

specid_file = ARGV.shift
file_or_prefix = ARGV.shift

specid = SpecID.new(specid_file)

# a Fasta object if the second argument is an existing file, otherwise the
# argument itself (a string matched against protein references)
indicator =
  if File.exist? file_or_prefix
    Fasta.new.read_file(file_or_prefix)
  else
    file_or_prefix
  end


# returns an array containing the min prob peptides (in case of a tie)
def lowest_peps(ar)
  min_prob = ar.min {|a,b| a.probability.to_f <=> b.probability.to_f }.probability.to_f
  ar.select {|v| v.probability.to_f == min_prob }
end

peps = specid.peps
if $top
  # keep only the lowest-probability peptide(s) of each scan
  top_by_scan = []
  peps.hash_by(:base_name, :first_scan).each do |k,v|
    top_by_scan.push( *lowest_peps(v) )
  end
  # without --top all peptides are used (top_by_scan used to be nil then,
  # and the next statement failed)
  peps = top_by_scan
end

results = peps.hash_by(:base_name, :aaseq, :charge).map do |k,v|
  low_peps = lowest_peps(v)
  # protein references of all the tied lowest-probability peptides
  all_prot_references = []
  low_peps.each do |pep|
    all_prot_references.push( *(pep.prots.map {|prot| prot.reference }) )
  end
  all_prot_references.uniq!
  is_true =
    if indicator.is_a? Fasta
      all_prot_references.any? do |ref|
        indicator.included_in_header?(ref)
      end
    else
      # true unless every reference contains the indicator string
      !(all_prot_references.all? {|ref| ref.include?( indicator )})
    end
  # every member of low_peps shares the minimum probability
  [low_peps.first.probability.to_f, k, is_true]
end

results.sort.each do |result|
  report = [result[0], result[1].join(':'), (result[2] ? 'T' : 'F')]
  puts report.join("\t")
end
75
+
76
+ =begin
77
+ # ORIGINAL CODE
78
+ peps = specid.peps
79
+ if $top
80
+ peps = peps.hash_by(:base_name, :first_scan).map do |k,v|
81
+ v.min {|a,b| a.probability.to_f <=> b.probability.to_f }
82
+ end
83
+ end
84
+
85
+ results = peps.hash_by(:base_name, :aaseq, :charge).map do |k,v|
86
+ min_pep = v.min {|a,b| a.probability.to_f <=> b.probability.to_f }
87
+ references = min_pep.prots.map {|v| v.reference }.uniq
88
+ is_true =
89
+ if indicator.is_a? Fasta
90
+ references.any? do |ref|
91
+ indicator.included_in_header?(ref)
92
+ end
93
+ else
94
+ !(references.all? {|ref| ref.include?( indicator )})
95
+ end
96
+ [min_pep.probability.to_f, k, is_true]
97
+ end
98
+
99
+ results.sort.each do |result|
100
+ report = [result[0], result[1].join(':'), (result[2] ? 'T' : 'F')]
101
+ puts report.join("\t")
102
+ end
103
+ =end