mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/validator.rb DELETED
@@ -1,197 +0,0 @@
1
-
2
- class Validator
3
-
4
- # in the absence of digestion, does the spec_id type requires pephits for
5
- # validation?
6
- def self.requires_pephits?(spec_id_obj)
7
- case spec_id_obj
8
- when Proph::ProtSummary : true
9
- when Proph::PepSummary : true
10
- when SQTGroup : true
11
- else ; false
12
- end
13
- end
14
-
15
- Validator_to_string = {
16
- 'Validator::AA' => 'badAA',
17
- 'Validator::AAEst' => 'badAAEst',
18
- 'Validator::Decoy' => 'decoy',
19
- 'Validator::Transmem::Protein' => 'tmm',
20
- 'Validator::TruePos' => 'tps',
21
- 'Validator::Bias' => 'bias',
22
- 'Validator::Probability' => 'prob',
23
- 'Validator::QValue' => 'qval',
24
- :bad_aa => 'badAA',
25
- :bad_aa_est => 'badAAEst',
26
- :decoy => 'decoy',
27
- :tmm => 'tmm',
28
- :tps => 'tps',
29
- :bias => 'bias',
30
- :prob => 'prob',
31
- :qval => 'qval',
32
- }
33
-
34
- def initialize_increment
35
- @increment_tps = 0
36
- @increment_fps = 0
37
- @increment_total_submitted = 0
38
- @increment_initialized = true
39
- end
40
-
41
- # if adding pephits in groups at a time, the entire group does not need to be
42
- # queried, just the individual hit. Use this OR pephits_precision (NOT
43
- # both). The initial query to this method will begin a running tally that
44
- # is saved by the validator.
45
- # takes either an array or a single pephit (determined by if it is a
46
- # SpecID::Pep)
47
- def increment_pephits_precision(peps)
48
- tmp = $VERBOSE; $VERBOSE = nil
49
- initialize_increment unless @increment_initialized
50
- $VERBOSE = tmp
51
-
52
- to_submit =
53
- if peps.is_a? SpecID::Pep
54
- [peps]
55
- else
56
- peps
57
- end
58
- @increment_total_submitted += to_submit.size
59
- (tps, fps) = partition(to_submit)
60
- @increment_tps += tps.size
61
- @increment_fps += fps.size
62
- (num_tps, num_fps) =
63
- if self.respond_to?(:calc_precision_prep) # for digestion based validators
64
- (num_tps, num_fps) = calc_precision_prep(@increment_tps, @increment_fps)
65
- [num_tps, num_fps]
66
- else
67
- [@increment_tps, @increment_fps]
68
- end
69
- calc_precision(num_tps, num_fps)
70
- end
71
-
72
-
73
- # returns an adjusted false positive rate (a float not to drop below 0.0)
74
- # based on a background of 'false'-false positive hits to total hits. Also
75
- # sets the @calculated_background attribute. Accepts floats or ints
76
- def adjust_fps_for_background(num_tps, num_fps, background)
77
- num_fps = num_fps.to_f
78
- total_peps = num_tps + num_fps
79
- @calculated_background = num_fps / total_peps
80
- num_fps -= (total_peps.to_f * background)
81
- num_fps = 0.0 if num_fps < 0.0
82
- num_fps
83
- end
84
-
85
- # copied from libjtp: vec
86
- # returns the mean and std_dev
87
- def sample_stats(array)
88
- _len = array.size
89
- _sum = 0.0
90
- _sum_sq = 0.0
91
- array.each do |val|
92
- _sum += val
93
- _sum_sq += val * val
94
- end
95
- std_dev = _sum_sq - ((_sum * _sum)/_len)
96
- std_dev /= ( (_len > 1) ? (_len-1) : 1 )
97
- # on occasion, a very small negative number occurs
98
- if std_dev < 0.0
99
- std_dev = 0.0
100
- else
101
- std_dev = Math.sqrt(std_dev)
102
- end
103
- mean = _sum.to_f/_len
104
- [mean, std_dev]
105
- end
106
-
107
- # takes an array of validators and returns a fresh array where each has been
108
- # turned into a sensible hash (with symbols as the keys!)
109
- def self.sensible_validator_hashes(validators)
110
- validators.map do |val|
111
- hash = {}
112
- case val
113
- when Validator::TruePos
114
- hash.merge( {:correct_wins => val.correct_wins, :file => val.fasta.filename } )
115
- when Validator::AAEst
116
- %w(frequency background calculated_background).each do |cat|
117
- hash[cat.to_sym] = val.send(cat.to_sym)
118
- end
119
- when Validator::AA
120
- %w(false_to_total_ratio background calculated_background).each do |cat|
121
- hash[cat.to_sym] = val.send(cat.to_sym)
122
- end
123
- when Validator::Decoy
124
- %w(pi_zero correct_wins decoy_on_match).each do |cat|
125
- hash[cat.to_sym] = val.send(cat.to_sym)
126
- end
127
- hash[:constraint] = val.constraint.inspect if val.constraint
128
- when Validator::Bias
129
- %w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
130
- hash[cat.to_sym] = val.send(cat.to_sym)
131
- end
132
- hash[:file] = val.fasta.filename
133
- when Validator::Transmem::Protein
134
- %w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
135
- hash[cat.to_sym] = val.send(cat.to_sym)
136
- end
137
- when Validator::Probability
138
- %w(prob_method).each do |cat|
139
- hash[cat.to_sym] = val.send(cat.to_sym)
140
- end
141
- when Validator::QValue
142
- # no params to add
143
- else ; raise ArgumentError, "Don't know the validator class #{val}"
144
- end
145
- klass_as_s = val.class.to_s
146
- hash[:type] = Validator_to_string[klass_as_s]
147
- hash[:class] = klass_as_s
148
- hash
149
- end
150
- end
151
- end
152
-
153
- module Precision::Calculator
154
- # calculates precision by the assumption that the first group are all true
155
- # hits and the second are all false hits
156
- # (0,0) is returned as 1.0
157
- def calc_precision(num_true_hits, num_false_hits)
158
- if ((num_true_hits.to_f == 0.0) && (num_false_hits.to_f == 0.0))
159
- 1.0
160
- else
161
- num_true_hits.to_f / (num_true_hits.to_f + num_false_hits.to_f)
162
- end
163
- end
164
- end
165
-
166
- # will calculate precision for groups of proteins where the first group are
167
- # normal hits (which may be true or false) and the second are decoy hits.
168
- # edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
169
- module Precision::Calculator::Decoy
170
- def calc_precision(num_normal, num_decoy, frit=1.0)
171
- # will calculate as floats in case fractional amounts passed in for
172
- # whatever reason
173
- num_normal_f = num_normal.to_f
174
- num_true_pos = num_normal_f - (num_decoy.to_f * frit)
175
- precision =
176
- if num_normal_f == 0.0
177
- if num_decoy.to_f > 0.0
178
- 0.0
179
- else
180
- 1.0
181
- end
182
- else
183
- num_true_pos/num_normal_f
184
- end
185
- end
186
- end
187
-
188
- #require 'validator/true_pos'
189
- #require 'validator/aa'
190
- #require 'validator/aa_est'
191
- #require 'validator/bias'
192
- #require 'validator/decoy'
193
- #require 'validator/transmem'
194
- #require 'validator/probability'
195
- #require 'validator/q_value'
196
- #require 'validator/prot_from_pep'
197
-
data/lib/xml.rb DELETED
@@ -1,38 +0,0 @@
1
-
2
- module XML
3
- HourMinuteMatch = /[MH]/o
4
- # returns a float object of seconds
5
- # doesn't support year month, etc, yet
6
- def self.duration_to_seconds(string)
7
- case x = string[0,2]
8
- when 'PT'
9
- rest = string[2..-1]
10
- # usually it will be this 'PT1.223434S':
11
- if rest !~ HourMinuteMatch
12
- rest[0...-1].to_f
13
- else
14
- addit = ''
15
- total_secs = 0
16
- total_secs_as_float = nil
17
- rest.split('').each do |let|
18
- case let
19
- when 'H'
20
- total_secs += addit.to_i * 3600
21
- addit = ''
22
- when 'M'
23
- total_secs += addit.to_i * 60
24
- addit = ''
25
- when 'S'
26
- total_secs_as_float = total_secs.to_f
27
- total_secs_as_float += addit.to_f
28
- else
29
- addit << let
30
- end
31
- end
32
- total_secs_as_float
33
- end
34
- else
35
- abort 'need to include support for other durations'
36
- end
37
- end
38
- end
data/lib/xml_style_parser.rb DELETED
@@ -1,119 +0,0 @@
1
-
2
- module XMLStyleParser
3
- @done_once = nil
4
-
5
- Parser_precedence = %w(AXML LibXML XMLParser Regexp REXML)
6
- # currently AXML requires 'xmlparser' to be installed.... (may not always be
7
- # the case...)
8
- File_required = {'AXML' => /^axml/, 'LibXML' => /^xml\/libxml/, 'XMLParser' => /^xmlparser/}
9
-
10
- # the method that the parser will call on the given file at parse!
11
- attr_accessor :method
12
-
13
- # parses the given file by sending to @method
14
- def parse(file, opts={})
15
- if respond_to? @method
16
- send(@method, file, opts)
17
- else
18
- raise NoMethodError, "Parser of class #{self.class} can't parse #{@method} yet"
19
- end
20
- end
21
-
22
- # XMLParser and xml/libxml are incompatible, so if xmlparser is available,
23
- # libxml will not be loaded (XMLParser#parse is clobbered by
24
- # XML::Parser#parse [don't ask me why])
25
- def self.require_parsers
26
- if !@done_once
27
- have_xmlparser = false
28
- begin
29
- require 'xmlparser'
30
- puts "Loaded XMLParser" if $VERBOSE
31
- have_xmlparser = true
32
- rescue LoadError
33
- end
34
-
35
- begin
36
- require 'axml'
37
- puts "Loaded AXML" if $VERBOSE
38
- rescue LoadError
39
- end
40
-
41
- begin
42
- unless have_xmlparser
43
- require 'xml/libxml'
44
- puts "Loaded xml/libxml" if $VERBOSE
45
- ################################################################
46
- # IMPORTANT!
47
- # This magic line makes the parser behave like it ought to!!
48
- XML::Parser.default_keep_blanks = false
49
- ################################################################
50
- end
51
- rescue LoadError
52
- end
53
- end
54
- @done_once = true
55
- end
56
-
57
- # returns an array of strings depending on File_required (in the order of
58
- # Parser_precedence)
59
- def self.available_xml_parsers
60
- require_parsers
61
- parser_precedence = Parser_precedence.dup
62
- File_required.map do |k,v|
63
- unless $".any? {|req_file| req_file.match(v) }
64
- parser_precedence.delete(k)
65
- end
66
- end
67
- parser_precedence
68
- end
69
-
70
- ## appends downcase to each parser type here and tries to require it
71
- # returns all those that were required without a load error
72
- def self.require_parse_files(base_dir)
73
- XMLStyleParser.available_xml_parsers.select do |v|
74
- to_require = base_dir + '/' + v.downcase
75
- begin
76
- require to_require
77
- true
78
- rescue LoadError
79
- false
80
- end
81
- end
82
- end
83
-
84
- # seeks a subclass that has the public_method @method
85
- def self.choose_parser(const, method, special_subclass=nil)
86
- ## First update @@parser_precedence to ensure we should get these guys
87
- parser_precedence = available_xml_parsers
88
-
89
- available_constants = parser_precedence.select do |v|
90
- const.const_defined?(v)
91
- end
92
- available_subclasses = available_constants.map do |v|
93
- const.const_get(v)
94
- end
95
- available = available_subclasses.select do |subclass|
96
- subclass.public_method_defined? method
97
- end
98
- if special_subclass
99
- available_special_subclasses = []
100
- available.each do |subclass|
101
- if subclass.const_defined?(special_subclass)
102
- available_special_subclasses << subclass.const_get(special_subclass)
103
- end
104
- end
105
- available = available_special_subclasses
106
- end
107
- if available.size > 0
108
- available.first
109
- else
110
- warning = ""
111
- if special_subclass
112
- warning << "** while looking for special subclass: #{special_subclass} **\n"
113
- end
114
- warning << "No parser of class #{const} can parse :#{method}\n** Is 'axml' (or another xml parser) installed and working? **"
115
- raise NoMethodError, warning
116
- end
117
- end
118
-
119
- end
data/lib/xmlparser_wrapper.rb DELETED
@@ -1,19 +0,0 @@
1
-
2
-
3
- module XMLParserWrapper
4
- def parse_and_report(file, const, report_method=:report)
5
- parse_and_report_string(IO.read(file), const, report_method)
6
- end
7
-
8
- def parse_and_report_string(string, const, report_method=:report)
9
- parser = self.class.const_get(const).new
10
- parser.parse(string)
11
- parser.send(report_method)
12
- end
13
-
14
- def parse_and_report_io(io, const, report_method=:report)
15
- parser = self.class.const_get(const).new
16
- parser.parse(io)
17
- parser.send(report_method)
18
- end
19
- end
data/release_notes.txt DELETED
@@ -1,2 +0,0 @@
1
-
2
-
data/script/compile_and_plot_smriti_final.rb DELETED
@@ -1,97 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
-
4
- require 'roc'
5
- require 'optparse'
6
- require 'generator'
7
-
8
- $decoy = false
9
- $base = "precision_vs_numhits"
10
-
11
- opts = OptionParser.new do |op|
12
- op.banner = "usage: #{File.basename(__FILE__)} smriti.csv ..."
13
- op.separator ""
14
- op.separator "smriti.csv = (tab delimited) prob, file:seq:charge, T/F"
15
- op.separator ""
16
- op.on("--decoy", "'F' indicates this is a decoy") {|v| $decoy = true }
17
- op.on("-o", "--outfile <filename>", "base outfile name (#{$base})") {|v| $base = v}
18
- end
19
-
20
- opts.parse!
21
-
22
- if ARGV.size <= 0
23
- puts opts
24
- exit
25
- end
26
-
27
- files = ARGV.to_a
28
-
29
- xys = files.map do |file|
30
- triplets = IO.readlines(file).reject{|v| v =~ /^#/}.map do |line|
31
- line.chomp.split("\t")
32
- end
33
-
34
- # check that they're all OK:
35
- triplets.each do |trip|
36
- if trip.size != 3 ; abort "bad triplet" end
37
- end
38
-
39
- # figure out the ordering (and correct if necessary):
40
- higher_better = triplets[0][0].to_f > triplets.last[0].to_f
41
-
42
- doublets = triplets.map do |trip|
43
- value = trip[0].to_f
44
- value *= -1 if higher_better
45
- [value, ((trip[2] == 'T') ? true : false)]
46
- end
47
-
48
- roc = ROC.new
49
-
50
- (tps, fps) = roc.doublets_to_separate(doublets)
51
-
52
- (x, y) =
53
- if $decoy
54
- (numhits, precision) = DecoyROC.new.pred_and_ppv(tps, fps)
55
- [numhits, precision]
56
- else
57
- (numhits, precision) = roc.numhits_and_ppv(doublets)
58
- [numhits, precision]
59
- end
60
- [x,y]
61
-
62
- end
63
-
64
-
65
- ## PLOT TO to_plot
66
- File.open( $base + ".to_plot", 'w') do |fh|
67
- fh.puts "XYData"
68
- fh.puts $base
69
- fh.puts "precision vs. num hits"
70
- fh.puts "num hits"
71
- fh.puts "precision"
72
- files.zip(xys) do |file,xy|
73
- (x,y) = xy
74
- x.unshift(0)
75
- y.unshift(1)
76
- fh.puts file.sub(/\.[^\.]$/,'')
77
- fh.puts x.join(" ")
78
- fh.puts y.join(" ")
79
- end
80
- end
81
-
82
- File.open( $base + ".csv", 'w') do |fh|
83
- columns = []
84
- files.zip(xys) do |file,xy|
85
- f = file.sub(/\.[^\.]$/,'')
86
- (x,y) = xy
87
- x.unshift("#Hits: #{f}")
88
- y.unshift("Precision: #{f}")
89
- columns << x << y
90
- end
91
- SyncEnumerator.new(*columns).each do |row|
92
- fh.puts row.join("\t")
93
- end
94
- end
95
-
96
-
97
-
data/script/create_little_pepxml.rb DELETED
@@ -1,61 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
-
4
- if ARGV.size < 2
5
- puts "usage: #{File.basename(__FILE__)} protxml pepxml"
6
- puts "Based on some kind of truncated prot xml file, takes a pepxml file"
7
- puts "and deletes all search hits/peptides that aren't in the prot xml file!"
8
- exit
9
- end
10
-
11
- protxml = ARGV[0]
12
- pepxml = ARGV[1]
13
-
14
- hash = {}
15
- File.open(protxml) do |fh|
16
- while line = fh.gets
17
- if line =~ /peptide_sequence="(.*?)" charge="(\d)" /
18
- hash[[$1.dup,$2.dup]] = 1
19
- end
20
- end
21
- end
22
-
23
- p hash
24
-
25
- out = File.open(pepxml + ".small", "w")
26
-
27
- in_hit = false
28
- cur_charge = nil
29
- stored_lines = ""
30
- print_it = false
31
- File.open(pepxml) do |fh|
32
- while line = fh.gets
33
- if line =~ /<search_result .*? assumed_charge="(\d)".*?>/
34
- cur_charge = $1.dup
35
- in_hit = true
36
- end
37
- if line =~ /<search_hit .*? peptide="(.*?)"/
38
- if hash.key?([$1.dup,cur_charge])
39
- print_it = true
40
- else
41
- print_it = false
42
- end
43
- end
44
- if line =~ /<\/search_result>/
45
- if print_it == true
46
- stored_lines << line
47
- out.print stored_lines
48
- end
49
- stored_lines = ""
50
- in_hit == false
51
- elsif !in_hit
52
- out.print line
53
- else
54
- stored_lines << line
55
- end
56
- end
57
-
58
-
59
- end
60
-
61
- out.close
data/script/degenerate_peptides.rb DELETED
@@ -1,47 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'fasta'
4
- require 'sample_enzyme'
5
-
6
- if ARGV.size < 3
7
- puts "usage: #{File.basename(__FILE__)} min_peptide_length missed_cleavages <file>.fasta ..."
8
- puts " returns <file>.min_pep_length_<#>.missed_cleavages_<#>.degenerate_peptides.csv"
9
- abort
10
- end
11
-
12
-
13
-
14
- min_peptide_length = ARGV.shift.to_i
15
- missed_cleavages = ARGV.shift.to_i
16
-
17
- ARGV.each do |file|
18
- hash = {}
19
-
20
- if file !~ /\.fasta/
21
- abort "must be a fasta file with extension fasta"
22
- end
23
- new_filename = file.sub(/\.fasta$/, '')
24
- new_filename << ".min_pep_length_#{min_peptide_length}.missed_cleavages_#{missed_cleavages}.degenerate_peptides.csv"
25
- peptides = []
26
- Fasta.new.read_file(file).prots.each do |prot|
27
-
28
-
29
- SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).each do |aaseq|
30
- if aaseq.size >= min_peptide_length
31
- hash[aaseq] ||= []
32
- hash[aaseq].push( prot.header.sub(/^>/,'') )
33
- end
34
- end
35
- #fh.puts( prot.header.split(/\s+/).first.sub(/^>/,'') + "\t" + SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).join(" ") )
36
- end
37
-
38
- File.open(new_filename, "w") do |fh|
39
- hash.keys.sort_by {|pep| hash[pep].size }.reverse.each do |pep|
40
- fh.puts( [pep, *(hash[pep])].join("\t") )
41
- end
42
- end
43
- end
44
-
45
-
46
-
47
-