mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/validator.rb DELETED
@@ -1,197 +0,0 @@
1
-
2
- class Validator
3
-
4
- # in the absence of digestion, does the spec_id type requires pephits for
5
- # validation?
6
- def self.requires_pephits?(spec_id_obj)
7
- case spec_id_obj
8
- when Proph::ProtSummary : true
9
- when Proph::PepSummary : true
10
- when SQTGroup : true
11
- else ; false
12
- end
13
- end
14
-
15
- Validator_to_string = {
16
- 'Validator::AA' => 'badAA',
17
- 'Validator::AAEst' => 'badAAEst',
18
- 'Validator::Decoy' => 'decoy',
19
- 'Validator::Transmem::Protein' => 'tmm',
20
- 'Validator::TruePos' => 'tps',
21
- 'Validator::Bias' => 'bias',
22
- 'Validator::Probability' => 'prob',
23
- 'Validator::QValue' => 'qval',
24
- :bad_aa => 'badAA',
25
- :bad_aa_est => 'badAAEst',
26
- :decoy => 'decoy',
27
- :tmm => 'tmm',
28
- :tps => 'tps',
29
- :bias => 'bias',
30
- :prob => 'prob',
31
- :qval => 'qval',
32
- }
33
-
34
- def initialize_increment
35
- @increment_tps = 0
36
- @increment_fps = 0
37
- @increment_total_submitted = 0
38
- @increment_initialized = true
39
- end
40
-
41
- # if adding pephits in groups at a time, the entire group does not need to be
42
- # queried, just the individual hit. Use this OR pephits_precision (NOT
43
- # both). The initial query to this method will begin a running tally that
44
- # is saved by the validator.
45
- # takes either an array or a single pephit (determined by if it is a
46
- # SpecID::Pep)
47
- def increment_pephits_precision(peps)
48
- tmp = $VERBOSE; $VERBOSE = nil
49
- initialize_increment unless @increment_initialized
50
- $VERBOSE = tmp
51
-
52
- to_submit =
53
- if peps.is_a? SpecID::Pep
54
- [peps]
55
- else
56
- peps
57
- end
58
- @increment_total_submitted += to_submit.size
59
- (tps, fps) = partition(to_submit)
60
- @increment_tps += tps.size
61
- @increment_fps += fps.size
62
- (num_tps, num_fps) =
63
- if self.respond_to?(:calc_precision_prep) # for digestion based validators
64
- (num_tps, num_fps) = calc_precision_prep(@increment_tps, @increment_fps)
65
- [num_tps, num_fps]
66
- else
67
- [@increment_tps, @increment_fps]
68
- end
69
- calc_precision(num_tps, num_fps)
70
- end
71
-
72
-
73
- # returns an adjusted false positive rate (a float not to drop below 0.0)
74
- # based on a background of 'false'-false positive hits to total hits. Also
75
- # sets the @calculated_background attribute. Accepts floats or ints
76
- def adjust_fps_for_background(num_tps, num_fps, background)
77
- num_fps = num_fps.to_f
78
- total_peps = num_tps + num_fps
79
- @calculated_background = num_fps / total_peps
80
- num_fps -= (total_peps.to_f * background)
81
- num_fps = 0.0 if num_fps < 0.0
82
- num_fps
83
- end
84
-
85
- # copied from libjtp: vec
86
- # returns the mean and std_dev
87
- def sample_stats(array)
88
- _len = array.size
89
- _sum = 0.0
90
- _sum_sq = 0.0
91
- array.each do |val|
92
- _sum += val
93
- _sum_sq += val * val
94
- end
95
- std_dev = _sum_sq - ((_sum * _sum)/_len)
96
- std_dev /= ( (_len > 1) ? (_len-1) : 1 )
97
- # on occasion, a very small negative number occurs
98
- if std_dev < 0.0
99
- std_dev = 0.0
100
- else
101
- std_dev = Math.sqrt(std_dev)
102
- end
103
- mean = _sum.to_f/_len
104
- [mean, std_dev]
105
- end
106
-
107
- # takes an array of validators and returns a fresh array where each has been
108
- # turned into a sensible hash (with symbols as the keys!)
109
- def self.sensible_validator_hashes(validators)
110
- validators.map do |val|
111
- hash = {}
112
- case val
113
- when Validator::TruePos
114
- hash.merge( {:correct_wins => val.correct_wins, :file => val.fasta.filename } )
115
- when Validator::AAEst
116
- %w(frequency background calculated_background).each do |cat|
117
- hash[cat.to_sym] = val.send(cat.to_sym)
118
- end
119
- when Validator::AA
120
- %w(false_to_total_ratio background calculated_background).each do |cat|
121
- hash[cat.to_sym] = val.send(cat.to_sym)
122
- end
123
- when Validator::Decoy
124
- %w(pi_zero correct_wins decoy_on_match).each do |cat|
125
- hash[cat.to_sym] = val.send(cat.to_sym)
126
- end
127
- hash[:constraint] = val.constraint.inspect if val.constraint
128
- when Validator::Bias
129
- %w(correct_wins proteins_expected background calculated_background false_to_total_ratio).each do |cat|
130
- hash[cat.to_sym] = val.send(cat.to_sym)
131
- end
132
- hash[:file] = val.fasta.filename
133
- when Validator::Transmem::Protein
134
- %w(false_to_total_ratio min_num_tms soluble_fraction correct_wins no_include_tm_peps background calculated_background transmem_file).each do |cat|
135
- hash[cat.to_sym] = val.send(cat.to_sym)
136
- end
137
- when Validator::Probability
138
- %w(prob_method).each do |cat|
139
- hash[cat.to_sym] = val.send(cat.to_sym)
140
- end
141
- when Validator::QValue
142
- # no params to add
143
- else ; raise ArgumentError, "Don't know the validator class #{val}"
144
- end
145
- klass_as_s = val.class.to_s
146
- hash[:type] = Validator_to_string[klass_as_s]
147
- hash[:class] = klass_as_s
148
- hash
149
- end
150
- end
151
- end
152
-
153
- module Precision::Calculator
154
- # calculates precision by the assumption that the first group are all true
155
- # hits and the second are all false hits
156
- # (0,0) is returned as 1.0
157
- def calc_precision(num_true_hits, num_false_hits)
158
- if ((num_true_hits.to_f == 0.0) && (num_false_hits.to_f == 0.0))
159
- 1.0
160
- else
161
- num_true_hits.to_f / (num_true_hits.to_f + num_false_hits.to_f)
162
- end
163
- end
164
- end
165
-
166
- # will calculate precision for groups of proteins where the first group are
167
- # normal hits (which may be true or false) and the second are decoy hits.
168
- # edge case: if num_normal.to_f == 0.0 then if num_decoy.to_f > 0 ; 0, else 1
169
- module Precision::Calculator::Decoy
170
- def calc_precision(num_normal, num_decoy, frit=1.0)
171
- # will calculate as floats in case fractional amounts passed in for
172
- # whatever reason
173
- num_normal_f = num_normal.to_f
174
- num_true_pos = num_normal_f - (num_decoy.to_f * frit)
175
- precision =
176
- if num_normal_f == 0.0
177
- if num_decoy.to_f > 0.0
178
- 0.0
179
- else
180
- 1.0
181
- end
182
- else
183
- num_true_pos/num_normal_f
184
- end
185
- end
186
- end
187
-
188
- #require 'validator/true_pos'
189
- #require 'validator/aa'
190
- #require 'validator/aa_est'
191
- #require 'validator/bias'
192
- #require 'validator/decoy'
193
- #require 'validator/transmem'
194
- #require 'validator/probability'
195
- #require 'validator/q_value'
196
- #require 'validator/prot_from_pep'
197
-
data/lib/xml.rb DELETED
@@ -1,38 +0,0 @@
1
-
2
- module XML
3
- HourMinuteMatch = /[MH]/o
4
- # returns a float object of seconds
5
- # doesn't support year month, etc, yet
6
- def self.duration_to_seconds(string)
7
- case x = string[0,2]
8
- when 'PT'
9
- rest = string[2..-1]
10
- # usually it will be this 'PT1.223434S':
11
- if rest !~ HourMinuteMatch
12
- rest[0...-1].to_f
13
- else
14
- addit = ''
15
- total_secs = 0
16
- total_secs_as_float = nil
17
- rest.split('').each do |let|
18
- case let
19
- when 'H'
20
- total_secs += addit.to_i * 3600
21
- addit = ''
22
- when 'M'
23
- total_secs += addit.to_i * 60
24
- addit = ''
25
- when 'S'
26
- total_secs_as_float = total_secs.to_f
27
- total_secs_as_float += addit.to_f
28
- else
29
- addit << let
30
- end
31
- end
32
- total_secs_as_float
33
- end
34
- else
35
- abort 'need to include support for other durations'
36
- end
37
- end
38
- end
@@ -1,119 +0,0 @@
1
-
2
- module XMLStyleParser
3
- @done_once = nil
4
-
5
- Parser_precedence = %w(AXML LibXML XMLParser Regexp REXML)
6
- # currently AXML requires 'xmlparser' to be installed.... (may not always be
7
- # the case...)
8
- File_required = {'AXML' => /^axml/, 'LibXML' => /^xml\/libxml/, 'XMLParser' => /^xmlparser/}
9
-
10
- # the method that the parser will call on the given file at parse!
11
- attr_accessor :method
12
-
13
- # parses the given file by sending to @method
14
- def parse(file, opts={})
15
- if respond_to? @method
16
- send(@method, file, opts)
17
- else
18
- raise NoMethodError, "Parser of class #{self.class} can't parse #{@method} yet"
19
- end
20
- end
21
-
22
- # XMLParser and xml/libxml are incompatible, so if xmlparser is available,
23
- # libxml will not be loaded (XMLParser#parse is clobbered by
24
- # XML::Parser#parse [don't ask me why])
25
- def self.require_parsers
26
- if !@done_once
27
- have_xmlparser = false
28
- begin
29
- require 'xmlparser'
30
- puts "Loaded XMLParser" if $VERBOSE
31
- have_xmlparser = true
32
- rescue LoadError
33
- end
34
-
35
- begin
36
- require 'axml'
37
- puts "Loaded AXML" if $VERBOSE
38
- rescue LoadError
39
- end
40
-
41
- begin
42
- unless have_xmlparser
43
- require 'xml/libxml'
44
- puts "Loaded xml/libxml" if $VERBOSE
45
- ################################################################
46
- # IMPORTANT!
47
- # This magic line makes the parser behave like it ought to!!
48
- XML::Parser.default_keep_blanks = false
49
- ################################################################
50
- end
51
- rescue LoadError
52
- end
53
- end
54
- @done_once = true
55
- end
56
-
57
- # returns an array of strings depending on File_required (in the order of
58
- # Parser_precedence)
59
- def self.available_xml_parsers
60
- require_parsers
61
- parser_precedence = Parser_precedence.dup
62
- File_required.map do |k,v|
63
- unless $".any? {|req_file| req_file.match(v) }
64
- parser_precedence.delete(k)
65
- end
66
- end
67
- parser_precedence
68
- end
69
-
70
- ## appends downcase to each parser type here and tries to require it
71
- # returns all those that were required without a load error
72
- def self.require_parse_files(base_dir)
73
- XMLStyleParser.available_xml_parsers.select do |v|
74
- to_require = base_dir + '/' + v.downcase
75
- begin
76
- require to_require
77
- true
78
- rescue LoadError
79
- false
80
- end
81
- end
82
- end
83
-
84
- # seeks a subclass that has the public_method @method
85
- def self.choose_parser(const, method, special_subclass=nil)
86
- ## First update @@parser_precedence to ensure we should get these guys
87
- parser_precedence = available_xml_parsers
88
-
89
- available_constants = parser_precedence.select do |v|
90
- const.const_defined?(v)
91
- end
92
- available_subclasses = available_constants.map do |v|
93
- const.const_get(v)
94
- end
95
- available = available_subclasses.select do |subclass|
96
- subclass.public_method_defined? method
97
- end
98
- if special_subclass
99
- available_special_subclasses = []
100
- available.each do |subclass|
101
- if subclass.const_defined?(special_subclass)
102
- available_special_subclasses << subclass.const_get(special_subclass)
103
- end
104
- end
105
- available = available_special_subclasses
106
- end
107
- if available.size > 0
108
- available.first
109
- else
110
- warning = ""
111
- if special_subclass
112
- warning << "** while looking for special subclass: #{special_subclass} **\n"
113
- end
114
- warning << "No parser of class #{const} can parse :#{method}\n** Is 'axml' (or another xml parser) installed and working? **"
115
- raise NoMethodError, warning
116
- end
117
- end
118
-
119
- end
@@ -1,19 +0,0 @@
1
-
2
-
3
- module XMLParserWrapper
4
- def parse_and_report(file, const, report_method=:report)
5
- parse_and_report_string(IO.read(file), const, report_method)
6
- end
7
-
8
- def parse_and_report_string(string, const, report_method=:report)
9
- parser = self.class.const_get(const).new
10
- parser.parse(string)
11
- parser.send(report_method)
12
- end
13
-
14
- def parse_and_report_io(io, const, report_method=:report)
15
- parser = self.class.const_get(const).new
16
- parser.parse(io)
17
- parser.send(report_method)
18
- end
19
- end
data/release_notes.txt DELETED
@@ -1,2 +0,0 @@
1
-
2
-
@@ -1,97 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
-
4
- require 'roc'
5
- require 'optparse'
6
- require 'generator'
7
-
8
- $decoy = false
9
- $base = "precision_vs_numhits"
10
-
11
- opts = OptionParser.new do |op|
12
- op.banner = "usage: #{File.basename(__FILE__)} smriti.csv ..."
13
- op.separator ""
14
- op.separator "smriti.csv = (tab delimited) prob, file:seq:charge, T/F"
15
- op.separator ""
16
- op.on("--decoy", "'F' indicates this is a decoy") {|v| $decoy = true }
17
- op.on("-o", "--outfile <filename>", "base outfile name (#{$base})") {|v| $base = v}
18
- end
19
-
20
- opts.parse!
21
-
22
- if ARGV.size <= 0
23
- puts opts
24
- exit
25
- end
26
-
27
- files = ARGV.to_a
28
-
29
- xys = files.map do |file|
30
- triplets = IO.readlines(file).reject{|v| v =~ /^#/}.map do |line|
31
- line.chomp.split("\t")
32
- end
33
-
34
- # check that they're all OK:
35
- triplets.each do |trip|
36
- if trip.size != 3 ; abort "bad triplet" end
37
- end
38
-
39
- # figure out the ordering (and correct if necessary):
40
- higher_better = triplets[0][0].to_f > triplets.last[0].to_f
41
-
42
- doublets = triplets.map do |trip|
43
- value = trip[0].to_f
44
- value *= -1 if higher_better
45
- [value, ((trip[2] == 'T') ? true : false)]
46
- end
47
-
48
- roc = ROC.new
49
-
50
- (tps, fps) = roc.doublets_to_separate(doublets)
51
-
52
- (x, y) =
53
- if $decoy
54
- (numhits, precision) = DecoyROC.new.pred_and_ppv(tps, fps)
55
- [numhits, precision]
56
- else
57
- (numhits, precision) = roc.numhits_and_ppv(doublets)
58
- [numhits, precision]
59
- end
60
- [x,y]
61
-
62
- end
63
-
64
-
65
- ## PLOT TO to_plot
66
- File.open( $base + ".to_plot", 'w') do |fh|
67
- fh.puts "XYData"
68
- fh.puts $base
69
- fh.puts "precision vs. num hits"
70
- fh.puts "num hits"
71
- fh.puts "precision"
72
- files.zip(xys) do |file,xy|
73
- (x,y) = xy
74
- x.unshift(0)
75
- y.unshift(1)
76
- fh.puts file.sub(/\.[^\.]$/,'')
77
- fh.puts x.join(" ")
78
- fh.puts y.join(" ")
79
- end
80
- end
81
-
82
- File.open( $base + ".csv", 'w') do |fh|
83
- columns = []
84
- files.zip(xys) do |file,xy|
85
- f = file.sub(/\.[^\.]$/,'')
86
- (x,y) = xy
87
- x.unshift("#Hits: #{f}")
88
- y.unshift("Precision: #{f}")
89
- columns << x << y
90
- end
91
- SyncEnumerator.new(*columns).each do |row|
92
- fh.puts row.join("\t")
93
- end
94
- end
95
-
96
-
97
-
@@ -1,61 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
-
4
- if ARGV.size < 2
5
- puts "usage: #{File.basename(__FILE__)} protxml pepxml"
6
- puts "Based on some kind of truncated prot xml file, takes a pepxml file"
7
- puts "and deletes all search hits/peptides that aren't in the prot xml file!"
8
- exit
9
- end
10
-
11
- protxml = ARGV[0]
12
- pepxml = ARGV[1]
13
-
14
- hash = {}
15
- File.open(protxml) do |fh|
16
- while line = fh.gets
17
- if line =~ /peptide_sequence="(.*?)" charge="(\d)" /
18
- hash[[$1.dup,$2.dup]] = 1
19
- end
20
- end
21
- end
22
-
23
- p hash
24
-
25
- out = File.open(pepxml + ".small", "w")
26
-
27
- in_hit = false
28
- cur_charge = nil
29
- stored_lines = ""
30
- print_it = false
31
- File.open(pepxml) do |fh|
32
- while line = fh.gets
33
- if line =~ /<search_result .*? assumed_charge="(\d)".*?>/
34
- cur_charge = $1.dup
35
- in_hit = true
36
- end
37
- if line =~ /<search_hit .*? peptide="(.*?)"/
38
- if hash.key?([$1.dup,cur_charge])
39
- print_it = true
40
- else
41
- print_it = false
42
- end
43
- end
44
- if line =~ /<\/search_result>/
45
- if print_it == true
46
- stored_lines << line
47
- out.print stored_lines
48
- end
49
- stored_lines = ""
50
- in_hit == false
51
- elsif !in_hit
52
- out.print line
53
- else
54
- stored_lines << line
55
- end
56
- end
57
-
58
-
59
- end
60
-
61
- out.close
@@ -1,47 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'fasta'
4
- require 'sample_enzyme'
5
-
6
- if ARGV.size < 3
7
- puts "usage: #{File.basename(__FILE__)} min_peptide_length missed_cleavages <file>.fasta ..."
8
- puts " returns <file>.min_pep_length_<#>.missed_cleavages_<#>.degenerate_peptides.csv"
9
- abort
10
- end
11
-
12
-
13
-
14
- min_peptide_length = ARGV.shift.to_i
15
- missed_cleavages = ARGV.shift.to_i
16
-
17
- ARGV.each do |file|
18
- hash = {}
19
-
20
- if file !~ /\.fasta/
21
- abort "must be a fasta file with extension fasta"
22
- end
23
- new_filename = file.sub(/\.fasta$/, '')
24
- new_filename << ".min_pep_length_#{min_peptide_length}.missed_cleavages_#{missed_cleavages}.degenerate_peptides.csv"
25
- peptides = []
26
- Fasta.new.read_file(file).prots.each do |prot|
27
-
28
-
29
- SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).each do |aaseq|
30
- if aaseq.size >= min_peptide_length
31
- hash[aaseq] ||= []
32
- hash[aaseq].push( prot.header.sub(/^>/,'') )
33
- end
34
- end
35
- #fh.puts( prot.header.split(/\s+/).first.sub(/^>/,'') + "\t" + SampleEnzyme.tryptic(prot.aaseq, missed_cleavages).join(" ") )
36
- end
37
-
38
- File.open(new_filename, "w") do |fh|
39
- hash.keys.sort_by {|pep| hash[pep].size }.reverse.each do |pep|
40
- fh.puts( [pep, *(hash[pep])].join("\t") )
41
- end
42
- end
43
- end
44
-
45
-
46
-
47
-