mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,637 +0,0 @@
1
- require 'sort_by_attributes'
2
- require 'validator'
3
- require 'spec_id'
4
- require 'merge_deep'
5
- require 'spec_id/precision/filter/interactive'
6
- require 'spec_id/precision/filter/output'
7
-
8
-
9
- class Filter
10
-
11
- # filters using previously passed in methods and options
12
- def filter(group)
13
- if @opts
14
- send(@method, group, *@opts)
15
- else
16
- send(@method, group)
17
- end
18
- end
19
-
20
- # replaces the contents of group with what passed
21
- def filter!(group)
22
- group.replace(filter(group))
23
- end
24
- end
25
-
26
-
27
-
28
- # we have to require this after we setup our defaults hash
29
- # require 'filter/spec_id/cmdline'
30
-
31
- class SpecID::Precision::Filter
32
- FV_DEFAULTS = {
33
- :sequest =>
34
- {
35
- :xcorr1 => 1.0,
36
- :xcorr2 => 1.5,
37
- :xcorr3 => 2.0,
38
- :deltacn => 0.1,
39
- :ppm => 1000,
40
- :include_deltacnstar => true,
41
- },
42
-
43
-
44
- # output
45
- :proteins => false,
46
- :output => [],
47
-
48
- # general
49
- :top_hit_by => :xcorr,
50
- :postfilter => :top_per_scan,
51
- :prefilter => false,
52
- :hits_together => true,
53
-
54
- # These are also defaulted in the commandline because they are necessary
55
- # for the validators... could this introduce conflicts somehow?
56
- :decoy_on_match => true,
57
- :ties => true,
58
-
59
- # UNLISTED FOR NOW:
60
- :include_ties_in_top_hit_prefilter => true,
61
- :include_ties_in_top_hit_postfilter => false,
62
- }
63
-
64
- require 'spec_id/precision/filter/cmdline'
65
-
66
- def filter_and_validate_cmdline(args)
67
- (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
68
- if spec_id_obj == nil
69
- puts option_parser
70
- return
71
- end
72
- final_answer = SpecID::Precision::Filter.new.filter_and_validate(spec_id_obj, options)
73
- end
74
-
75
- # # output_array has doublets of [format, handle]
76
- # # answer is the answer one gets out of filter_and_validate
77
- # def output(answer, output_array)
78
- # output_array.each do |format, handle|
79
- # SpecID::Precision::Filter::Output.new(format, handle)
80
- # end
81
- # end
82
-
83
- # Very high level method that takes simple parameters.
84
- # spec_id may be a filename or a SpecID object (containing peps)
85
- # Default values may be queried from SpecID::Precision::Filter::FV_DEFAULTS
86
- # Returns a structured hash:
87
- # Fl = Float ; Ar = Array
88
- # { :params => <Hash of filtering params>,
89
- # :pephits => <Ar of pephits>,
90
- # :pephits_precision => [<array of precision>]
91
- # # if :proteins => true
92
- # :prothits => <Array of prothits>,
93
- # :prothits_precision => [ Array of hashes where each hash =
94
- # { :worst => Fl, :normal => Fl,
95
- # :normal_stdev => Fl } ]
96
- # }
97
- #
98
- # NOTE: Brackets [] indicate an Array! The Bar '|' indicates another option.
99
- # The asterik '*' is the default option.
100
- #
101
- # :sequest => {
102
- # :xcorr1 -> >= (xcorr +1 charge state)
103
- # :xcorr2 -> >= (xcorr +2 charge state)
104
- # :xcorr3 -> >= (xcorr +3 charge state)
105
- # :deltacn -> >= (delta cn)
106
- # :ppm -> <= parts per million (Float)
107
- # :include_deltacnstar => *true | false include deltacn (given at 1.1) of
108
- # top hit with no 2nd hit
109
- #
110
- # }
111
- # OUTPUT:
112
- # :proteins => true | *false gives proteins (and validation)
113
- # :output => [[format, FILENAME=nil],...] formats to output filtering results.
114
- # can be used multiple times
115
- # FILENAME is the filename to use
116
- # if nil, then outputs to $stdout
117
- # valid formats are:
118
- # :text_table (default)
119
- # :yaml (need to implement)
120
- # :protein_summary (need to implement)
121
- # :html_table (need to implement)
122
- # default value =>
123
- # [[:text_table,nil]]
124
- #
125
- # VALIDATION:
126
- # :validators => [Array] objects that respond to pephit_precision
127
- # usually of base class Validator
128
- # NOTE: if you have decoy peptides, you MUST have
129
- # a Validator::Decoy object to separate them out.
130
- # NOTE: if transmem validator passed in, the
131
- # proteins in spec_id must already be granted
132
- # transmem status!
133
- #
134
- #
135
- # OTHER:
136
- # :top_hit_by -> *:xcorr | :probability
137
- # probabilities only in bioworks.xml files right now (if
138
- # they were calculated).
139
- # :postfilter -> *:top_per_scan | :top_per_aaseq | :top_per_aaseq_charge
140
- # :top_per_scan hashes by filename + scan
141
- # :top_per_aaseq hashes by top_per_scan + aaseq
142
- # :top_per_aaseq_charge hashes by top_per_aaseq + charge
143
- # :prefilter -> true | *false Takes top hit per file+scan+charge
144
- # :interactive => interactive_object
145
- # # should behave like this:
146
- # # interactive_object.filter_args(currentopts) -> args_for_filtering | nil (done)
147
- #
148
- # # interactive_object.passing(final_answer)
149
-
150
- # The defaults for filter_and_validate
151
-
152
- def filter_and_validate(spec_id_obj, options={})
153
- # NOTE:
154
- # This is a fairly complicated method. The complication comes in doing
155
- # top hit filters on separate/cat searches wanted them to be either
156
- # together or separate. I opt for fewer conversions between the two, but
157
- # that means keeping track of more things...
158
-
159
- opts = FV_DEFAULTS.merge_deep(options)
160
-
161
- spec_id = spec_id_obj
162
-
163
- peps = spec_id.peps
164
- filename = spec_id.filename
165
-
166
- #######################################
167
- # DEFAULTS:
168
- interactive_changing_keys = [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar, :postfilter]
169
- interactive_shortcut_map = {
170
- :xcorr1 => 'x1',
171
- :xcorr2 => 'x2',
172
- :xcorr3 => 'x3',
173
- :deltacn => 'dcn',
174
- :ppm => 'ppm',
175
- :include_deltacnstar => 'dcns',
176
- :postfilter => 'pf',
177
- }
178
- to_float = proc {|x| x.to_f}
179
- to_bool = proc do |x|
180
- case x
181
- when /^t/io
182
- true
183
- when /^f/io
184
- false
185
- when true
186
- true
187
- when false
188
- false
189
- else
190
- nil
191
- end
192
- end
193
- to_postfilter = proc do |x|
194
- case x
195
- when 's'
196
- :top_per_scan
197
- when 'a'
198
- :top_per_aaseq
199
- when 'ac'
200
- :top_per_aaseq_charge
201
- when Symbol
202
- x
203
- end
204
- end
205
- casting_map = {
206
- :xcorr1 => to_float,
207
- :xcorr2 => to_float,
208
- :xcorr3 => to_float,
209
- :deltacn => to_float,
210
- :ppm => to_float,
211
- :include_deltacnstar => to_bool,
212
- :postfilter => to_postfilter,
213
- }
214
-
215
- # output:
216
- # NOTE: BOOLEANS that are by default false do not need a default!!
217
- # They will yield false on key lookup if no key or false!
218
- # BOOLEANS that by default are true should be queried like this
219
- # !(opts[:<option>] == false)
220
-
221
- # open up each of the files for writing
222
- if opts[:output]
223
- outputs = opts[:output].map do |format, where|
224
- if where == nil
225
- where = $stdout
226
- end
227
- SpecID::Precision::Filter::Output.new(format, where)
228
- end
229
- end
230
-
231
- postfilters_per_hash = {
232
- :top_per_scan => [:base_name, :first_scan],
233
- :top_per_aaseq => [:aaseq], # first by top_per_scan, then this guy
234
- :top_per_aaseq_charge => [:aaseq, :charge], # first by top_per_scan, then this one
235
- }
236
-
237
- top_hit_by__to_sort_by = {
238
- :xcorr => [:xcorr, {:down=> [:xcorr]}],
239
- :probability => [:probability, (spec_id.hi_prob_best ? {:down=> [:probability]} : {})],
240
- }
241
- sort_by_att_opts = top_hit_by__to_sort_by[opts[:top_hit_by]]
242
- opts_for_top_hit_prefilter = {
243
- :per => [:base_name, :first_scan, :charge],
244
- :by => sort_by_att_opts,
245
- :include_ties => opts[:include_ties_in_top_hit_prefilter]
246
- }
247
- # PRIVATE DEFAULTS:
248
- merge_prefix = 'DECOY_'
249
- unmerge_regexp = /^DECOY_/
250
-
251
- #######################################
252
-
253
-
254
- # opts_decoy = opts[:decoy]
255
-
256
-
257
-
258
- # if we have a Validator::Decoy object, we will use its defaults to split
259
- # peptides.
260
- decoy_validator =
261
- if opts[:validators]
262
- decoy_vals = opts[:validators].select {|v| v.class == Validator::Decoy }
263
- if decoy_vals.size == 0
264
- nil
265
- elsif decoy_vals.size == 1
266
- decoy_vals.first
267
- else
268
- raise ArgumentError, "can only have one Validator::Decoy object"
269
- end
270
-
271
- ### suck out the relevant parameters
272
- #sep_params = [:decoy_on_match, :correct_wins].inject({}) do |hash,k|
273
- # hash[k] = decoy_validator.send(k)
274
- # hash
275
- #end
276
- else
277
- nil
278
- end
279
-
280
- decoy_validator_to_split_with = nil
281
-
282
- pep_sets =
283
- if decoy_validator
284
- if decoy_validator.constraint.is_a?(Regexp)
285
- if opts[:hits_together]
286
- decoy_validator_to_split_with = decoy_validator
287
- [peps]
288
- else
289
- (target, decoy) = decoy_validator.partition(peps)
290
- #(target, decoy) = SpecID.classify_by_prot(peps, opts_decoy, sep_params[:decoy_on_match], sep_params[:correct_wins])
291
- [target, decoy]
292
- end
293
- elsif decoy_validator.constraint.is_a?(String) ## a Filename
294
- decoy_peps = SpecID.new(decoy_validator.constraint).peps
295
-
296
- if opts[:hits_together]
297
- # we fake that the protein sets are together
298
- decoy_validator_to_split_with = Validator::Decoy.new(:constraint => unmerge_regexp)
299
- decoy_peps.each do |pep|
300
- pep.prots.each {|prt| prt.reference = merge_prefix + prt.reference }
301
- end
302
- [peps + decoy_peps] # wrap them so we get the target out
303
- else
304
- [peps, decoy_peps]
305
- end
306
- else
307
- raise ArgumentError, "Decoy::Validator#constraint must be a Regexp or valid SpecID file"
308
- end
309
- else
310
- [peps] # no decoy
311
- end
312
-
313
- # This method doesn't seem to do so well, but a person can use a different
314
- # one and enter in their own custom pi_0 value!
315
- #if opts[:decoy_pi_zero]
316
- # if pep_sets.size < 2
317
- # raise ArgumentError, "must have a decoy validator for pi zero calculation!"
318
- # end
319
- # require 'pi_zero'
320
- # (_target, _decoy) = pep_sets
321
- # pvals = PiZero.p_values_for_sequest(*pep_sets).sort
322
- # pi_zero = PiZero.pi_zero(pvals)
323
- # opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
324
- #end
325
-
326
- if opts[:proteins]
327
- protein_validator = Validator::ProtFromPep.new
328
- end
329
-
330
- ### TOP HITS PREFILTER < < TOP_HITS_TOGETHER > >
331
- ###########################
332
- # TOP HITS FILTER:
333
- ###########################
334
- # REALLY, this guy only exists for speed and memory consumption
335
- # If we prefilter, we don't have to filter as many hits in every
336
- # interactive round. I'd leave this guy out if I were doing only a
337
- # sequest filter. (I should compare results with this filter and w/o)
338
- # This guy is very tricky since we need to consider whether they are to be
339
- # run together or separately and not do more work than we need
340
- # get passed_target for any case (and passed_decoy if opts[:decoy])
341
-
342
-
343
- top_hit_prefilter = SpecID::Precision::Filter::Peps.new(:top_hit, opts_for_top_hit_prefilter) if opts[:prefilter]
344
-
345
- if top_hit_prefilter
346
- pep_sets.map! do |pep_set|
347
- top_hit_prefilter.filter(pep_set)
348
- end
349
- end
350
-
351
- # prepare our top hit filter:
352
- # since we are now modulating this guy, we need to create it fresh every
353
- # time
354
- top_per_scan_postfilter = SpecID::Precision::Filter::Peps.new(:top_hit,
355
- :per => postfilters_per_hash[:top_per_scan],
356
- :by => sort_by_att_opts,
357
- :include_ties => opts[:include_ties_in_top_hit_postfilter])
358
-
359
-
360
-
361
- # Prepare to loop
362
- # Give interactive help once here if necessary
363
- interactive = opts[:interactive]
364
- if interactive
365
- ARGV.clear
366
- interactive.out(interactive.interactive_help(interactive_changing_keys, interactive_shortcut_map)) if interactive.verbose
367
- end
368
-
369
- # the loop is for if we are interactive
370
- final_answer = nil
371
- loop do
372
-
373
- if interactive #interactive
374
- # a bit of a hack, but we shove on the postfilter param to modulate
375
- opts[:sequest][:postfilter] = opts[:postfilter]
376
- response = interactive.filter_args(opts[:sequest], interactive_changing_keys, interactive_shortcut_map, casting_map)
377
- opts[:postfilter] = opts[:sequest].delete(:postfilter)
378
- break if response == nil
379
- end
380
-
381
- # prepare our top hit filter:
382
- # since we are now modulating this guy, we need to create it fresh every
383
- # time
384
-
385
- sub_postfilter =
386
- if opts[:postfilter] == :top_per_scan
387
- nil
388
- else
389
- postfilter_per_args = postfilters_per_hash[opts[:postfilter]]
390
- SpecID::Precision::Filter::Peps.new(:top_hit,
391
- :per => postfilter_per_args,
392
- :by => sort_by_att_opts,
393
- :include_ties => opts[:include_ties_in_top_hit_postfilter]
394
- )
395
- end
396
-
397
- pep_sets_to_be_filtered = pep_sets.map
398
-
399
- ### SEQUEST < EITHER >
400
- ###########################
401
- # SEQUEST FILTER:
402
- ###########################
403
- # This guy is immune to the trickiness of top hits, so we just filter
404
- # separately since validation is best done without decoys (except decoy)
405
- sequest_args = opts[:sequest].values_at( :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar )
406
- sequest_filter = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *sequest_args)
407
-
408
- pep_sets_filtered = pep_sets_to_be_filtered.map do |pep_set|
409
- sequest_filter.filter(pep_set)
410
- end
411
-
412
- ### FINAL HIT PER SCAN < < TOP_HITS_TOGETHER > >
413
- ##########################
414
- # FINAL HIT PER SCAN
415
- ##########################
416
- # Why not just do the top hit filter in the top hits pre filter before?
417
- # Good question. Answer: We may have instances when the top hit (by
418
- # xcorr) has some other poorer attribute than the hit at the other charge.
419
- # In this case, we'd end up with no passing peptide.
420
- # Also, the xcorr filter is per charge, so we may filter out the higher
421
- # scoring peptide hit even though the other would pass based on its charge
422
- # state, etc., etc....
423
- # ###################################################
424
- # NOTE THIS WELL:
425
- # IF IT IS SUPPOSE TO be separate it's *ALREADY* separate, if together its
426
- # *ALREADY* together!!!!
427
- # the implication is that we don't need to do any merging or
428
- # separating before we do this last filter!!!!
429
- # ###################################################
430
-
431
- # TODO: We need to add this guy in!
432
- #if opts[:uniq_aa]
433
- # pep_sets_filtered.map do |pep_set|
434
- # end
435
- #end
436
-
437
- pep_sets_filtered.map! do |pep_set|
438
- top_per_scan_postfilter.filter!(pep_set)
439
- if sub_postfilter
440
- sub_postfilter.filter!(pep_set)
441
- else
442
- pep_set
443
- end
444
- end
445
-
446
- normal_post_filtered_peps = pep_sets_filtered.first
447
-
448
- # separate the decoy's out if they are together
449
- if decoy_validator_to_split_with # only set if opts[:hits_together]!!
450
- (target, decoy) = decoy_validator_to_split_with.partition(normal_post_filtered_peps)
451
- pep_sets_filtered = [target, decoy]
452
- end
453
-
454
- ### VALIDATION < SEPARATE >
455
- pephit_precision_array = get_pephit_precision(opts[:validators], *pep_sets_filtered) if opts[:validators]
456
-
457
- final_answer = {
458
- :params => opts,
459
- :pephits => pep_sets_filtered.first,
460
- }
461
- if pephit_precision_array
462
- final_answer[:pephits_precision] = pephit_precision_array
463
- end
464
-
465
- if opts[:proteins]
466
- protein_precision_array = peptide_precision_to_protein_precision(protein_validator, normal_post_filtered_peps, pephit_precision_array)
467
- # this could be factored out (since we do it in protein_precision)
468
-
469
- # merge the final prots into a unique set:
470
- final_answer[:prothits] = normal_post_filtered_peps.inject(Set.new) do |protset, pep|
471
- protset.merge(pep.prots)
472
- end
473
- final_answer[:prothits_precision] = protein_precision_array
474
- end
475
-
476
- ## output the output
477
- outputs.each {|output| output.print(final_answer) }
478
-
479
- if interactive
480
- interactive.passing(opts, final_answer)
481
- end
482
-
483
- if !interactive
484
- break
485
- end
486
- end
487
- # Close the filehandles
488
- outputs.each { |output| output.close } if opts[:output]
489
- final_answer
490
- end
491
-
492
- # takes peps and a peptide_precision_hash. Returns a hash with the same
493
- # keys of peptide_precision_hash where the value is a hash with these keys:
494
- # :worst => worstcase protein precision
495
- # :normal => estimaton by binomial/gaussian method (optimistic)
496
- # :normal_stdev => the stdev of the normal method
497
- def peptide_precision_to_protein_precision(protein_validator, peps, peptide_precision_array, round_num_false=:ceil)
498
- peptide_precision_array.map do |precision|
499
- num_false = ((1.0 - precision) * peps.size).ceil
500
- reply = protein_validator.prothit_precision(peps, num_false)
501
- hash = {}
502
- %w(worst normal normal_stdev).zip(reply) do |label, answer|
503
- hash[label.to_sym] = answer
504
- end
505
- hash
506
- end
507
- end
508
-
509
- # takes an array of validator objects and peps (already separated out from
510
- # decoys; the decoy's can be passed in
511
- # returns an array of results
512
- def get_pephit_precision(validators, peps, decoy_peps=nil, grant_transmem_status=false)
513
- validators.map do |validator|
514
- if validator.class == Validator::Decoy
515
- validator.pephit_precision(peps, decoy_peps)
516
- else
517
- validator.pephit_precision(peps)
518
- end
519
- end
520
- end
521
- end
522
-
523
- class SpecID::Precision::Filter::Peps < Filter
524
-
525
- # can pass in the method to call. If you have static options and you will
526
- # reuse your filter, you can pass them in here.
527
- # BEWARE: this will override any passed into the method at filter time.
528
- # If you need to do that, make a new, blank filter and pass in your args
529
- # at filter time
530
- def initialize(meth=nil, *opts)
531
- @method = meth
532
- if opts.size > 0
533
- @opts = opts
534
- else
535
- @opts = nil
536
- end
537
- end
538
-
539
- # passes the top peptide hits per attributes that it is hashed by
540
- # all hits with same score as top score are returned
541
- # assumes that all attributes are cast properly: Float,Integer, etc
542
- # converts xcorr, deltacn, deltamass, mass, and charge into numerical types
543
- # deletes the protein array (but not relevant proteins)
544
- # hashes on [pep.basename, pep.first_scan.to_i, pep.charge.to_i]
545
- # returns self for chaining
546
- # opts
547
- # :per => Array of attributes e.g. [:first_scan, :charge] # TODO: allow lambda
548
- # :by => an array for sort_by_attributes
549
- # e.g. [:xcorr, :deltacn, :ppm, {:down => [:xcorr, :deltacn]}]
550
- # :ties => *false | true | :as_array
551
- # false - one top hit is selected by random (by sorting)
552
- # true - all ties are included in final answer
553
- # :as_array - ties are included as an array
554
- def top_hit(peps, opts = {})
555
-
556
- # get the top peptide by firstscan/charge (equivalent to .out files)
557
- top_peps = []
558
- #hash = peps.hash_by(*(opts[:per]))
559
- per_array = opts[:per]
560
- hash = peps.hash_by(*per_array)
561
- ties = opts[:ties]
562
- if ties == :as_array
563
- as_array = true
564
- end
565
- hash.values.each do |v|
566
- best_to_worst = v.sort_by_attributes(*(opts[:by]))
567
- if ties
568
-
569
- best_hit = best_to_worst.first
570
- ## get the values that matter for the top hit
571
- # here get the attributes we are considering
572
- atts =
573
- if opts[:by].last.is_a? Hash
574
- opts[:by][0...-1]
575
- else
576
- opts[:by].dup
577
- end
578
- # find the best hits values
579
- top_hit_vals = atts.map do |att|
580
- best_hit.send(att)
581
- end
582
-
583
- tying_peps = []
584
- best_to_worst.each do |pep|
585
- tie = true
586
- atts.each_with_index do |att,i|
587
- unless (pep.send(att) == top_hit_vals[i])
588
- tie = false
589
- break
590
- end
591
- end
592
- if tie
593
- tying_peps << pep
594
- else
595
- break
596
- end
597
- end
598
- if as_array
599
- if tying_peps.size == 1
600
- top_peps.push( *tying_peps )
601
- else
602
- top_peps.push( tying_peps )
603
- end
604
- else
605
- top_peps.push( *tying_peps )
606
- end
607
- else
608
- top_peps << best_to_worst.first
609
- end
610
- end
611
- top_peps
612
- end
613
-
614
- # returns self for chaining
615
- # ( >= +3 charge for the x3)
616
- def standard_sequest_filter(peps, x1,x2,x3,deltacn,ppm,include_deltacnstar=true)
617
- peps.select do |pep|
618
- pep_deltacn = pep.deltacn
619
- pep_charge = pep.charge
620
-
621
- ## The outer parentheses are critical to getting the correct answer!
622
- _passing = ( (pep_deltacn >= deltacn) and ((pep_charge == 1 && pep.xcorr >= x1) or (pep_charge == 2 && pep.xcorr >= x2) or (pep_charge >= 3 && pep.xcorr >= x3)) and ( pep.ppm <= ppm ))
623
-
624
- if _passing
625
- if ((!include_deltacnstar) && (pep_deltacn > 1.0))
626
- false
627
- else
628
- true
629
- end
630
- else
631
- false
632
- end
633
- end
634
- end
635
-
636
- end
637
-
@@ -1,60 +0,0 @@
1
-
2
-
3
- module SpecID ; end
4
- module SpecID::Precision ; end
5
-
6
- module SpecID::Precision::Output
7
-
8
- # takes a format type (as symbol) and the handle to write to
9
- # if handle_or_file is a file, will open it and close (on calling close)
10
- # if it is a handle, will not close it
11
- def initialize(format, handle_or_file)
12
- @handle =
13
- if handle_or_file.is_a? String
14
- @need_to_close = true
15
- File.open(handle_or_file, 'w')
16
- else
17
- @need_to_close = false
18
- handle_or_file
19
- end
20
- @format = format
21
- end
22
-
23
- # returns self
24
- def print(answer)
25
- send( @format, @handle, answer )
26
- self
27
- end
28
-
29
- # turns all keys that are symbols into strings (recursively into *Hashes*)
30
- def self.symbol_keys_to_string(hash)
31
- new_hash = {}
32
- hash.each do |k,v|
33
- new_value =
34
- if v.is_a? Hash
35
- symbol_keys_to_string(v)
36
- else
37
- v
38
- end
39
- if k.is_a? Symbol
40
- new_hash[k.to_s] = new_value
41
- else
42
- new_hash[k] = new_value
43
- end
44
- end
45
- new_hash
46
- end
47
-
48
- # TODO: implement recursively, this has just grown and grown terribly
49
- def hash_as_string(hash)
50
- hash.inspect
51
- end
52
-
53
- # will close the handle if it is a File object
54
- def close
55
- if @need_to_close
56
- @handle.close
57
- end
58
- end
59
-
60
- end