mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,637 +0,0 @@
1
- require 'sort_by_attributes'
2
- require 'validator'
3
- require 'spec_id'
4
- require 'merge_deep'
5
- require 'spec_id/precision/filter/interactive'
6
- require 'spec_id/precision/filter/output'
7
-
8
-
9
# Minimal base class for filters. Subclasses are expected to set @method (the
# name of the filtering method to dispatch to) and @opts (extra arguments for
# that method, or nil/false for none).
class Filter

  # Dispatches to the method named by @method, passing +group+ followed by
  # the stored @opts (when there are any). Returns whatever that method
  # returns.
  def filter(group)
    call_args = @opts ? [group, *@opts] : [group]
    send(@method, *call_args)
  end

  # In-place variant: the contents of +group+ are replaced with whatever
  # passed the filter. Returns the result of group.replace.
  def filter!(group)
    passing = filter(group)
    group.replace(passing)
  end
end
25
-
26
-
27
-
28
- # we have to require this after we setup our defaults hash
29
- # require 'filter/spec_id/cmdline'
30
-
31
- class SpecID::Precision::Filter
32
# Default options for filter_and_validate; caller options are deep-merged
# over these.
FV_DEFAULTS = {
  # SEQUEST score cutoffs (xcorr per charge state, deltacn, ppm)
  :sequest => {
    :xcorr1              => 1.0,
    :xcorr2              => 1.5,
    :xcorr3              => 2.0,
    :deltacn             => 0.1,
    :ppm                 => 1000,
    :include_deltacnstar => true,
  },

  # output
  :proteins => false,
  :output   => [],

  # general
  :top_hit_by    => :xcorr,
  :postfilter    => :top_per_scan,
  :prefilter     => false,
  :hits_together => true,

  # These are also defaulted in the commandline because they are necessary
  # for the validators... could this introduce conflicts somehow?
  :decoy_on_match => true,
  :ties           => true,

  # UNLISTED FOR NOW:
  :include_ties_in_top_hit_prefilter  => true,
  :include_ties_in_top_hit_postfilter => false,
}
63
-
64
- require 'spec_id/precision/filter/cmdline'
65
-
66
# Command line entry point: parses +args+ with CmdlineParser and runs
# filter_and_validate on the result.
#
# If the parser yields no spec_id object, the option parser (usage text) is
# printed with puts and nil is returned. Otherwise returns the answer of
# SpecID::Precision::Filter#filter_and_validate.
def filter_and_validate_cmdline(args)
  (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
  if spec_id_obj.nil?
    puts option_parser
    return
  end
  SpecID::Precision::Filter.new.filter_and_validate(spec_id_obj, options)
end
74
-
75
- # # output_array has doublets of [format, handle]
76
- # # answer is the answer one gets out of filter_and_validate
77
- # def output(answer, output_array)
78
- # output_array.each do |format, handle|
79
- # SpecID::Precision::Filter::Output.new(format, handle)
80
- # end
81
- # end
82
-
83
- # Very high level method that takes simple parameters.
84
- # spec_id may be a filename or a SpecID object (containing peps)
85
- # Default values may be queried from SpecID::Precision::Filter::FV_DEFAULTS
86
- # Returns a structured hash:
87
- # Fl = Float ; Ar = Array
88
- # { :params => <Hash of filtering params>,
89
- # :pephits => <Ar of pephits>,
90
- # :pephits_precision => [<array of precision>]
91
- # # if :proteins => true
92
- # :prothits => <Array of prothits>,
93
- # :prothits_precision => [ Array of hashes where each hash =
94
- # { :worst => Fl, :normal => Fl,
95
- # :normal_stdev => Fl } ]
96
- # }
97
- #
98
- # NOTE: Brackets [] indicate an Array! The Bar '|' indicates another option.
99
- # The asterisk '*' is the default option.
100
- #
101
- # :sequest => {
102
- # :xcorr1 -> >= (xcorr +1 charge state)
103
- # :xcorr2 -> >= (xcorr +2 charge state)
104
- # :xcorr3 -> >= (xcorr +3 charge state)
105
- # :deltacn -> >= (delta cn)
106
- # :ppm -> <= parts per million (Float)
107
- # :include_deltacnstar => *true | false include deltacn (given at 1.1) of
108
- # top hit with no 2nd hit
109
- #
110
- # }
111
- # OUTPUT:
112
- # :proteins => true | *false gives proteins (and validation)
113
- # :output => [[format, FILENAME=nil],...] formats to output filtering results.
114
- # can be used multiple times
115
- # FILENAME is the filename to use
116
- # if nil, then outputs to $stdout
117
- # valid formats are:
118
- # :text_table (default)
119
- # :yaml (need to implement)
120
- # :protein_summary (need to implement)
121
- # :html_table (need to implement)
122
- # default value =>
123
- # [[:text_table,nil]]
124
- #
125
- # VALIDATION:
126
- # :validators => [Array] objects that respond to pephit_precision
127
- # usually of base class Validator
128
- # NOTE: if you have decoy peptides, you MUST have
129
- # a Validator::Decoy object to separate them out.
130
- # NOTE: if transmem validator passed in, the
131
- # proteins in spec_id must already be granted
132
- # transmem status!
133
- #
134
- #
135
- # OTHER:
136
- # :top_hit_by -> *:xcorr | :probability
137
- # probabilities only in bioworks.xml files right now (if
138
- # they were calculated).
139
- # :postfilter -> *:top_per_scan | :top_per_aaseq | :top_per_aaseq_charge
140
- # :top_per_scan hashes by filename + scan
141
- # :top_per_aaseq hashes by top_per_scan + aaseq
142
- # :top_per_aaseq_charge hashes by top_per_aaseq + charge
143
- # :prefilter -> true | *false Takes top hit per file+scan+charge
144
- # :interactive => interactive_object
145
- # # should behave like this:
146
- # # interactive_object.filter_args(currentopts) -> args_for_filtering | nil (done)
147
- #
148
- # # interactive_object.passing(final_answer)
149
-
150
- # The defaults for filter_and_validate
151
-
152
# Runs the filtering pipeline on the peptide hits of +spec_id_obj+:
# optional top-hit prefilter, standard SEQUEST filter, top-hit-per-scan
# postfilter (plus an optional finer postfilter), then validation.
#
# spec_id_obj:: must respond to +peps+ and +hi_prob_best+
# options::     deep-merged over FV_DEFAULTS
#
# Returns a Hash with :params and :pephits, plus :pephits_precision when
# :validators were given, and :prothits / :prothits_precision when
# opts[:proteins] is set. In interactive mode the answer of the last
# completed round is returned (nil if the user quits before any round).
#
# Raises ArgumentError if more than one Validator::Decoy is given or if its
# constraint is neither a Regexp nor a String.
#
# Every opened output is closed before the method returns, including when an
# exception is raised.
def filter_and_validate(spec_id_obj, options={})
  # NOTE:
  # This is a fairly complicated method. The complication comes in doing
  # top hit filters on separate/cat searches wanted them to be either
  # together or separate. I opt for fewer conversions between the two, but
  # that means keeping track of more things...

  opts = FV_DEFAULTS.merge_deep(options)

  spec_id = spec_id_obj
  peps = spec_id.peps

  #######################################
  # DEFAULTS:
  interactive_changing_keys = [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar, :postfilter]
  interactive_shortcut_map = {
    :xcorr1 => 'x1',
    :xcorr2 => 'x2',
    :xcorr3 => 'x3',
    :deltacn => 'dcn',
    :ppm => 'ppm',
    :include_deltacnstar => 'dcns',
    :postfilter => 'pf',
  }
  # casting procs handed to the interactive object
  to_float = proc {|x| x.to_f }
  to_bool = proc do |x|
    case x
    when /^t/io then true
    when /^f/io then false
    when true then true
    when false then false
    else nil
    end
  end
  to_postfilter = proc do |x|
    case x
    when 's' then :top_per_scan
    when 'a' then :top_per_aaseq
    when 'ac' then :top_per_aaseq_charge
    when Symbol then x
    end
  end
  casting_map = {
    :xcorr1 => to_float,
    :xcorr2 => to_float,
    :xcorr3 => to_float,
    :deltacn => to_float,
    :ppm => to_float,
    :include_deltacnstar => to_bool,
    :postfilter => to_postfilter,
  }

  # output:
  # NOTE: BOOLEANS that are by default false do not need a default!!
  # They will yield false on key lookup if no key or false!
  # BOOLEANS that by default are true should be queried like this
  # !(opts[:<option>] == false)

  # Open each requested output. A nil/false :output yields an empty list so
  # the printing loop below never has to deal with nil.
  outputs = (opts[:output] || []).map do |format, where|
    SpecID::Precision::Filter::Output.new(format, where.nil? ? $stdout : where)
  end

  postfilters_per_hash = {
    :top_per_scan => [:base_name, :first_scan],
    :top_per_aaseq => [:aaseq], # first by top_per_scan, then this guy
    :top_per_aaseq_charge => [:aaseq, :charge], # first by top_per_scan, then this one
  }

  top_hit_by__to_sort_by = {
    :xcorr => [:xcorr, {:down => [:xcorr]}],
    :probability => [:probability, (spec_id.hi_prob_best ? {:down => [:probability]} : {})],
  }
  sort_by_att_opts = top_hit_by__to_sort_by[opts[:top_hit_by]]
  opts_for_top_hit_prefilter = {
    :per => [:base_name, :first_scan, :charge],
    :by => sort_by_att_opts,
    :include_ties => opts[:include_ties_in_top_hit_prefilter]
  }
  # PRIVATE DEFAULTS:
  merge_prefix = 'DECOY_'
  unmerge_regexp = /^DECOY_/

  #######################################

  # if we have a Validator::Decoy object, we will use its defaults to split
  # peptides.
  decoy_validator =
    if opts[:validators]
      decoy_vals = opts[:validators].select {|v| v.class == Validator::Decoy }
      case decoy_vals.size
      when 0 then nil
      when 1 then decoy_vals.first
      else
        raise ArgumentError, "can only have one Validator::Decoy object"
      end
    end

  # only set when target and decoy hits travel through the filters together
  decoy_validator_to_split_with = nil

  pep_sets =
    if decoy_validator
      if decoy_validator.constraint.is_a?(Regexp)
        if opts[:hits_together]
          decoy_validator_to_split_with = decoy_validator
          [peps]
        else
          (target, decoy) = decoy_validator.partition(peps)
          [target, decoy]
        end
      elsif decoy_validator.constraint.is_a?(String) ## a Filename
        decoy_peps = SpecID.new(decoy_validator.constraint).peps

        if opts[:hits_together]
          # we fake that the protein sets are together
          decoy_validator_to_split_with = Validator::Decoy.new(:constraint => unmerge_regexp)
          decoy_peps.each do |pep|
            pep.prots.each do |prt|
              # Prefix each reference only once: a protein object reachable
              # from several peptides would otherwise be prefixed repeatedly.
              next if unmerge_regexp =~ prt.reference
              prt.reference = merge_prefix + prt.reference
            end
          end
          [peps + decoy_peps] # wrap them so we get the target out
        else
          [peps, decoy_peps]
        end
      else
        raise ArgumentError, "Decoy::Validator#constraint must be a Regexp or valid SpecID file"
      end
    else
      [peps] # no decoy
    end

  protein_validator = Validator::ProtFromPep.new if opts[:proteins]

  ### TOP HITS PREFILTER < < TOP_HITS_TOGETHER > >
  # REALLY, this guy only exists for speed and memory consumption
  # If we prefilter, we don't have to filter as many hits in every
  # interactive round.
  top_hit_prefilter = SpecID::Precision::Filter::Peps.new(:top_hit, opts_for_top_hit_prefilter) if opts[:prefilter]

  if top_hit_prefilter
    pep_sets.map! {|pep_set| top_hit_prefilter.filter(pep_set) }
  end

  top_per_scan_postfilter = SpecID::Precision::Filter::Peps.new(:top_hit,
    :per => postfilters_per_hash[:top_per_scan],
    :by => sort_by_att_opts,
    :include_ties => opts[:include_ties_in_top_hit_postfilter])

  # Prepare to loop
  # Give interactive help once here if necessary
  interactive = opts[:interactive]
  if interactive
    ARGV.clear
    interactive.out(interactive.interactive_help(interactive_changing_keys, interactive_shortcut_map)) if interactive.verbose
  end

  # the loop is for if we are interactive
  final_answer = nil
  loop do

    if interactive
      # a bit of a hack, but we shove on the postfilter param to modulate
      opts[:sequest][:postfilter] = opts[:postfilter]
      response = interactive.filter_args(opts[:sequest], interactive_changing_keys, interactive_shortcut_map, casting_map)
      opts[:postfilter] = opts[:sequest].delete(:postfilter)
      break if response.nil?
    end

    # the finer postfilter can change between interactive rounds, so it is
    # created fresh every time
    sub_postfilter =
      if opts[:postfilter] == :top_per_scan
        nil
      else
        SpecID::Precision::Filter::Peps.new(:top_hit,
          :per => postfilters_per_hash[opts[:postfilter]],
          :by => sort_by_att_opts,
          :include_ties => opts[:include_ties_in_top_hit_postfilter])
      end

    ### SEQUEST < EITHER >
    # This guy is immune to the trickiness of top hits, so we just filter
    # separately since validation is best done without decoys (except decoy)
    sequest_args = opts[:sequest].values_at( :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar )
    sequest_filter = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *sequest_args)

    pep_sets_filtered = pep_sets.map {|pep_set| sequest_filter.filter(pep_set) }

    ### FINAL HIT PER SCAN < < TOP_HITS_TOGETHER > >
    # Why not just do the top hit filter in the top hits pre filter before?
    # We may have instances when the top hit (by xcorr) has some other
    # poorer attribute than the hit at the other charge. In this case, we'd
    # end up with no passing peptide. Also, the xcorr filter is per charge,
    # so we may filter out the higher scoring peptide hit even though the
    # other would pass based on its charge state.
    #
    # NOTE THIS WELL:
    # IF IT IS SUPPOSED TO be separate it's *ALREADY* separate, if together
    # it's *ALREADY* together, so no merging or separating is needed before
    # this last filter.
    pep_sets_filtered.map! do |pep_set|
      top_per_scan_postfilter.filter!(pep_set)
      if sub_postfilter
        sub_postfilter.filter!(pep_set)
      else
        pep_set
      end
    end

    normal_post_filtered_peps = pep_sets_filtered.first

    # separate the decoys out if they are together
    if decoy_validator_to_split_with # only set if opts[:hits_together]!!
      (target, decoy) = decoy_validator_to_split_with.partition(normal_post_filtered_peps)
      pep_sets_filtered = [target, decoy]
    end

    ### VALIDATION < SEPARATE >
    pephit_precision_array = get_pephit_precision(opts[:validators], *pep_sets_filtered) if opts[:validators]

    final_answer = {
      :params => opts,
      :pephits => pep_sets_filtered.first,
    }
    if pephit_precision_array
      final_answer[:pephits_precision] = pephit_precision_array
    end

    if opts[:proteins]
      # Without validators there are no peptide precisions; pass an empty
      # list so the protein precision is an empty list too.
      protein_precision_array = peptide_precision_to_protein_precision(protein_validator, normal_post_filtered_peps, pephit_precision_array || [])

      # merge the final prots into a unique set
      # NOTE(review): Set is assumed to be loaded by one of the required files
      final_answer[:prothits] = normal_post_filtered_peps.inject(Set.new) do |protset, pep|
        protset.merge(pep.prots)
      end
      final_answer[:prothits_precision] = protein_precision_array
    end

    ## output the output
    outputs.each {|output| output.print(final_answer) }

    interactive.passing(opts, final_answer) if interactive

    break unless interactive
  end

  final_answer
ensure
  # Close the filehandles, also when something above raised
  outputs.each {|output| output.close } if outputs
end
491
-
492
- # takes peps and a peptide_precision_hash. Returns a hash with the same
493
- # keys of peptide_precision_hash where the value is a hash with these keys:
494
- # :worst => worstcase protein precision
495
- # :normal => estimation by binomial/gaussian method (optimistic)
496
- # :normal_stdev => the stdev of the normal method
497
# Converts each peptide precision into a protein precision estimate.
#
# protein_validator::       must respond to prothit_precision(peps, num_false)
#                           and return values in the order
#                           [worst, normal, normal_stdev]
# peps::                    the peptide hits (only its size is used here)
# peptide_precision_array:: peptide precisions (fractions, 1.0 = all true)
# round_num_false::         name of the method used to round the estimated
#                           number of false hits to a whole number (:ceil by
#                           default; e.g. :floor or :round)
#
# Returns an Array with one Hash per precision, keyed by :worst, :normal and
# :normal_stdev.
def peptide_precision_to_protein_precision(protein_validator, peps, peptide_precision_array, round_num_false=:ceil)
  peptide_precision_array.map do |precision|
    # honor the requested rounding method instead of always using ceil
    num_false = ((1.0 - precision) * peps.size).send(round_num_false)
    reply = protein_validator.prothit_precision(peps, num_false)
    hash = {}
    [:worst, :normal, :normal_stdev].zip(reply) do |label, answer|
      hash[label] = answer
    end
    hash
  end
end
508
-
509
- # takes an array of validator objects and peps (already separated out from
510
- # decoys; the decoy's can be passed in
511
- # returns an array of results
512
# Asks every validator for its peptide hit precision and returns the answers
# as an Array (same order as +validators+).
#
# A validator whose class is exactly Validator::Decoy is called with both
# +peps+ and +decoy_peps+; every other validator is called with +peps+ only.
# +grant_transmem_status+ is accepted but not used by this method.
def get_pephit_precision(validators, peps, decoy_peps=nil, grant_transmem_status=false)
  validators.map do |val|
    precision_args = val.class == Validator::Decoy ? [peps, decoy_peps] : [peps]
    val.pephit_precision(*precision_args)
  end
end
521
- end
522
-
523
- class SpecID::Precision::Filter::Peps < Filter
524
-
525
- # can pass in the method to call. If you have static options and you will
526
- # reuse your filter, you can pass them in here.
527
- # BEWARE: this will override any passed into the method at filter time.
528
- # If you need to do that, make a new, blank filter and pass in your args
529
- # at filter time
530
# Stores the name of the filter method to dispatch to and, optionally, the
# static arguments to pass along with the group at filter time.
# @opts is nil (not an empty Array) when no extra arguments were given.
def initialize(meth=nil, *opts)
  @method = meth
  @opts = opts.empty? ? nil : opts
end
538
-
539
- # passes the top peptide hits per attributes that it is hashed by
540
- # all hits with same score as top score are returned
541
- # assumes that all attributes are cast properly: Float,Integer, etc
542
- # converts xcorr, deltacn, deltamass, mass, and charge into numerical types
543
- # deletes the protein array (but not relevant proteins)
544
- # hashes on [pep.basename, pep.first_scan.to_i, pep.charge.to_i]
545
- # returns a new Array holding the top hits (it does not return self)
546
- # opts
547
- # :per => Array of attributes e.g. [:first_scan, :charge] # TODO: allow lambda
548
- # :by => an array for sort_by_attributes
549
- # e.g. [:xcorr, :deltacn, :ppm, {:down => [:xcorr, :deltacn]}]
550
- # :ties => *false | true | :as_array
551
- # false - one top hit is selected by random (by sorting)
552
- # true - all ties are included in final answer
553
- # :as_array - ties are included as an array
554
# Selects the best hit(s) within each group of +peps+.
#
# opts:
#   :per  => Array of attributes to group by (passed to peps.hash_by)
#   :by   => Array for sort_by_attributes; a trailing Hash (e.g.
#            {:down => [:xcorr]}) is treated as sort options, not as an
#            attribute
#   :ties => false/nil | true | :as_array
#            false/nil - only the first hit after sorting is kept
#            true      - every hit equal to the best hit on all :by
#                        attributes is kept
#            :as_array - like true, but two or more tying hits are pushed as
#                        one nested Array
#   :include_ties => used as the ties setting when :ties is not given (this
#            is the key the callers in this file pass)
#
# Returns a new Array of the selected hits.
def top_hit(peps, opts = {})
  by = opts[:by]
  # :ties wins when both keys are present; :include_ties used to be ignored
  ties = opts.key?(:ties) ? opts[:ties] : opts[:include_ties]
  as_array = (ties == :as_array)

  # the attributes that decide whether two hits tie (sort options stripped)
  atts = nil
  if ties
    atts = by.last.is_a?(Hash) ? by[0...-1] : by.dup
  end

  top_peps = []
  peps.hash_by(*opts[:per]).values.each do |group|
    best_to_worst = group.sort_by_attributes(*by)
    best_hit = best_to_worst.first

    unless ties
      top_peps << best_hit
      next
    end

    top_hit_vals = atts.map {|att| best_hit.send(att) }
    # hits are sorted, so the ties are the leading run equal to the best hit
    tying_peps = best_to_worst.take_while do |pep|
      atts.zip(top_hit_vals).all? {|att, val| pep.send(att) == val }
    end

    if as_array && tying_peps.size != 1
      top_peps.push(tying_peps)
    else
      top_peps.push(*tying_peps)
    end
  end
  top_peps
end
613
-
614
- # returns the peptides that pass, as given by peps.select (it does not return self)
615
- # ( >= +3 charge for the x3)
616
# Keeps the peptide hits that pass the standard SEQUEST cutoffs.
#
# A hit passes when all of the following hold:
#   * deltacn >= +deltacn+
#   * xcorr >= the cutoff for its charge: +x1+ for charge 1, +x2+ for
#     charge 2, +x3+ for charge 3 and above (other charges never pass)
#   * ppm <= +ppm+
#   * unless +include_deltacnstar+ is true, deltacn is not greater than 1.0
#
# Returns the result of peps.select (the passing hits); +peps+ itself is not
# modified.
def standard_sequest_filter(peps, x1,x2,x3,deltacn,ppm,include_deltacnstar=true)
  peps.select do |pep|
    pep_deltacn = pep.deltacn
    pep_charge = pep.charge

    passes_cutoffs =
      (pep_deltacn >= deltacn) &&
      ((pep_charge == 1 && pep.xcorr >= x1) ||
       (pep_charge == 2 && pep.xcorr >= x2) ||
       (pep_charge >= 3 && pep.xcorr >= x3)) &&
      (pep.ppm <= ppm)

    # a deltacn above 1.0 is only acceptable when include_deltacnstar is set
    passes_cutoffs && !((!include_deltacnstar) && (pep_deltacn > 1.0))
  end
end
635
-
636
- end
637
-
@@ -1,60 +0,0 @@
1
-
2
-
3
module SpecID ; end
module SpecID::Precision ; end

# Mixin providing the plumbing for writing a filtering answer: it remembers a
# format (a method name that the including object implements) and a handle,
# and only closes handles that it opened itself.
module SpecID::Precision::Output

  # format::         Symbol naming the method that does the actual writing
  # handle_or_file:: a String is opened for writing (and later closed by
  #                  #close); anything else is used as the handle as given
  #                  and is never closed here
  def initialize(format, handle_or_file)
    @format = format
    @need_to_close = handle_or_file.is_a?(String)
    @handle = @need_to_close ? File.open(handle_or_file, 'w') : handle_or_file
  end

  # Writes +answer+ by calling the format method with the handle and the
  # answer. Returns self.
  def print(answer)
    send(@format, @handle, answer)
    self
  end

  # Returns a copy of +hash+ in which every Symbol key is replaced by its
  # String form; values that are Hashes are converted recursively, all other
  # keys and values are kept as they are.
  def self.symbol_keys_to_string(hash)
    hash.inject({}) do |converted, (key, value)|
      string_key = key.is_a?(Symbol) ? key.to_s : key
      converted[string_key] = value.is_a?(Hash) ? symbol_keys_to_string(value) : value
      converted
    end
  end

  # TODO: implement recursively, this has just grown and grown terribly
  # For now simply the inspect string of +hash+.
  def hash_as_string(hash)
    hash.inspect
  end

  # Closes the handle, but only when it was opened by #initialize.
  def close
    @handle.close if @need_to_close
  end

end