mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
@@ -1,637 +0,0 @@
|
|
1
|
-
require 'sort_by_attributes'
|
2
|
-
require 'validator'
|
3
|
-
require 'spec_id'
|
4
|
-
require 'merge_deep'
|
5
|
-
require 'spec_id/precision/filter/interactive'
|
6
|
-
require 'spec_id/precision/filter/output'
|
7
|
-
|
8
|
-
|
9
|
-
class Filter
|
10
|
-
|
11
|
-
# filters using previously passed in methods and options
|
12
|
-
def filter(group)
|
13
|
-
if @opts
|
14
|
-
send(@method, group, *@opts)
|
15
|
-
else
|
16
|
-
send(@method, group)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
# replaces the contents of group with what passed
|
21
|
-
def filter!(group)
|
22
|
-
group.replace(filter(group))
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
# we have to require this after we setup our defaults hash
|
29
|
-
# require 'filter/spec_id/cmdline'
|
30
|
-
|
31
|
-
class SpecID::Precision::Filter
|
32
|
-
FV_DEFAULTS = {
|
33
|
-
:sequest =>
|
34
|
-
{
|
35
|
-
:xcorr1 => 1.0,
|
36
|
-
:xcorr2 => 1.5,
|
37
|
-
:xcorr3 => 2.0,
|
38
|
-
:deltacn => 0.1,
|
39
|
-
:ppm => 1000,
|
40
|
-
:include_deltacnstar => true,
|
41
|
-
},
|
42
|
-
|
43
|
-
|
44
|
-
# output
|
45
|
-
:proteins => false,
|
46
|
-
:output => [],
|
47
|
-
|
48
|
-
# general
|
49
|
-
:top_hit_by => :xcorr,
|
50
|
-
:postfilter => :top_per_scan,
|
51
|
-
:prefilter => false,
|
52
|
-
:hits_together => true,
|
53
|
-
|
54
|
-
# These are also defaulted in the commandline because they are necessary
|
55
|
-
# for the validators... could this introduce conflicts somehow?
|
56
|
-
:decoy_on_match => true,
|
57
|
-
:ties => true,
|
58
|
-
|
59
|
-
# UNLISTED FOR NOW:
|
60
|
-
:include_ties_in_top_hit_prefilter => true,
|
61
|
-
:include_ties_in_top_hit_postfilter => false,
|
62
|
-
}
|
63
|
-
|
64
|
-
require 'spec_id/precision/filter/cmdline'
|
65
|
-
|
66
|
-
def filter_and_validate_cmdline(args)
|
67
|
-
(spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
|
68
|
-
if spec_id_obj == nil
|
69
|
-
puts option_parser
|
70
|
-
return
|
71
|
-
end
|
72
|
-
final_answer = SpecID::Precision::Filter.new.filter_and_validate(spec_id_obj, options)
|
73
|
-
end
|
74
|
-
|
75
|
-
# # output_array has doublets of [format, handle]
|
76
|
-
# # answer is the answer one gets out of filter_and_validate
|
77
|
-
# def output(answer, output_array)
|
78
|
-
# output_array.each do |format, handle|
|
79
|
-
# SpecID::Precision::Filter::Output.new(format, handle)
|
80
|
-
# end
|
81
|
-
# end
|
82
|
-
|
83
|
-
# Very high level method that takes simple parameters.
|
84
|
-
# spec_id may be a filename or a SpecID object (containing peps)
|
85
|
-
# Default values may be queried from SpecID::Precision::Filter::FV_DEFAULTS
|
86
|
-
# Returns a structured hash:
|
87
|
-
# Fl = Float ; Ar = Array
|
88
|
-
# { :params => <Hash of filtering params>,
|
89
|
-
# :pephits => <Ar of pephits>,
|
90
|
-
# :pephits_precision => [<array of precision>]
|
91
|
-
# # if :proteins => true
|
92
|
-
# :prothits => <Array of prothits>,
|
93
|
-
# :prothits_precision => [ Array of hashes where each hash =
|
94
|
-
# { :worst => Fl, :normal => Fl,
|
95
|
-
# :normal_stdev => Fl } ]
|
96
|
-
# }
|
97
|
-
#
|
98
|
-
# NOTE: Brackets [] indicate an Array! The Bar '|' indicates another option.
|
99
|
-
# The asterik '*' is the default option.
|
100
|
-
#
|
101
|
-
# :sequest => {
|
102
|
-
# :xcorr1 -> >= (xcorr +1 charge state)
|
103
|
-
# :xcorr2 -> >= (xcorr +2 charge state)
|
104
|
-
# :xcorr3 -> >= (xcorr +3 charge state)
|
105
|
-
# :deltacn -> >= (delta cn)
|
106
|
-
# :ppm -> <= parts per million (Float)
|
107
|
-
# :include_deltacnstar => *true | false include deltacn (given at 1.1) of
|
108
|
-
# top hit with no 2nd hit
|
109
|
-
#
|
110
|
-
# }
|
111
|
-
# OUTPUT:
|
112
|
-
# :proteins => true | *false gives proteins (and validation)
|
113
|
-
# :output => [[format, FILENAME=nil],...] formats to output filtering results.
|
114
|
-
# can be used multiple times
|
115
|
-
# FILENAME is the filename to use
|
116
|
-
# if nil, then outputs to $stdout
|
117
|
-
# valid formats are:
|
118
|
-
# :text_table (default)
|
119
|
-
# :yaml (need to implement)
|
120
|
-
# :protein_summary (need to implement)
|
121
|
-
# :html_table (need to implement)
|
122
|
-
# default value =>
|
123
|
-
# [[:text_table,nil]]
|
124
|
-
#
|
125
|
-
# VALIDATION:
|
126
|
-
# :validators => [Array] objects that respond to pephit_precision
|
127
|
-
# usually of base class Validator
|
128
|
-
# NOTE: if you have decoy peptides, you MUST have
|
129
|
-
# a Validator::Decoy object to separate them out.
|
130
|
-
# NOTE: if transmem validator passed in, the
|
131
|
-
# proteins in spec_id must already be granted
|
132
|
-
# transmem status!
|
133
|
-
#
|
134
|
-
#
|
135
|
-
# OTHER:
|
136
|
-
# :top_hit_by -> *:xcorr | :probability
|
137
|
-
# probabilities only in bioworks.xml files right now (if
|
138
|
-
# they were calculated).
|
139
|
-
# :postfilter -> *:top_per_scan | :top_per_aaseq | :top_per_aaseq_charge
|
140
|
-
# :top_per_scan hashes by filename + scan
|
141
|
-
# :top_per_aaseq hashes by top_per_scan + aaseq
|
142
|
-
# :top_per_aaseq_charge hashes by top_per_aaseq + charge
|
143
|
-
# :prefilter -> true | *false Takes top hit per file+scan+charge
|
144
|
-
# :interactive => interactive_object
|
145
|
-
# # should behave like this:
|
146
|
-
# # interactive_object.filter_args(currentopts) -> args_for_filtering | nil (done)
|
147
|
-
#
|
148
|
-
# # interactive_object.passing(final_answer)
|
149
|
-
|
150
|
-
# The defaults for filter_and_validate
|
151
|
-
|
152
|
-
def filter_and_validate(spec_id_obj, options={})
|
153
|
-
# NOTE:
|
154
|
-
# This is a fairly complicated method. The complication comes in doing
|
155
|
-
# top hit filters on separate/cat searches wanted them to be either
|
156
|
-
# together or separate. I opt for fewer conversions between the two, but
|
157
|
-
# that means keeping track of more things...
|
158
|
-
|
159
|
-
opts = FV_DEFAULTS.merge_deep(options)
|
160
|
-
|
161
|
-
spec_id = spec_id_obj
|
162
|
-
|
163
|
-
peps = spec_id.peps
|
164
|
-
filename = spec_id.filename
|
165
|
-
|
166
|
-
#######################################
|
167
|
-
# DEFAULTS:
|
168
|
-
interactive_changing_keys = [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar, :postfilter]
|
169
|
-
interactive_shortcut_map = {
|
170
|
-
:xcorr1 => 'x1',
|
171
|
-
:xcorr2 => 'x2',
|
172
|
-
:xcorr3 => 'x3',
|
173
|
-
:deltacn => 'dcn',
|
174
|
-
:ppm => 'ppm',
|
175
|
-
:include_deltacnstar => 'dcns',
|
176
|
-
:postfilter => 'pf',
|
177
|
-
}
|
178
|
-
to_float = proc {|x| x.to_f}
|
179
|
-
to_bool = proc do |x|
|
180
|
-
case x
|
181
|
-
when /^t/io
|
182
|
-
true
|
183
|
-
when /^f/io
|
184
|
-
false
|
185
|
-
when true
|
186
|
-
true
|
187
|
-
when false
|
188
|
-
false
|
189
|
-
else
|
190
|
-
nil
|
191
|
-
end
|
192
|
-
end
|
193
|
-
to_postfilter = proc do |x|
|
194
|
-
case x
|
195
|
-
when 's'
|
196
|
-
:top_per_scan
|
197
|
-
when 'a'
|
198
|
-
:top_per_aaseq
|
199
|
-
when 'ac'
|
200
|
-
:top_per_aaseq_charge
|
201
|
-
when Symbol
|
202
|
-
x
|
203
|
-
end
|
204
|
-
end
|
205
|
-
casting_map = {
|
206
|
-
:xcorr1 => to_float,
|
207
|
-
:xcorr2 => to_float,
|
208
|
-
:xcorr3 => to_float,
|
209
|
-
:deltacn => to_float,
|
210
|
-
:ppm => to_float,
|
211
|
-
:include_deltacnstar => to_bool,
|
212
|
-
:postfilter => to_postfilter,
|
213
|
-
}
|
214
|
-
|
215
|
-
# output:
|
216
|
-
# NOTE: BOOLEANS that are by default false do not need a default!!
|
217
|
-
# They will yield false on key lookup if no key or false!
|
218
|
-
# BOOLEANS that by default are true should be queried like this
|
219
|
-
# !(opts[:<option>] == false)
|
220
|
-
|
221
|
-
# open up each of the files for writing
|
222
|
-
if opts[:output]
|
223
|
-
outputs = opts[:output].map do |format, where|
|
224
|
-
if where == nil
|
225
|
-
where = $stdout
|
226
|
-
end
|
227
|
-
SpecID::Precision::Filter::Output.new(format, where)
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
postfilters_per_hash = {
|
232
|
-
:top_per_scan => [:base_name, :first_scan],
|
233
|
-
:top_per_aaseq => [:aaseq], # first by top_per_scan, then this guy
|
234
|
-
:top_per_aaseq_charge => [:aaseq, :charge], # first by top_per_scan, then this one
|
235
|
-
}
|
236
|
-
|
237
|
-
top_hit_by__to_sort_by = {
|
238
|
-
:xcorr => [:xcorr, {:down=> [:xcorr]}],
|
239
|
-
:probability => [:probability, (spec_id.hi_prob_best ? {:down=> [:probability]} : {})],
|
240
|
-
}
|
241
|
-
sort_by_att_opts = top_hit_by__to_sort_by[opts[:top_hit_by]]
|
242
|
-
opts_for_top_hit_prefilter = {
|
243
|
-
:per => [:base_name, :first_scan, :charge],
|
244
|
-
:by => sort_by_att_opts,
|
245
|
-
:include_ties => opts[:include_ties_in_top_hit_prefilter]
|
246
|
-
}
|
247
|
-
# PRIVATE DEFAULTS:
|
248
|
-
merge_prefix = 'DECOY_'
|
249
|
-
unmerge_regexp = /^DECOY_/
|
250
|
-
|
251
|
-
#######################################
|
252
|
-
|
253
|
-
|
254
|
-
# opts_decoy = opts[:decoy]
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
# if we have a Validator::Decoy object, we will use its defaults to split
|
259
|
-
# peptides.
|
260
|
-
decoy_validator =
|
261
|
-
if opts[:validators]
|
262
|
-
decoy_vals = opts[:validators].select {|v| v.class == Validator::Decoy }
|
263
|
-
if decoy_vals.size == 0
|
264
|
-
nil
|
265
|
-
elsif decoy_vals.size == 1
|
266
|
-
decoy_vals.first
|
267
|
-
else
|
268
|
-
raise ArgumentError, "can only have one Validator::Decoy object"
|
269
|
-
end
|
270
|
-
|
271
|
-
### suck out the relevant parameters
|
272
|
-
#sep_params = [:decoy_on_match, :correct_wins].inject({}) do |hash,k|
|
273
|
-
# hash[k] = decoy_validator.send(k)
|
274
|
-
# hash
|
275
|
-
#end
|
276
|
-
else
|
277
|
-
nil
|
278
|
-
end
|
279
|
-
|
280
|
-
decoy_validator_to_split_with = nil
|
281
|
-
|
282
|
-
pep_sets =
|
283
|
-
if decoy_validator
|
284
|
-
if decoy_validator.constraint.is_a?(Regexp)
|
285
|
-
if opts[:hits_together]
|
286
|
-
decoy_validator_to_split_with = decoy_validator
|
287
|
-
[peps]
|
288
|
-
else
|
289
|
-
(target, decoy) = decoy_validator.partition(peps)
|
290
|
-
#(target, decoy) = SpecID.classify_by_prot(peps, opts_decoy, sep_params[:decoy_on_match], sep_params[:correct_wins])
|
291
|
-
[target, decoy]
|
292
|
-
end
|
293
|
-
elsif decoy_validator.constraint.is_a?(String) ## a Filename
|
294
|
-
decoy_peps = SpecID.new(decoy_validator.constraint).peps
|
295
|
-
|
296
|
-
if opts[:hits_together]
|
297
|
-
# we fake that the protein sets are together
|
298
|
-
decoy_validator_to_split_with = Validator::Decoy.new(:constraint => unmerge_regexp)
|
299
|
-
decoy_peps.each do |pep|
|
300
|
-
pep.prots.each {|prt| prt.reference = merge_prefix + prt.reference }
|
301
|
-
end
|
302
|
-
[peps + decoy_peps] # wrap them so we get the target out
|
303
|
-
else
|
304
|
-
[peps, decoy_peps]
|
305
|
-
end
|
306
|
-
else
|
307
|
-
raise ArgumentError, "Decoy::Validator#constraint must be a Regexp or valid SpecID file"
|
308
|
-
end
|
309
|
-
else
|
310
|
-
[peps] # no decoy
|
311
|
-
end
|
312
|
-
|
313
|
-
# This method doesn't seem to do so well, but a person can use a different
|
314
|
-
# one and enter in their own custom pi_0 value!
|
315
|
-
#if opts[:decoy_pi_zero]
|
316
|
-
# if pep_sets.size < 2
|
317
|
-
# raise ArgumentError, "must have a decoy validator for pi zero calculation!"
|
318
|
-
# end
|
319
|
-
# require 'pi_zero'
|
320
|
-
# (_target, _decoy) = pep_sets
|
321
|
-
# pvals = PiZero.p_values_for_sequest(*pep_sets).sort
|
322
|
-
# pi_zero = PiZero.pi_zero(pvals)
|
323
|
-
# opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
|
324
|
-
#end
|
325
|
-
|
326
|
-
if opts[:proteins]
|
327
|
-
protein_validator = Validator::ProtFromPep.new
|
328
|
-
end
|
329
|
-
|
330
|
-
### TOP HITS PREFILTER < < TOP_HITS_TOGETHER > >
|
331
|
-
###########################
|
332
|
-
# TOP HITS FILTER:
|
333
|
-
###########################
|
334
|
-
# REALLY, this guy only exists for speed and memory consumption
|
335
|
-
# If we prefilter, we don't have to filter as many hits in every
|
336
|
-
# interactive round. I'd leave this guy out if I were doing only a
|
337
|
-
# sequest filter. (I should compare results with this filter and w/o)
|
338
|
-
# This guy is very tricky since we need to consider whether they are to be
|
339
|
-
# run together or separately and not do more work than we need
|
340
|
-
# get passed_target for any case (and passed_decoy if opts[:decoy])
|
341
|
-
|
342
|
-
|
343
|
-
top_hit_prefilter = SpecID::Precision::Filter::Peps.new(:top_hit, opts_for_top_hit_prefilter) if opts[:prefilter]
|
344
|
-
|
345
|
-
if top_hit_prefilter
|
346
|
-
pep_sets.map! do |pep_set|
|
347
|
-
top_hit_prefilter.filter(pep_set)
|
348
|
-
end
|
349
|
-
end
|
350
|
-
|
351
|
-
# prepare our top hit filter:
|
352
|
-
# since we are now modulating this guy, we need to create it fresh every
|
353
|
-
# time
|
354
|
-
top_per_scan_postfilter = SpecID::Precision::Filter::Peps.new(:top_hit,
|
355
|
-
:per => postfilters_per_hash[:top_per_scan],
|
356
|
-
:by => sort_by_att_opts,
|
357
|
-
:include_ties => opts[:include_ties_in_top_hit_postfilter])
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
# Prepare to loop
|
362
|
-
# Give interactive help once here if necessary
|
363
|
-
interactive = opts[:interactive]
|
364
|
-
if interactive
|
365
|
-
ARGV.clear
|
366
|
-
interactive.out(interactive.interactive_help(interactive_changing_keys, interactive_shortcut_map)) if interactive.verbose
|
367
|
-
end
|
368
|
-
|
369
|
-
# the loop is for if we are interactive
|
370
|
-
final_answer = nil
|
371
|
-
loop do
|
372
|
-
|
373
|
-
if interactive #interactive
|
374
|
-
# a bit of a hack, but we shove on the postfilter param to modulate
|
375
|
-
opts[:sequest][:postfilter] = opts[:postfilter]
|
376
|
-
response = interactive.filter_args(opts[:sequest], interactive_changing_keys, interactive_shortcut_map, casting_map)
|
377
|
-
opts[:postfilter] = opts[:sequest].delete(:postfilter)
|
378
|
-
break if response == nil
|
379
|
-
end
|
380
|
-
|
381
|
-
# prepare our top hit filter:
|
382
|
-
# since we are now modulating this guy, we need to create it fresh every
|
383
|
-
# time
|
384
|
-
|
385
|
-
sub_postfilter =
|
386
|
-
if opts[:postfilter] == :top_per_scan
|
387
|
-
nil
|
388
|
-
else
|
389
|
-
postfilter_per_args = postfilters_per_hash[opts[:postfilter]]
|
390
|
-
SpecID::Precision::Filter::Peps.new(:top_hit,
|
391
|
-
:per => postfilter_per_args,
|
392
|
-
:by => sort_by_att_opts,
|
393
|
-
:include_ties => opts[:include_ties_in_top_hit_postfilter]
|
394
|
-
)
|
395
|
-
end
|
396
|
-
|
397
|
-
pep_sets_to_be_filtered = pep_sets.map
|
398
|
-
|
399
|
-
### SEQUEST < EITHER >
|
400
|
-
###########################
|
401
|
-
# SEQUEST FILTER:
|
402
|
-
###########################
|
403
|
-
# This guy is immune to the trickiness of top hits, so we just filter
|
404
|
-
# separately since validation is best done without decoys (except decoy)
|
405
|
-
sequest_args = opts[:sequest].values_at( :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar )
|
406
|
-
sequest_filter = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *sequest_args)
|
407
|
-
|
408
|
-
pep_sets_filtered = pep_sets_to_be_filtered.map do |pep_set|
|
409
|
-
sequest_filter.filter(pep_set)
|
410
|
-
end
|
411
|
-
|
412
|
-
### FINAL HIT PER SCAN < < TOP_HITS_TOGETHER > >
|
413
|
-
##########################
|
414
|
-
# FINAL HIT PER SCAN
|
415
|
-
##########################
|
416
|
-
# Why not just do the top hit filter in the top hits pre filter before?
|
417
|
-
# Good question. Answer: We may have instances when the top hit (by
|
418
|
-
# xcorr) has some other poorer attribute than the hit at the other charge.
|
419
|
-
# In this case, we'd end up with no passing peptide.
|
420
|
-
# Also, the xcorr filter is per charge, so we may filter out the higher
|
421
|
-
# scoring peptide hit even though the other would pass based on its charge
|
422
|
-
# state, etc., etc....
|
423
|
-
# ###################################################
|
424
|
-
# NOTE THIS WELL:
|
425
|
-
# IF IT IS SUPPOSE TO be separate it's *ALREADY* separate, if together its
|
426
|
-
# *ALREADY* together!!!!
|
427
|
-
# the implication is that we don't need to do any merging or
|
428
|
-
# separating before we do this last filter!!!!
|
429
|
-
# ###################################################
|
430
|
-
|
431
|
-
# TODO: We need to add this guy in!
|
432
|
-
#if opts[:uniq_aa]
|
433
|
-
# pep_sets_filtered.map do |pep_set|
|
434
|
-
# end
|
435
|
-
#end
|
436
|
-
|
437
|
-
pep_sets_filtered.map! do |pep_set|
|
438
|
-
top_per_scan_postfilter.filter!(pep_set)
|
439
|
-
if sub_postfilter
|
440
|
-
sub_postfilter.filter!(pep_set)
|
441
|
-
else
|
442
|
-
pep_set
|
443
|
-
end
|
444
|
-
end
|
445
|
-
|
446
|
-
normal_post_filtered_peps = pep_sets_filtered.first
|
447
|
-
|
448
|
-
# separate the decoy's out if they are together
|
449
|
-
if decoy_validator_to_split_with # only set if opts[:hits_together]!!
|
450
|
-
(target, decoy) = decoy_validator_to_split_with.partition(normal_post_filtered_peps)
|
451
|
-
pep_sets_filtered = [target, decoy]
|
452
|
-
end
|
453
|
-
|
454
|
-
### VALIDATION < SEPARATE >
|
455
|
-
pephit_precision_array = get_pephit_precision(opts[:validators], *pep_sets_filtered) if opts[:validators]
|
456
|
-
|
457
|
-
final_answer = {
|
458
|
-
:params => opts,
|
459
|
-
:pephits => pep_sets_filtered.first,
|
460
|
-
}
|
461
|
-
if pephit_precision_array
|
462
|
-
final_answer[:pephits_precision] = pephit_precision_array
|
463
|
-
end
|
464
|
-
|
465
|
-
if opts[:proteins]
|
466
|
-
protein_precision_array = peptide_precision_to_protein_precision(protein_validator, normal_post_filtered_peps, pephit_precision_array)
|
467
|
-
# this could be factored out (since we do it in protein_precision)
|
468
|
-
|
469
|
-
# merge the final prots into a unique set:
|
470
|
-
final_answer[:prothits] = normal_post_filtered_peps.inject(Set.new) do |protset, pep|
|
471
|
-
protset.merge(pep.prots)
|
472
|
-
end
|
473
|
-
final_answer[:prothits_precision] = protein_precision_array
|
474
|
-
end
|
475
|
-
|
476
|
-
## output the output
|
477
|
-
outputs.each {|output| output.print(final_answer) }
|
478
|
-
|
479
|
-
if interactive
|
480
|
-
interactive.passing(opts, final_answer)
|
481
|
-
end
|
482
|
-
|
483
|
-
if !interactive
|
484
|
-
break
|
485
|
-
end
|
486
|
-
end
|
487
|
-
# Close the filehandles
|
488
|
-
outputs.each { |output| output.close } if opts[:output]
|
489
|
-
final_answer
|
490
|
-
end
|
491
|
-
|
492
|
-
# takes peps and a peptide_precision_hash. Returns a hash with the same
|
493
|
-
# keys of peptide_precision_hash where the value is a hash with these keys:
|
494
|
-
# :worst => worstcase protein precision
|
495
|
-
# :normal => estimaton by binomial/gaussian method (optimistic)
|
496
|
-
# :normal_stdev => the stdev of the normal method
|
497
|
-
def peptide_precision_to_protein_precision(protein_validator, peps, peptide_precision_array, round_num_false=:ceil)
|
498
|
-
peptide_precision_array.map do |precision|
|
499
|
-
num_false = ((1.0 - precision) * peps.size).ceil
|
500
|
-
reply = protein_validator.prothit_precision(peps, num_false)
|
501
|
-
hash = {}
|
502
|
-
%w(worst normal normal_stdev).zip(reply) do |label, answer|
|
503
|
-
hash[label.to_sym] = answer
|
504
|
-
end
|
505
|
-
hash
|
506
|
-
end
|
507
|
-
end
|
508
|
-
|
509
|
-
# takes an array of validator objects and peps (already separated out from
|
510
|
-
# decoys; the decoy's can be passed in
|
511
|
-
# returns an array of results
|
512
|
-
def get_pephit_precision(validators, peps, decoy_peps=nil, grant_transmem_status=false)
|
513
|
-
validators.map do |validator|
|
514
|
-
if validator.class == Validator::Decoy
|
515
|
-
validator.pephit_precision(peps, decoy_peps)
|
516
|
-
else
|
517
|
-
validator.pephit_precision(peps)
|
518
|
-
end
|
519
|
-
end
|
520
|
-
end
|
521
|
-
end
|
522
|
-
|
523
|
-
class SpecID::Precision::Filter::Peps < Filter
|
524
|
-
|
525
|
-
# can pass in the method to call. If you have static options and you will
|
526
|
-
# reuse your filter, you can pass them in here.
|
527
|
-
# BEWARE: this will override any passed into the method at filter time.
|
528
|
-
# If you need to do that, make a new, blank filter and pass in your args
|
529
|
-
# at filter time
|
530
|
-
# Remembers the method to call and any static options for it.
#
# meth:: the method to call (may be nil)
# opts:: static options; stored as an array, or as nil when none are given
def initialize(meth=nil, *opts)
  @method = meth
  @opts = opts.empty? ? nil : opts
end
|
538
|
-
|
539
|
-
# passes the top peptide hits per attributes that it is hashed by
|
540
|
-
# all hits with same score as top score are returned
|
541
|
-
# assumes that all attributes are cast properly: Float,Integer, etc
|
542
|
-
# converts xcorr, deltacn, deltamass, mass, and charge into numerical types
|
543
|
-
# deletes the protein array (but not relevant proteins)
|
544
|
-
# hashes on [pep.basename, pep.first_scan.to_i, pep.charge.to_i]
|
545
|
-
# returns an array of the selected top hits
|
546
|
-
# opts
|
547
|
-
# :per => Array of attributes e.g. [:first_scan, :charge] # TODO: allow lambda
|
548
|
-
# :by => an array for sort_by_attributes
|
549
|
-
# e.g. [:xcorr, :deltacn, :ppm, {:down => [:xcorr, :deltacn]}]
|
550
|
-
# :ties => *false | true | :as_array
|
551
|
-
# false - one top hit is selected by random (by sorting)
|
552
|
-
# true - all ties are included in final answer
|
553
|
-
# :as_array - ties are included as an array
|
554
|
-
# Picks the best peptide hit(s) out of each group of peps.
#
# peps are grouped with hash_by(*opts[:per]); every group is ordered with
# sort_by_attributes(*opts[:by]) and its first element is the best hit.
# opts[:ties] decides what happens to hits that tie with the best one:
#   false/nil - only the first hit of the sorted group is kept
#   true      - every tying hit is appended to the result
#   :as_array - two or more tying hits are appended as one nested array,
#               a lone top hit is appended directly
# A hit ties when it equals the best hit on every attribute listed in
# opts[:by] (a trailing Hash in opts[:by] is not treated as an attribute).
#
# Returns the array of selected hits.
def top_hit(peps, opts = {})
  groups = peps.hash_by(*opts[:per])
  ties = opts[:ties]
  nest_ties = (ties == :as_array)
  selected = []

  groups.values.each do |group|
    ranked = group.sort_by_attributes(*opts[:by])

    unless ties
      selected << ranked.first
      next
    end

    leader = ranked.first
    # the attributes that define a tie: opts[:by] minus a trailing Hash
    by = opts[:by]
    atts = by.last.is_a?(Hash) ? by[0...-1] : by.dup
    leader_vals = atts.map { |att| leader.send(att) }

    # ranked is best-to-worst, so the tying hits form a prefix
    tied = ranked.take_while do |pep|
      atts.zip(leader_vals).all? { |att, val| pep.send(att) == val }
    end

    if nest_ties && tied.size != 1
      selected.push(tied)
    else
      selected.push(*tied)
    end
  end

  selected
end
|
613
|
-
|
614
|
-
# returns the array of peps that pass the filter
|
615
|
-
# ( >= +3 charge for the x3)
|
616
|
-
# Selects the peps that pass charge-dependent xcorr cutoffs together with
# deltacn and ppm cutoffs.
#
# peps:: objects responding to deltacn, charge, xcorr and ppm
# x1, x2, x3:: minimum xcorr for charge 1, charge 2 and charge >= 3
# deltacn:: minimum deltacn
# ppm:: maximum ppm
# include_deltacnstar:: when false, peps with a deltacn above 1.0 are
#                       rejected even if they pass everything else
#
# Returns the array of passing peps.
def standard_sequest_filter(peps, x1,x2,x3,deltacn,ppm,include_deltacnstar=true)
  peps.select do |pep|
    pep_dcn = pep.deltacn
    z = pep.charge

    next false unless pep_dcn >= deltacn

    xcorr_ok = (z == 1 && pep.xcorr >= x1) ||
               (z == 2 && pep.xcorr >= x2) ||
               (z >= 3 && pep.xcorr >= x3)
    next false unless xcorr_ok && pep.ppm <= ppm

    # the pep passed every cutoff; it is only dropped when deltacn > 1.0
    # hits were asked to be excluded
    include_deltacnstar || !(pep_dcn > 1.0)
  end
end
|
635
|
-
|
636
|
-
end
|
637
|
-
|
@@ -1,60 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
module SpecID; end
module SpecID::Precision; end

# Output support: holds an output handle plus the name of a format method
# and writes answers by calling that method with (handle, answer).
module SpecID::Precision::Output

  # format:: name of the method that print will invoke
  # handle_or_file:: a String is treated as a filename that is opened for
  #                  writing (and closed again by #close); anything else is
  #                  used as the handle as-is and never closed here
  def initialize(format, handle_or_file)
    @need_to_close = handle_or_file.is_a?(String)
    @handle = @need_to_close ? File.open(handle_or_file, 'w') : handle_or_file
    @format = format
  end

  # Sends the answer to the format method together with the handle.
  # returns self
  def print(answer)
    send(@format, @handle, answer)
    self
  end

  # Returns a new hash in which every Symbol key has been replaced by its
  # String form; values that are Hashes are converted recursively, all other
  # values and keys are kept untouched.
  def self.symbol_keys_to_string(hash)
    hash.each_with_object({}) do |(key, value), converted|
      value = symbol_keys_to_string(value) if value.is_a?(Hash)
      key = key.to_s if key.is_a?(Symbol)
      converted[key] = value
    end
  end

  # String form of a hash (currently just its inspect output).
  # TODO: implement recursively
  def hash_as_string(hash)
    hash.inspect
  end

  # Closes the handle, but only if it was opened here from a filename.
  def close
    @handle.close if @need_to_close
  end

end
|