mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/validator/cmdline.rb
DELETED
|
@@ -1,431 +0,0 @@
|
|
|
1
|
-
require 'validator'
|
|
2
|
-
|
|
3
|
-
require 'validator/true_pos'
|
|
4
|
-
require 'validator/aa'
|
|
5
|
-
require 'validator/aa_est'
|
|
6
|
-
require 'validator/bias'
|
|
7
|
-
require 'validator/decoy'
|
|
8
|
-
require 'validator/transmem'
|
|
9
|
-
require 'validator/probability'
|
|
10
|
-
require 'validator/q_value'
|
|
11
|
-
require 'validator/prot_from_pep'
|
|
12
|
-
|
|
13
|
-
## these are all for a stupid check...
|
|
14
|
-
require 'spec_id/sqt'
|
|
15
|
-
require 'spec_id/proph/prot_summary'
|
|
16
|
-
require 'spec_id/proph/pep_summary'
|
|
17
|
-
|
|
18
|
-
class Validator::Cmdline
|
|
19
|
-
|
|
20
|
-
Validator_symbols_to_classes = {
|
|
21
|
-
:tmm => Validator::Transmem::Protein,
|
|
22
|
-
:decoy => Validator::Decoy,
|
|
23
|
-
:bad_aa => Validator::AA,
|
|
24
|
-
:bad_aa_est => Validator::AAEst,
|
|
25
|
-
:tps => Validator::TruePos,
|
|
26
|
-
:bias => Validator::Bias,
|
|
27
|
-
:prob => Validator::Probability,
|
|
28
|
-
:qval => Validator::QValue,
|
|
29
|
-
}
|
|
30
|
-
# was VAL_DEFAULTS
|
|
31
|
-
DEFAULTS = {
|
|
32
|
-
:tmm =>
|
|
33
|
-
{
|
|
34
|
-
# file
|
|
35
|
-
:min_num_tmm_seqs => 1,
|
|
36
|
-
:expect_soluble => true,
|
|
37
|
-
:no_include_tm_peps => 0.8,
|
|
38
|
-
:bkg => 0.0,
|
|
39
|
-
},
|
|
40
|
-
:decoy =>
|
|
41
|
-
{
|
|
42
|
-
:hits_together => true,
|
|
43
|
-
:decoy_on_match => true,
|
|
44
|
-
:frit => 1.0, # fraction incorrect targets (like PIT)
|
|
45
|
-
},
|
|
46
|
-
:bad_aa =>
|
|
47
|
-
{
|
|
48
|
-
:false_if_found => true,
|
|
49
|
-
:bkg => 0.0,
|
|
50
|
-
},
|
|
51
|
-
:bad_aa_est =>
|
|
52
|
-
{
|
|
53
|
-
:false_if_found => true,
|
|
54
|
-
:bkg => 0.0,
|
|
55
|
-
},
|
|
56
|
-
:bias =>
|
|
57
|
-
{
|
|
58
|
-
:bkg => 0.0,
|
|
59
|
-
:proteins_expected => true,
|
|
60
|
-
},
|
|
61
|
-
:ties => true,
|
|
62
|
-
}
|
|
63
|
-
COMMAND_LINE = {
|
|
64
|
-
:decoy => ["--decoy /REGEXP/|FILENAME[,PIT,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
|
|
65
|
-
"FILENAME of separate search on decoys.",
|
|
66
|
-
"All regular expressions must be surrounded by '/'",
|
|
67
|
-
"(no extended options [trailing modifiers]).",
|
|
68
|
-
"e.g., a run using concatenated reversed proteins that",
|
|
69
|
-
"includes 'REVERSE' in the fasta heading:",
|
|
70
|
-
" --decoy /REVERSE/",
|
|
71
|
-
"Anything fancier should be quoted:",
|
|
72
|
-
" --decoy '/^\\s*REVERSE/'",
|
|
73
|
-
"If decoys proteins were searched in a separate file,",
|
|
74
|
-
"then give the FILENAME (e.g., --decoy decoy.srg)",
|
|
75
|
-
"FRIT = Fraction Incorrect Targets (like",
|
|
76
|
-
"the PIT as a fraction) (default: #{DEFAULTS[:decoy][:frit]})",
|
|
77
|
-
"DOM = *true/false, decoy on match",],
|
|
78
|
-
:tps => ["--tps <fasta>", "for a completely defined sample, this is the",
|
|
79
|
-
"fasta file containing the true protein hits"],
|
|
80
|
-
# may require digestion:
|
|
81
|
-
:fasta => ["--fasta FASTA", "fasta file for phobius transmembrane",
|
|
82
|
-
"(needed if PEPS options is not false)"],
|
|
83
|
-
:digestion => ["--digestion ORIG_FASTA,PARAMS", Array, "[not recommended]",
|
|
84
|
-
"Creates the 'false/total' ratio with in silico",
|
|
85
|
-
"digestion. Otherwise, the 3rd-10th best hits (sorted by",
|
|
86
|
-
"xcorr) are used.",
|
|
87
|
-
"The following validators will use this",
|
|
88
|
-
"information (shared between them) if option given",
|
|
89
|
-
"ORIG_FASTA = the fasta file used to do the run",
|
|
90
|
-
"PARAMS = the params file used to do the run",],
|
|
91
|
-
:bias => ["--bias FASTA[,PE,BKG]", Array, "FASTA contains proteins expected to be in the sample",
|
|
92
|
-
"PE = *true|false proteins in fasta file expected in sample",
|
|
93
|
-
"BKG = Background frequency of fps (d: #{DEFAULTS[:bias][:bkg]})",],
|
|
94
|
-
:bad_aa => ["--bad_aa AA,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
|
|
95
|
-
"AA = The amino acid (e.g., 'C')",
|
|
96
|
-
"BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa][:bkg]}):",],
|
|
97
|
-
:bad_aa_est => ["--bad_aa_est AA,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
|
|
98
|
-
"AA = The amino acid (e.g., 'C')",
|
|
99
|
-
"BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa_est][:bkg]}):",],
|
|
100
|
-
|
|
101
|
-
:tmm => ["--tmm <TM[,MIN,SOL,PEPS,BKG]>", Array, "TM = phobius.small or toppred.out file",
|
|
102
|
-
"phobius.small:",
|
|
103
|
-
"http://phobius.cgb.ki.se/",
|
|
104
|
-
"(select 'Short' output, and save output as file)",
|
|
105
|
-
"toppred.out:",
|
|
106
|
-
"http://bioweb.pasteur.fr/seqanal/interfaces/toppred.html",
|
|
107
|
-
"(output 'toppred.out' in 'New' or 'Xml' format)",
|
|
108
|
-
"MIN = Int, minimum number transmembrane seqs (def: #{DEFAULTS[:tmm][:min_num_tmm_seqs]})",
|
|
109
|
-
"SOL = true|false, this is a soluble fraction( def: #{DEFAULTS[:tmm][:expect_soluble]})",
|
|
110
|
-
"PEPS = Float | false, don't consider tm peps (>= fraction",
|
|
111
|
-
" tm content) (false skips) (def: #{DEFAULTS[:tmm][:no_include_tm_peps]})",
|
|
112
|
-
"BKG = Float , background contaminating insoluble (def: #{DEFAULTS[:tmm][:bkg]})"],
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
# VALIDATION MODIFIERS
|
|
116
|
-
:false_on_tie => ["--false_on_tie", "if peptide belongs to correct AND incorrect proteins",
|
|
117
|
-
"it will be counted as correct"],
|
|
118
|
-
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
def self.boolean(arg, default)
|
|
122
|
-
case arg
|
|
123
|
-
when 'true' ; true
|
|
124
|
-
when 'false' ; false
|
|
125
|
-
else ; default
|
|
126
|
-
end
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
PrepArgs = {
|
|
130
|
-
:prob => lambda {|ar, opts|
|
|
131
|
-
mthd =
|
|
132
|
-
if ar
|
|
133
|
-
if ar == 'nsp'
|
|
134
|
-
:probability
|
|
135
|
-
elsif ar == 'init'
|
|
136
|
-
:initial_probability
|
|
137
|
-
else
|
|
138
|
-
raise ArgumentError, "--prob [arg], optional arg can only be 'nsp' or 'init'!"
|
|
139
|
-
end
|
|
140
|
-
else
|
|
141
|
-
:probability
|
|
142
|
-
end
|
|
143
|
-
opts[:validators].push([:prob, mthd])
|
|
144
|
-
},
|
|
145
|
-
:perc_qval => lambda {|ar, opts| opts[:validators].push([:perc_qval]) },
|
|
146
|
-
:to_qvalues => lambda {|ar, opts| opts[:validators].push([:to_qvalues]) },
|
|
147
|
-
:decoy => lambda {|ar, opts|
|
|
148
|
-
myargs = [:decoy]
|
|
149
|
-
first_arg = ar[0]
|
|
150
|
-
val_opts = {}
|
|
151
|
-
val_opts[:constraint] =
|
|
152
|
-
if first_arg[0,1] == '/' and first_arg[-1,1] == '/'
|
|
153
|
-
# cast as a regular expression if it has '/ /'
|
|
154
|
-
Regexp.new(first_arg[1...-1])
|
|
155
|
-
else
|
|
156
|
-
# assume that it is a filename
|
|
157
|
-
raise ArgumentError, "File does not exist: #{first_arg}\n(was this supposed to be a regular expression? if so, should be given: /#{first_arg}/)" unless File.exist?(first_arg)
|
|
158
|
-
first_arg
|
|
159
|
-
end
|
|
160
|
-
val_opts[:frit] = (ar[1] || DEFAULTS[:decoy][:frit]).to_f
|
|
161
|
-
val_opts[:decoy_on_match] = self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match])
|
|
162
|
-
myargs.push(val_opts)
|
|
163
|
-
opts[:validators].push(myargs)
|
|
164
|
-
},
|
|
165
|
-
:fasta => lambda {|arg, opts|
|
|
166
|
-
opts[:fasta] = Fasta.new(arg)
|
|
167
|
-
},
|
|
168
|
-
:digestion => lambda {|ar, opts|
|
|
169
|
-
raise(ArgumentError, "need fasta and sequest params!") if ar.size != 2
|
|
170
|
-
opts[:digestion] = ar.dup
|
|
171
|
-
opts[:digestion_objects] = [Fasta.new(ar[0]), Sequest::Params.new(ar[1])]
|
|
172
|
-
},
|
|
173
|
-
:bias => lambda {|ar, opts|
|
|
174
|
-
myargs = [:bias]
|
|
175
|
-
myargs.push( Fasta.new(ar[0]) )
|
|
176
|
-
val_opts = {}
|
|
177
|
-
val_opts[:proteins_expected] = self.boolean(ar[1], DEFAULTS[:bias][:proteins_expected])
|
|
178
|
-
val_opts[:background] =
|
|
179
|
-
if ar[2]
|
|
180
|
-
ar[2].to_f
|
|
181
|
-
else
|
|
182
|
-
DEFAULTS[:bias][:bkg]
|
|
183
|
-
end
|
|
184
|
-
if ar[3]
|
|
185
|
-
val_opts[:false_to_total_ratio] = ar[3].to_f
|
|
186
|
-
end
|
|
187
|
-
myargs.push(val_opts)
|
|
188
|
-
opts[:validators].push(myargs)
|
|
189
|
-
},
|
|
190
|
-
:bad_aa => lambda {|ar, opts|
|
|
191
|
-
## GET the FREQUENCY
|
|
192
|
-
myargs = [:bad_aa]
|
|
193
|
-
myargs.push( ar[0] )
|
|
194
|
-
val_opts = {}
|
|
195
|
-
val_opts[:background] =
|
|
196
|
-
if ar[1]
|
|
197
|
-
ar[1].to_f
|
|
198
|
-
else
|
|
199
|
-
DEFAULTS[:bad_aa][:bkg]
|
|
200
|
-
end
|
|
201
|
-
if ar[2]
|
|
202
|
-
val_opts[:false_to_total_ratio] = ar[2].to_f
|
|
203
|
-
end
|
|
204
|
-
myargs.push(val_opts)
|
|
205
|
-
opts[:validators].push(myargs)
|
|
206
|
-
},
|
|
207
|
-
:bad_aa_est => lambda {|ar, opts|
|
|
208
|
-
## GET the FREQUENCY
|
|
209
|
-
myargs = [:bad_aa_est]
|
|
210
|
-
myargs.push( ar[0] )
|
|
211
|
-
val_opts = {}
|
|
212
|
-
val_opts[:background] =
|
|
213
|
-
if ar[1]
|
|
214
|
-
ar[1].to_f
|
|
215
|
-
else
|
|
216
|
-
DEFAULTS[:bad_aa_est][:bkg]
|
|
217
|
-
end
|
|
218
|
-
if ar[2]
|
|
219
|
-
val_opts[:frequency] = ar[2].to_f
|
|
220
|
-
end
|
|
221
|
-
myargs.push(val_opts)
|
|
222
|
-
opts[:validators].push(myargs)
|
|
223
|
-
},
|
|
224
|
-
|
|
225
|
-
:tmm => lambda {|ar, opts|
|
|
226
|
-
myargs = [:tmm]
|
|
227
|
-
myargs.push( ar[0] )
|
|
228
|
-
val_opts = {}
|
|
229
|
-
val_opts[:min_num_tms] =
|
|
230
|
-
if ar[1] ; ar[1].to_i
|
|
231
|
-
else ; DEFAULTS[:tmm][:min_num_tmm_seqs]
|
|
232
|
-
end
|
|
233
|
-
val_opts[:soluble_fraction] = self.boolean(ar[2], DEFAULTS[:tmm][:expect_soluble])
|
|
234
|
-
val_opts[:no_include_tm_peps] =
|
|
235
|
-
if ar[3]
|
|
236
|
-
case ar[3]
|
|
237
|
-
when 'false' ; false
|
|
238
|
-
else ; ar[3].to_f
|
|
239
|
-
end
|
|
240
|
-
else ; DEFAULTS[:tmm][:no_include_tm_peps]
|
|
241
|
-
end
|
|
242
|
-
val_opts[:background] =
|
|
243
|
-
if ar[4] ; ar[4].to_f
|
|
244
|
-
else ; DEFAULTS[:tmm][:bkg]
|
|
245
|
-
end
|
|
246
|
-
if ar[5]
|
|
247
|
-
val_opts[:false_to_total_ratio] = ar[5].to_f
|
|
248
|
-
end
|
|
249
|
-
myargs.push(val_opts)
|
|
250
|
-
opts[:validators].push( myargs )
|
|
251
|
-
},
|
|
252
|
-
:pephits => lambda {|v,opts| opts[:pephits] = SpecID.new(v) },
|
|
253
|
-
:tps => lambda {|v,opts| opts[:validators].push([:tps, Fasta.new(v)]) },
|
|
254
|
-
:false_on_tie => lambda {|v,opts| opts[:ties] = false },
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
def self.requires_pephits?(spec_id_obj)
|
|
258
|
-
case spec_id_obj
|
|
259
|
-
when Proph::ProtSummary : true
|
|
260
|
-
# at least currently (subject to change)
|
|
261
|
-
when Proph::PepSummary : true
|
|
262
|
-
when SQTGroup
|
|
263
|
-
if spec_id_obj.peps.first.respond_to?(:q_value)
|
|
264
|
-
# it's percolator output and we don't have other hits to use
|
|
265
|
-
true
|
|
266
|
-
else
|
|
267
|
-
false
|
|
268
|
-
end
|
|
269
|
-
else ; false
|
|
270
|
-
end
|
|
271
|
-
end
|
|
272
|
-
|
|
273
|
-
# remove the keys from opts involved in validators and return an array
|
|
274
|
-
# of validators
|
|
275
|
-
# postfilter is one of :top_per_scan, :top_per_aaseq,
|
|
276
|
-
# :top_per_aaseq_charge (of which last two are subsets of scan)
|
|
277
|
-
def self.prepare_validators(opts, false_on_tie, interactive, postfilter, spec_id)
|
|
278
|
-
|
|
279
|
-
validator_args = opts[:validators]
|
|
280
|
-
if validator_args.any? {|v| v.first == :to_qvalues }
|
|
281
|
-
prob_val_args_ar = validator_args.select {|v| v.first == :prob }.first
|
|
282
|
-
prob_method =
|
|
283
|
-
if prob_val_args_ar && prob_val_args_ar[1]
|
|
284
|
-
prob_val_args_ar[1]
|
|
285
|
-
else
|
|
286
|
-
:probability
|
|
287
|
-
end
|
|
288
|
-
validator_args.reject! {|v| v.first == :prob }
|
|
289
|
-
|
|
290
|
-
require 'vec'
|
|
291
|
-
require 'qvalue'
|
|
292
|
-
|
|
293
|
-
# get a list of p-values
|
|
294
|
-
pvals = spec_id.peps.map do |pep|
|
|
295
|
-
val = 1.0 - pep.send(prob_method)
|
|
296
|
-
val = 1e-9 if val == 0
|
|
297
|
-
val
|
|
298
|
-
end
|
|
299
|
-
File.open("TMP_PVALUES.txt", 'w') {|v| v.puts pvals.sort.join(" ") }
|
|
300
|
-
pvals = VecD.new(pvals)
|
|
301
|
-
#qvals = pvals.qvalues(false, :lambda_vals => 0.30 )
|
|
302
|
-
qvals = pvals.qvalues
|
|
303
|
-
qvals.zip(spec_id.peps) do |qval,pep|
|
|
304
|
-
pep.q_value = qval
|
|
305
|
-
end
|
|
306
|
-
end
|
|
307
|
-
|
|
308
|
-
validator_args.map! do |v|
|
|
309
|
-
if v.first == :to_qvalues || v.first == :perc_qval
|
|
310
|
-
[:qval]
|
|
311
|
-
else
|
|
312
|
-
v
|
|
313
|
-
end
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
correct_wins = !false_on_tie
|
|
317
|
-
need_false_to_total_ratio = []
|
|
318
|
-
need_frequency = []
|
|
319
|
-
transmem_vals = []
|
|
320
|
-
validators = validator_args.map do |args|
|
|
321
|
-
tp = args.shift
|
|
322
|
-
val_args = args.dup # protect the original keys
|
|
323
|
-
val_args =
|
|
324
|
-
case tp
|
|
325
|
-
when :tmm
|
|
326
|
-
val_args[1][:correct_wins] = correct_wins
|
|
327
|
-
if opts.key?(:fasta)
|
|
328
|
-
val_args[1][:fasta] = opts[:fasta]
|
|
329
|
-
end
|
|
330
|
-
val_args
|
|
331
|
-
when :bias
|
|
332
|
-
val_args[1][:correct_wins] = correct_wins
|
|
333
|
-
val_args
|
|
334
|
-
when :tps
|
|
335
|
-
val_args = [val_args[0], correct_wins]
|
|
336
|
-
val_args
|
|
337
|
-
when :decoy
|
|
338
|
-
val_args[0][:correct_wins] = correct_wins
|
|
339
|
-
# don't delete the key here since we need the decoy = regexp key
|
|
340
|
-
val_args
|
|
341
|
-
else ## bad_aa, prob, and qval are represented here:
|
|
342
|
-
val_args
|
|
343
|
-
end
|
|
344
|
-
val = Validator_symbols_to_classes[tp].new( *val_args )
|
|
345
|
-
# make some lists of validators based on pre-processing needs:
|
|
346
|
-
if tp == :tmm
|
|
347
|
-
transmem_vals << val
|
|
348
|
-
end
|
|
349
|
-
potential_digestion_classes = /Transmem|AA|AAEst|Bias/
|
|
350
|
-
if val.class.to_s =~ potential_digestion_classes
|
|
351
|
-
if val.class.to_s == 'Validator::AAEst'
|
|
352
|
-
need_frequency.push(val) if val.frequency.nil?
|
|
353
|
-
elsif !(val.false_to_total_ratio.nil?)
|
|
354
|
-
$stderr.puts "using false_to_total_ratio: #{val.false_to_total_ratio}"
|
|
355
|
-
else
|
|
356
|
-
need_false_to_total_ratio << val
|
|
357
|
-
end
|
|
358
|
-
end
|
|
359
|
-
val
|
|
360
|
-
end
|
|
361
|
-
|
|
362
|
-
if ((need_false_to_total_ratio.size > 0) or (need_frequency.size > 0))
|
|
363
|
-
if opts.key?(:digestion_objects)
|
|
364
|
-
#raise ArgumentError, "requires --digestion fasta,params argument!" if !opts.key?(:digestion_objects)
|
|
365
|
-
peps = Digestor.digest( *(opts[:digestion_objects]) )
|
|
366
|
-
need_false_to_total_ratio.each do |val|
|
|
367
|
-
val.set_false_to_total_ratio( peps )
|
|
368
|
-
end
|
|
369
|
-
if need_frequency.size > 0
|
|
370
|
-
need_frequency.each do |val|
|
|
371
|
-
val.set_frequency( opts[:digestion_objects][0] )
|
|
372
|
-
end
|
|
373
|
-
end
|
|
374
|
-
opts.delete(:digestion_objects)
|
|
375
|
-
else ## do the new and improved selection of non-top hits to get false_to_total_ratios and freqs
|
|
376
|
-
$stderr.puts "...using pephits to calculate background ratios"
|
|
377
|
-
# first_index, last_index
|
|
378
|
-
pephits =
|
|
379
|
-
if opts[:pephits] ## protein prophet (since it needs to get ratios somewhere
|
|
380
|
-
$stderr.puts "using --pephits"
|
|
381
|
-
opts[:pephits].peps
|
|
382
|
-
elsif requires_pephits?(spec_id)
|
|
383
|
-
raise ArgumentError, "with objects of class '#{spec_id.class}', one of your validators requires --pephits or --digestion"
|
|
384
|
-
else
|
|
385
|
-
$stderr.puts "using given spec_id.peps"
|
|
386
|
-
spec_id.peps
|
|
387
|
-
end
|
|
388
|
-
|
|
389
|
-
not_first_or_second_peps = Sequest.other_hits_sorted_by_xcorr(pephits, 2, 9, [:base_name, :first_scan, :charge])
|
|
390
|
-
pephits =
|
|
391
|
-
case postfilter
|
|
392
|
-
when :top_per_scan
|
|
393
|
-
$stderr.puts "using top_per_scan" ; not_first_or_second_peps
|
|
394
|
-
when :top_per_aaseq
|
|
395
|
-
# it doesn't matter which one is given since validators are
|
|
396
|
-
# based on amino acid sequence
|
|
397
|
-
$stderr.puts 'using top_per_aaseq'
|
|
398
|
-
not_first_or_second_peps.hash_by(:aaseq).values.map {|pep| pep.first }
|
|
399
|
-
when :top_per_aaseq_charge
|
|
400
|
-
$stderr.puts 'using top_per_aaseq_charge'
|
|
401
|
-
not_first_or_second_peps.hash_by(:aaseq, :charge).values.map {|pep| pep.first }
|
|
402
|
-
else
|
|
403
|
-
raise ArgumentError, "must have a valid postfilter method, yours: '#{postfilter}'"
|
|
404
|
-
end
|
|
405
|
-
|
|
406
|
-
need_false_to_total_ratio.each do |val|
|
|
407
|
-
val.set_false_to_total_ratio( pephits )
|
|
408
|
-
$stderr.puts "false_to_total_ratio for #{val.class.to_s}: #{val.false_to_total_ratio}"
|
|
409
|
-
end
|
|
410
|
-
if need_frequency.size > 0
|
|
411
|
-
need_frequency.each do |val|
|
|
412
|
-
$stderr.puts "Setting frequency!"
|
|
413
|
-
val.set_frequency( pephits )
|
|
414
|
-
end
|
|
415
|
-
end
|
|
416
|
-
end
|
|
417
|
-
end
|
|
418
|
-
|
|
419
|
-
if (transmem_vals.size > 0) # and interactive ## we'd like to just run this for interactive
|
|
420
|
-
# This is overkill if we are doing a single filtering job, but it
|
|
421
|
-
# ensures that it works in all the ways I'm doing it. Should
|
|
422
|
-
# refactor eventually !!
|
|
423
|
-
transmem_vals.each do |val| ## but, prob uses it too!
|
|
424
|
-
val.transmem_status_hash = val.create_transmem_status_hash(spec_id.peps)
|
|
425
|
-
end
|
|
426
|
-
end
|
|
427
|
-
validators
|
|
428
|
-
|
|
429
|
-
end
|
|
430
|
-
|
|
431
|
-
end
|
data/lib/validator/decoy.rb
DELETED
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
require 'validator'
|
|
2
|
-
|
|
3
|
-
class Validator::Decoy < Validator
|
|
4
|
-
include Precision::Calculator::Decoy
|
|
5
|
-
|
|
6
|
-
# a Regexp (if concatenated) or a String (the filename of separate run)
|
|
7
|
-
attr_accessor :constraint
|
|
8
|
-
|
|
9
|
-
attr_accessor :decoy_on_match
|
|
10
|
-
attr_accessor :correct_wins
|
|
11
|
-
# This is the number of incorrect target hits over the total decoy hits
|
|
12
|
-
# The percent incorrect targets (PIT) expressed as a fraction (== 1 - PI_0).
|
|
13
|
-
# The rough, conservative ballpark estimate is the ratio of target hits to
|
|
14
|
-
# decoy hits. This can be refined by removing the number of true target
|
|
15
|
-
# hits from the targets used to calculate it.
|
|
16
|
-
attr_accessor :frit
|
|
17
|
-
|
|
18
|
-
attr_accessor :last_pep_was_decoy
|
|
19
|
-
|
|
20
|
-
attr_accessor :increment_normal
|
|
21
|
-
attr_accessor :increment_decoy
|
|
22
|
-
attr_accessor :increment_total_submitted
|
|
23
|
-
|
|
24
|
-
attr_reader :normal_peps_just_submitted
|
|
25
|
-
|
|
26
|
-
DEFAULTS = {
|
|
27
|
-
:decoy_on_match => true,
|
|
28
|
-
:correct_wins => true,
|
|
29
|
-
:frit => 1.0,
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
def initialize(opts={})
|
|
33
|
-
merged = DEFAULTS.merge(opts)
|
|
34
|
-
@constraint, @decoy_on_match, @correct_wins, @frit = merged.values_at(:constraint, :decoy_on_match, :correct_wins, :frit)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
# returns [normal, decoy] (?? I think ??)
|
|
38
|
-
# reads the full protein reference
|
|
39
|
-
def partition(peps)
|
|
40
|
-
if @decoy_on_match
|
|
41
|
-
if @correct_wins
|
|
42
|
-
peps.partition do |pep|
|
|
43
|
-
!(pep.prots.all? {|prot| prot.reference.match(@constraint) })
|
|
44
|
-
end
|
|
45
|
-
else # fp wins
|
|
46
|
-
peps.partition do |pep|
|
|
47
|
-
!(pep.prots.any? {|prot| prot.reference.match(@constraint) })
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
else
|
|
51
|
-
if @correct_wins
|
|
52
|
-
peps.partition do |pep|
|
|
53
|
-
pep.prots.any? {|prot| prot.reference.match(@constraint) }
|
|
54
|
-
end
|
|
55
|
-
else
|
|
56
|
-
peps.partition do |pep|
|
|
57
|
-
pep.prots.all? {|prot| prot.reference.match(@constraint) }
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def initialize_increment
|
|
64
|
-
@increment_normal = 0
|
|
65
|
-
@increment_decoy = 0
|
|
66
|
-
@increment_total_submitted = 0
|
|
67
|
-
@increment_initialized = true
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
# does not deal in separate_peps right now!!
|
|
72
|
-
# will take an array or single peptide
|
|
73
|
-
# Adds +peps+ to the running normal/decoy totals and returns the result of
# calc_precision for the accumulated counts.
#
# peps:: a single SpecID::Pep or a collection of peptides
#
# Side effects: lazily initializes the counters on first use, updates
# @increment_total_submitted, @increment_normal and @increment_decoy, and
# stores the normal peptides of this call in @normal_peps_just_submitted.
def increment_pephits_precision(peps)
  # Test for the ivar explicitly instead of temporarily nil-ing the global
  # $VERBOSE: no warning is emitted, and no process-wide state is left
  # modified if initialize_increment raises.
  unless instance_variable_defined?(:@increment_initialized) && @increment_initialized
    initialize_increment
  end

  to_submit = peps.is_a?(SpecID::Pep) ? [peps] : peps

  @increment_total_submitted += to_submit.size
  normal, decoy = partition(to_submit)
  @normal_peps_just_submitted = normal
  @increment_normal += normal.size
  @increment_decoy += decoy.size
  calc_precision(@increment_normal, @increment_decoy, @frit)
end
|
|
92
|
-
|
|
93
|
-
# Returns calc_precision for a set of peptide hits.
#
# peps::          the hits; treated as the normal hits when +separate_peps+
#                 is given, otherwise split with #partition
# separate_peps:: optional collection counted as the decoy hits
def pephit_precision(peps, separate_peps=nil)
  # caller already separated the two populations: just count them
  return calc_precision(peps.size, separate_peps.size, @frit) if separate_peps

  normal_hits, decoy_hits = partition(peps)
  calc_precision(normal_hits.size, decoy_hits.size, @frit)
end
|
|
101
|
-
|
|
102
|
-
# Renders the decoy settings as a single string of the form
#   decoy={constraint=..., decoy_on_match=..., correct_wins=...}
# The constraint is shown via #inspect, or left blank when it is not set.
def to_param_string
  constraint_text = constraint ? constraint.inspect : ''
  fields = [
    "constraint=#{constraint_text}",
    "decoy_on_match=#{@decoy_on_match}",
    "correct_wins=#{@correct_wins}"
  ]
  "decoy=" + "{" + fields.join(", ") + "}"
end
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
require 'validator'
|
|
2
|
-
require 'fasta'
|
|
3
|
-
require 'spec_id/sequest/params'
|
|
4
|
-
|
|
5
|
-
# objects of this class can calculate pephit_precision given an array of
|
|
6
|
-
# SpecID::Pep objects using the pephit_precision method.
|
|
7
|
-
class Validator::DigestionBased < Validator
  # Option defaults.  :false_to_total_ratio is deliberately left out because
  # it needs to be set explicitly.
  DEFAULTS = {
    #:false_to_total_ratio => 1.0,
    :background => 0.0,
  }

  # the number of tps
  attr_accessor :increment_tps

  # the number of fps
  attr_accessor :increment_fps

  # the total peptides submitted to the validator (regardless of tp, fp, or
  # nil)
  attr_accessor :increment_total_submitted

  # the ratio of false hits to total peptides in the fasta file
  attr_accessor :false_to_total_ratio

  # the false_to_total_ratio calculated (but not applied)
  attr_reader :calculated_background

  # For a sample with no false hits in it, (under defaults) this is the
  # fraction of peptides with the constraint over the total number of
  # peptides from which these hits are derived.
  attr_accessor :background

  # Precision for +peps+.  Relies on the including class to provide a
  # partition method (returning [tps, fps]) and a background value.
  def pephit_precision(peps)
    true_hits, false_hits = partition(peps)
    estimated_tps, estimated_fps = calc_precision_prep(true_hits.size, false_hits.size)
    calc_precision(estimated_tps, estimated_fps)
  end

  # Converts raw partition counts into estimated counts.
  # returns [num_tps, num_fps]
  def calc_precision_prep(num_tps, num_fps)
    passing_total = num_tps + num_fps
    background_adjusted_fps = adjust_fps_for_background(num_tps, num_fps, background)

    # Scale the observed false hits up by the false_to_total_ratio to
    # estimate how many hits are really incorrect:
    #   FALSE/TOTAL  = FALSE(found)/TOTAL(found)
    #   TOTAL(found) = FALSE(found) / (FALSE/TOTAL)
    estimated_false = background_adjusted_fps / false_to_total_ratio

    # The true-positive estimate is taken against the number of peptides
    # that actually came out of the partition (partition may drop some).
    [passing_total.to_f - estimated_false, estimated_false]
  end

  # Sets false_to_total_ratio to the fraction of +peps+ that partition
  # places in its second (fps) group.
  # returns self
  def set_false_to_total_ratio(peps)
    true_hits, false_hits = partition(peps)
    combined = true_hits.size + false_hits.size
    self.false_to_total_ratio = false_hits.size.to_f / combined
    self
  end

end
|
|
69
|
-
|
|
70
|
-
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
# calculates precision based on the Benjamini-Hochberg FDR method.
|
|
3
|
-
# @TODO: class should probably be renamed to reflect method used!
|
|
4
|
-
# or options given to specify different methods (i.e., q-value)??
|
|
5
|
-
class Validator::Probability

  # name of the method sent to each object to obtain its probability
  attr_accessor :prob_method

  # prob_method:: symbol naming the probability accessor on the objects
  #               that will be submitted (defaults to :probability)
  def initialize(prob_method=:probability)
    @prob_method = prob_method
  end

  # objs should respond to the configured prob_method.
  # Returns 1.0 - SUM(1 - prob)/#objs, or 1.0 when objs is empty.
  def precision(objs)
    return 1.0 if objs.size == 0

    sum_one_minus_prob = 0.0
    objs.each do |obj|
      sum_one_minus_prob += 1.0 - obj.send(@prob_method)
    end
    1.0 - (sum_one_minus_prob / objs.size)
  end

  # Running version of #precision: every call adds +objs+ (a single
  # SpecID::Pep / SpecID::Prot or a collection) to the accumulated totals
  # and returns the precision over everything submitted so far.
  # objs should respond to the configured prob_method.
  # These should be added from high probability (1.0) to low (0.0).
  def increment_precision(objs)
    objs = [objs] if objs.is_a?(SpecID::Pep) || objs.is_a?(SpecID::Prot)

    @total_objs ||= 0
    @current_sum_one_minus_prob ||= 0.0

    @total_objs += objs.size
    objs.each do |obj|
      @current_sum_one_minus_prob += 1.0 - obj.send(@prob_method)
    end

    # Nothing submitted yet: 0.0 / 0 would yield NaN, so agree with
    # #precision, which reports 1.0 for an empty set.
    return 1.0 if @total_objs == 0

    1.0 - (@current_sum_one_minus_prob / @total_objs)
  end

  alias_method :pephit_precision, :precision
  alias_method :prothit_precision, :precision
  alias_method :increment_pephits_precision, :increment_precision
end
|