mspire 0.4.9 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/validator/cmdline.rb
DELETED
@@ -1,431 +0,0 @@
|
|
1
|
-
require 'validator'
|
2
|
-
|
3
|
-
require 'validator/true_pos'
|
4
|
-
require 'validator/aa'
|
5
|
-
require 'validator/aa_est'
|
6
|
-
require 'validator/bias'
|
7
|
-
require 'validator/decoy'
|
8
|
-
require 'validator/transmem'
|
9
|
-
require 'validator/probability'
|
10
|
-
require 'validator/q_value'
|
11
|
-
require 'validator/prot_from_pep'
|
12
|
-
|
13
|
-
## these all for a stupid check...
|
14
|
-
require 'spec_id/sqt'
|
15
|
-
require 'spec_id/proph/prot_summary'
|
16
|
-
require 'spec_id/proph/pep_summary'
|
17
|
-
|
18
|
-
class Validator::Cmdline
|
19
|
-
|
20
|
-
# Maps the validator symbols used on the command line (and in PrepArgs) to
# the validator class that prepare_validators instantiates for each.
Validator_symbols_to_classes = {
  :tmm        => Validator::Transmem::Protein,
  :decoy      => Validator::Decoy,
  :bad_aa     => Validator::AA,
  :bad_aa_est => Validator::AAEst,
  :tps        => Validator::TruePos,
  :bias       => Validator::Bias,
  :prob       => Validator::Probability,
  :qval       => Validator::QValue,
}
|
30
|
-
# was VAL_DEFAULTS
|
31
|
-
# Default settings, keyed by validator symbol. These values are interpolated
# into the help text in COMMAND_LINE and used by PrepArgs whenever the
# corresponding optional command line field is omitted.
DEFAULTS = {
  :tmm => {
    # file
    :min_num_tmm_seqs => 1,
    :expect_soluble => true,
    :no_include_tm_peps => 0.8,
    :bkg => 0.0,
  },
  :decoy => {
    :hits_together => true,
    :decoy_on_match => true,
    :frit => 1.0,  # fraction incorrect targets (like PIT)
  },
  :bad_aa => {
    :false_if_found => true,
    :bkg => 0.0,
  },
  :bad_aa_est => {
    :false_if_found => true,
    :bkg => 0.0,
  },
  :bias => {
    :bkg => 0.0,
    :proteins_expected => true,
  },
  # true: a peptide tied between correct and incorrect proteins counts as
  # correct (the --false_on_tie switch sets this to false)
  :ties => true,
}

# Option definitions, keyed by option symbol. Each value is an argument list
# in the shape OptionParser#on accepts: the switch/usage string, an optional
# coercion class (Array => comma separated list), then the help text lines.
COMMAND_LINE = {
  # the usage string names the second field FRIT to agree with the help text
  # below (it previously said PIT)
  :decoy => ["--decoy /REGEXP/|FILENAME[,FRIT,DOM]", Array, "REGEXP for decoy proteins (catenated searches) or a",
    "FILENAME of separate search on decoys.",
    "All regular expressions must be surrounded by '/'",
    "(no extended options [trailing modifiers]).",
    "e.g., a run using concatenated reversed proteins that",
    "includes 'REVERSE' in the fasta heading:",
    " --decoy /REVERSE/",
    "Anything fancier should be quoted:",
    " --decoy '/^\\s*REVERSE/'",
    "If decoys proteins were searched in a separate file,",
    "then give the FILENAME (e.g., --decoy decoy.srg)",
    "FRIT = Fraction Incorrect Targets (like",
    "the PIT as a fraction) (default: #{DEFAULTS[:decoy][:frit]})",
    "DOM = *true/false, decoy on match",],
  :tps => ["--tps <fasta>", "for a completely defined sample, this is the",
    "fasta file containing the true protein hits"],
  # may require digestion:
  :fasta => ["--fasta FASTA", "fasta file for phobius transmembrane",
    "(needed if PEPS options is not false)"],
  :digestion => ["--digestion ORIG_FASTA,PARAMS", Array, "[not recommended]",
    "Creates the 'false/total' ratio with in silico",
    "digestion. Otherwise, the 3rd-10th best hits (sorted by",
    "xcorr) are used.",
    "The following validators will use this",
    "information (shared between them) if option given",
    "ORIG_FASTA = the fasta file used to do the run",
    "PARAMS = the params file used to do the run",],
  :bias => ["--bias FASTA[,PE,BKG]", Array, "FASTA contains proteins expected to be in the sample",
    "PE = *true|false proteins in fasta file expected in sample",
    "BKG = Background frequency of fps (d: #{DEFAULTS[:bias][:bkg]})",],
  # BKG is optional (PrepArgs falls back to the default), so it is bracketed;
  # the usage string previously had an unbalanced ']'
  :bad_aa => ["--bad_aa AA[,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
    "AA = The amino acid (e.g., 'C')",
    "BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa][:bkg]}):",],
  :bad_aa_est => ["--bad_aa_est AA[,BKG]", Array, "An amino acid expected (or not expected) in legitimate hits",
    "AA = The amino acid (e.g., 'C')",
    "BKG = Background frequency of genuine pephits (d: #{DEFAULTS[:bad_aa_est][:bkg]}):",],

  :tmm => ["--tmm <TM[,MIN,SOL,PEPS,BKG]>", Array, "TM = phobius.small or toppred.out file",
    "phobius.small:",
    "http://phobius.cgb.ki.se/",
    "(select 'Short' output, and save output as file)",
    "toppred.out:",
    "http://bioweb.pasteur.fr/seqanal/interfaces/toppred.html",
    "(output 'toppred.out' in 'New' or 'Xml' format)",
    "MIN = Int, minimum number transmembrane seqs (def: #{DEFAULTS[:tmm][:min_num_tmm_seqs]})",
    "SOL = true|false, this is a soluble fraction( def: #{DEFAULTS[:tmm][:expect_soluble]})",
    "PEPS = Float | false, don't consider tm peps (>= fraction",
    " tm content) (false skips) (def: #{DEFAULTS[:tmm][:no_include_tm_peps]})",
    "BKG = Float , background contaminating insoluble (def: #{DEFAULTS[:tmm][:bkg]})"],

  # VALIDATION MODIFIERS
  # giving this switch makes ties count as INCORRECT (the help text used to
  # describe the default behavior instead of the effect of the switch)
  :false_on_tie => ["--false_on_tie", "if peptide belongs to correct AND incorrect proteins",
    "it will be counted as incorrect (default: counted as correct)"],

}
|
120
|
-
|
121
|
-
# Interprets a command line token as a boolean.
# Returns true for the string 'true', false for the string 'false', and
# +default+ for anything else (including nil when the token was omitted).
def self.boolean(arg, default)
  return true if 'true' == arg
  return false if 'false' == arg
  default
end
|
128
|
-
|
129
|
-
# Handlers that turn a parsed command line value into entries in +opts+.
# Every handler takes (value, opts). The validator handlers append an array
# of the form [validator_symbol, *constructor_args] to opts[:validators];
# the remaining handlers store supporting objects directly in opts.
PrepArgs = {
  # optional argument: 'nsp' (or nothing) => :probability, 'init' => :initial_probability
  :prob => lambda { |ar, opts|
    prob_method = :probability
    if ar
      prob_method = { 'nsp' => :probability, 'init' => :initial_probability }[ar]
      raise ArgumentError, "--prob [arg], optional arg can only be 'nsp' or 'init'!" unless prob_method
    end
    opts[:validators].push([:prob, prob_method])
  },

  :perc_qval => lambda { |ar, opts| opts[:validators].push([:perc_qval]) },

  :to_qvalues => lambda { |ar, opts| opts[:validators].push([:to_qvalues]) },

  # ar = [/REGEXP/ or FILENAME, FRIT, DOM]
  :decoy => lambda { |ar, opts|
    target = ar[0]
    slash_delimited = (target[0,1] == '/') && (target[-1,1] == '/')
    constraint =
      if slash_delimited
        # strip the surrounding slashes and compile what is left
        Regexp.new(target[1...-1])
      else
        # anything else is taken to be the name of an existing file
        unless File.exist?(target)
          raise ArgumentError, "File does not exist: #{target}\n(was this supposed to be a regular expression? if so, should be given: /#{target}/)"
        end
        target
      end
    settings = {
      :constraint => constraint,
      :frit => (ar[1] || DEFAULTS[:decoy][:frit]).to_f,
      :decoy_on_match => self.boolean(ar[2], DEFAULTS[:decoy][:decoy_on_match]),
    }
    opts[:validators].push([:decoy, settings])
  },

  :fasta => lambda { |arg, opts| opts[:fasta] = Fasta.new(arg) },

  # ar = [ORIG_FASTA, PARAMS]; exactly two values are required
  :digestion => lambda { |ar, opts|
    raise(ArgumentError, "need fasta and sequest params!") unless ar.size == 2
    opts[:digestion] = ar.dup
    opts[:digestion_objects] = [Fasta.new(ar[0]), Sequest::Params.new(ar[1])]
  },

  # ar = [FASTA, PE, BKG, (false_to_total_ratio)]
  :bias => lambda { |ar, opts|
    expected_fasta = Fasta.new(ar[0])
    settings = {
      :proteins_expected => self.boolean(ar[1], DEFAULTS[:bias][:proteins_expected]),
      :background => (ar[2] ? ar[2].to_f : DEFAULTS[:bias][:bkg]),
    }
    settings[:false_to_total_ratio] = ar[3].to_f if ar[3]
    opts[:validators].push([:bias, expected_fasta, settings])
  },

  # ar = [AA, BKG, (false_to_total_ratio)]
  :bad_aa => lambda { |ar, opts|
    settings = { :background => (ar[1] ? ar[1].to_f : DEFAULTS[:bad_aa][:bkg]) }
    settings[:false_to_total_ratio] = ar[2].to_f if ar[2]
    opts[:validators].push([:bad_aa, ar[0], settings])
  },

  # ar = [AA, BKG, (frequency)]
  :bad_aa_est => lambda { |ar, opts|
    settings = { :background => (ar[1] ? ar[1].to_f : DEFAULTS[:bad_aa_est][:bkg]) }
    settings[:frequency] = ar[2].to_f if ar[2]
    opts[:validators].push([:bad_aa_est, ar[0], settings])
  },

  # ar = [TM, MIN, SOL, PEPS, BKG, (false_to_total_ratio)]
  :tmm => lambda { |ar, opts|
    tm_pep_cutoff =
      if !ar[3]
        DEFAULTS[:tmm][:no_include_tm_peps]
      elsif ar[3] == 'false'
        false
      else
        ar[3].to_f
      end
    settings = {
      :min_num_tms => (ar[1] ? ar[1].to_i : DEFAULTS[:tmm][:min_num_tmm_seqs]),
      :soluble_fraction => self.boolean(ar[2], DEFAULTS[:tmm][:expect_soluble]),
      :no_include_tm_peps => tm_pep_cutoff,
      :background => (ar[4] ? ar[4].to_f : DEFAULTS[:tmm][:bkg]),
    }
    settings[:false_to_total_ratio] = ar[5].to_f if ar[5]
    opts[:validators].push([:tmm, ar[0], settings])
  },

  :pephits => lambda { |v, opts| opts[:pephits] = SpecID.new(v) },

  :tps => lambda { |v, opts| opts[:validators].push([:tps, Fasta.new(v)]) },

  :false_on_tie => lambda { |v, opts| opts[:ties] = false },
}
|
256
|
-
|
257
|
-
# Reports whether background ratios for +spec_id_obj+ cannot be derived from
# its own peptide hits, so that --pephits (or --digestion) must be supplied.
#
# Returns true for Proph::ProtSummary and Proph::PepSummary objects (at least
# currently; subject to change), and for an SQTGroup whose first peptide
# responds to :q_value (percolator output, which has no other hits to use).
# Returns false for everything else.
#
# NOTE: the original used the `when X : value` form, which only parses on
# Ruby 1.8; the plain `when` form below parses on every Ruby version.
def self.requires_pephits?(spec_id_obj)
  case spec_id_obj
  when Proph::ProtSummary, Proph::PepSummary
    true
  when SQTGroup
    spec_id_obj.peps.first.respond_to?(:q_value)
  else
    false
  end
end
|
272
|
-
|
273
|
-
# remove the keys from opts involved in validators and return an array
|
274
|
-
# of validators
|
275
|
-
# postfilter is one of :top_per_scan, :top_per_aaseq,
|
276
|
-
# :top_per_aaseq_charge (of which last two are subsets of scan)
|
277
|
-
# Builds validator objects from the argument lists in opts[:validators] and
# returns them as an array.
#
# opts::         option hash filled in by the PrepArgs handlers. It is
#                modified: opts[:validators] is rewritten in place (each
#                entry loses its leading symbol) and :digestion_objects is
#                deleted once used.
# false_on_tie:: when true, validators are built with :correct_wins => false
# interactive::  not used by this method
# postfilter::   :top_per_scan, :top_per_aaseq or :top_per_aaseq_charge;
#                only consulted when background ratios are computed from
#                peptide hits
# spec_id::      object whose #peps are being validated
#
# Raises ArgumentError when a validator needs background ratios but neither
# --digestion nor usable peptide hits are available, or when +postfilter+ is
# not one of the three accepted values.
def self.prepare_validators(opts, false_on_tie, interactive, postfilter, spec_id)
  validator_args = opts[:validators]

  # --to_qvalues: turn the chosen probability into p-values, compute
  # q-values from them and store one q_value on every peptide
  if validator_args.any? { |entry| entry.first == :to_qvalues }
    prob_entry = validator_args.find { |entry| entry.first == :prob }
    prob_method = (prob_entry && prob_entry[1]) || :probability
    validator_args.reject! { |entry| entry.first == :prob }

    # only loaded when q-values are actually requested
    require 'vec'
    require 'qvalue'

    pvals = spec_id.peps.map do |pep|
      complement = 1.0 - pep.send(prob_method)
      complement == 0 ? 1e-9 : complement   # a p-value of exactly zero is replaced
    end
    File.open("TMP_PVALUES.txt", 'w') { |out| out.puts pvals.sort.join(" ") }
    qvals = VecD.new(pvals).qvalues
    qvals.zip(spec_id.peps) { |qval, pep| pep.q_value = qval }
  end

  # both q-value flavors are served by the :qval validator
  validator_args.map! do |entry|
    [:to_qvalues, :perc_qval].include?(entry.first) ? [:qval] : entry
  end

  correct_wins = !false_on_tie
  need_false_to_total_ratio = []
  need_frequency = []
  transmem_vals = []
  potential_digestion_classes = /Transmem|AA|AAEst|Bias/

  validators = validator_args.map do |args|
    tp = args.shift
    val_args = args.dup # shallow copy: the option hashes inside are shared
    case tp
    when :tmm
      val_args[1][:correct_wins] = correct_wins
      val_args[1][:fasta] = opts[:fasta] if opts.key?(:fasta)
    when :bias
      val_args[1][:correct_wins] = correct_wins
    when :tps
      val_args = [val_args[0], correct_wins]
    when :decoy
      # the :constraint key stays in the hash; the validator needs it
      val_args[0][:correct_wins] = correct_wins
    end
    ## bad_aa, prob, and qval pass their arguments through untouched

    val = Validator_symbols_to_classes[tp].new(*val_args)
    transmem_vals << val if tp == :tmm

    # sort validators by what still has to be computed for them
    class_name = val.class.to_s
    if class_name =~ potential_digestion_classes
      if class_name == 'Validator::AAEst'
        need_frequency.push(val) if val.frequency.nil?
      elsif val.false_to_total_ratio.nil?
        need_false_to_total_ratio << val
      else
        $stderr.puts "using false_to_total_ratio: #{val.false_to_total_ratio}"
      end
    end
    val
  end

  unless need_false_to_total_ratio.empty? && need_frequency.empty?
    if opts.key?(:digestion_objects)
      # in silico digestion supplies the background peptides
      peps = Digestor.digest(*(opts[:digestion_objects]))
      need_false_to_total_ratio.each { |val| val.set_false_to_total_ratio(peps) }
      need_frequency.each { |val| val.set_frequency(opts[:digestion_objects][0]) }
      opts.delete(:digestion_objects)
    else
      # otherwise use lower ranked peptide hits as the background
      $stderr.puts "...using pephits to calculate background ratios"
      pephits =
        if opts[:pephits]
          $stderr.puts "using --pephits"
          opts[:pephits].peps
        elsif requires_pephits?(spec_id)
          raise ArgumentError, "with objects of class '#{spec_id.class}', one of your validators requires --pephits or --digestion"
        else
          $stderr.puts "using given spec_id.peps"
          spec_id.peps
        end

      not_first_or_second_peps = Sequest.other_hits_sorted_by_xcorr(pephits, 2, 9, [:base_name, :first_scan, :charge])
      pephits =
        case postfilter
        when :top_per_scan
          $stderr.puts "using top_per_scan"
          not_first_or_second_peps
        when :top_per_aaseq
          # it doesn't matter which one is given since validators are
          # based on amino acid sequence
          $stderr.puts 'using top_per_aaseq'
          not_first_or_second_peps.hash_by(:aaseq).values.map { |group| group.first }
        when :top_per_aaseq_charge
          $stderr.puts 'using top_per_aaseq_charge'
          not_first_or_second_peps.hash_by(:aaseq, :charge).values.map { |group| group.first }
        else
          raise ArgumentError, "must have a valid postfilter method, yours: '#{postfilter}'"
        end

      need_false_to_total_ratio.each do |val|
        val.set_false_to_total_ratio(pephits)
        $stderr.puts "false_to_total_ratio for #{val.class.to_s}: #{val.false_to_total_ratio}"
      end
      need_frequency.each do |val|
        $stderr.puts "Setting frequency!"
        val.set_frequency(pephits)
      end
    end
  end

  # Always done (not just for interactive use); per the original notes this
  # is overkill for a single filtering job but keeps every code path working.
  transmem_vals.each do |val|
    val.transmem_status_hash = val.create_transmem_status_hash(spec_id.peps)
  end

  validators
end
|
430
|
-
|
431
|
-
end
|
data/lib/validator/decoy.rb
DELETED
@@ -1,107 +0,0 @@
|
|
1
|
-
require 'validator'
|
2
|
-
|
3
|
-
class Validator::Decoy < Validator
|
4
|
-
include Precision::Calculator::Decoy

# option values used for any key not passed to #initialize
DEFAULTS = {
  :decoy_on_match => true,
  :correct_wins => true,
  :frit => 1.0,
}

# a Regexp (if concatenated) or a String (the filename of separate run)
attr_accessor :constraint

attr_accessor :decoy_on_match, :correct_wins

# This is the number of incorrect target hits over the total decoy hits:
# the percent incorrect targets (PIT) expressed as a fraction (== 1 - PI_0).
# The rough, conservative ballpark estimate is the ratio of target hits to
# decoy hits. This can be refined by removing the number of true target
# hits from the targets used to calculate it.
attr_accessor :frit

attr_accessor :last_pep_was_decoy

# running totals maintained by increment_pephits_precision
attr_accessor :increment_normal, :increment_decoy, :increment_total_submitted

attr_reader :normal_peps_just_submitted

# opts may supply :constraint, :decoy_on_match, :correct_wins and :frit;
# anything missing is taken from DEFAULTS (:constraint has no default).
def initialize(opts={})
  settings = DEFAULTS.merge(opts)
  @constraint = settings[:constraint]
  @decoy_on_match = settings[:decoy_on_match]
  @correct_wins = settings[:correct_wins]
  @frit = settings[:frit]
end
|
36
|
-
|
37
|
-
# returns [normal, decoy] (?? I think ??)
|
38
|
-
# reads the full protein reference
|
39
|
-
# Splits peps into two arrays, [normal, decoy], by matching the reference of
# each peptide's proteins against @constraint.
#
# @decoy_on_match decides what a match means (truthy: a matching protein is
# a decoy; falsy: a matching protein is a normal hit). @correct_wins decides
# how a peptide shared between both kinds of protein is classified (truthy:
# it goes to normal; falsy: it goes to decoy).
def partition(peps)
  # "every protein must match" applies exactly when the two flags agree;
  # otherwise a single matching protein is enough
  every_prot_must_match = (!@decoy_on_match == !@correct_wins)
  peps.partition do |pep|
    matched =
      if every_prot_must_match
        pep.prots.all? { |prot| prot.reference.match(@constraint) }
      else
        pep.prots.any? { |prot| prot.reference.match(@constraint) }
      end
    # a match marks a decoy when @decoy_on_match is set, a normal hit otherwise
    @decoy_on_match ? !matched : matched
  end
end
|
62
|
-
|
63
|
-
# Resets the running counters used by increment_pephits_precision to zero
# and marks them as initialized.
def initialize_increment
  @increment_normal = @increment_decoy = @increment_total_submitted = 0
  @increment_initialized = true
end
|
69
|
-
|
70
|
-
|
71
|
-
# does not deal in separate_peps right now!!
|
72
|
-
# will take an array or single peptide
|
73
|
-
# Adds peps to the running normal/decoy totals and returns the precision
# calculated (via calc_precision) over everything submitted so far.
#
# peps:: a single SpecID::Pep or an array of them
#
# The counters are initialized on first use. Does not deal in separate_peps
# right now.
#
# NOTE: the first-use check uses defined? rather than reading the instance
# variable directly. The original silenced the "not initialized" warning by
# setting the process-wide $VERBOSE to nil around the check, which changed
# global state and was not restored if initialize_increment raised.
def increment_pephits_precision(peps)
  initialize_increment unless defined?(@increment_initialized) && @increment_initialized

  to_submit = peps.is_a?(SpecID::Pep) ? [peps] : peps

  @increment_total_submitted += to_submit.size
  normal, decoy = partition(to_submit)
  @normal_peps_just_submitted = normal
  @increment_normal += normal.size
  @increment_decoy += decoy.size
  calc_precision(@increment_normal, @increment_decoy, @frit)
end
|
92
|
-
|
93
|
-
# Returns the precision from calc_precision for a set of peptide hits.
#
# With separate_peps given, peps are counted as the normal hits and
# separate_peps as the decoy hits. Otherwise peps is split with #partition
# and the sizes of the two resulting groups are used.
def pephit_precision(peps, separate_peps=nil)
  normal_count, decoy_count =
    if separate_peps
      [peps.size, separate_peps.size]
    else
      normal, decoy = partition(peps)
      [normal.size, decoy.size]
    end
  calc_precision(normal_count, decoy_count, @frit)
end
|
101
|
-
|
102
|
-
# Returns a one line description of this validator's settings, in the form
# "decoy={constraint=..., decoy_on_match=..., correct_wins=...}".
# The constraint is shown with #inspect, or left blank when it is not set.
def to_param_string
  shown_constraint = constraint ? constraint.inspect : ''
  "decoy={constraint=#{shown_constraint}, decoy_on_match=#{@decoy_on_match}, correct_wins=#{@correct_wins}}"
end
|
105
|
-
end
|
106
|
-
|
107
|
-
|
@@ -1,70 +0,0 @@
|
|
1
|
-
require 'validator'
|
2
|
-
require 'fasta'
|
3
|
-
require 'spec_id/sequest/params'
|
4
|
-
|
5
|
-
# objects of this class can calculate pephit_precision given an array of
|
6
|
-
# SpecID::Pep objects using the pephit_precision method.
|
7
|
-
class Validator::DigestionBased < Validator
|
8
|
-
DEFAULTS = {
  # :false_to_total_ratio (was 1.0) is deliberately left out because it
  # needs to be set explicitly
  :background => 0.0,
}

# the number of tps and the number of fps
attr_accessor :increment_tps, :increment_fps

# the total peptides submitted to the validator (regardless of tp, fp, or
# nil)
attr_accessor :increment_total_submitted

# the ratio of false hits to total peptides in the fasta file
attr_accessor :false_to_total_ratio

# the false_to_total_ratio calculated (but not applied)
attr_reader :calculated_background

# For a sample with no false hits in it, (under defaults) this is the
# fraction of peptides with the constraint over the total number of peptides
# from which these hits are derived.
attr_accessor :background
|
33
|
-
|
34
|
-
|
35
|
-
# expects that classes define a partition method, and a @background
|
36
|
-
# Returns the precision for peps: the hits are split with #partition, the
# two group sizes are adjusted by calc_precision_prep, and the adjusted
# counts are handed to calc_precision.
# Expects the including class to define #partition and a background.
def pephit_precision(peps)
  true_hits, false_hits = partition(peps)
  adjusted = calc_precision_prep(true_hits.size, false_hits.size)
  calc_precision(adjusted[0], adjusted[1])
end
|
42
|
-
|
43
|
-
# returns [num_tps, num_fps]
|
44
|
-
# Converts the raw partition counts into estimated counts of true and false
# hits. Returns [adjusted_tps, total_false] (both Floats).
#
# num_tps, num_fps:: sizes of the two groups returned by #partition
#
# Raises ArgumentError if false_to_total_ratio has not been set. It has no
# default and must be assigned explicitly (e.g. with
# set_false_to_total_ratio); the original failed with an opaque TypeError
# from the division instead.
#
# A false_to_total_ratio of zero is not guarded against: the division then
# yields Infinity or NaN, exactly as before.
def calc_precision_prep(num_tps, num_fps)
  ratio = false_to_total_ratio
  if ratio.nil?
    raise ArgumentError, "false_to_total_ratio must be set before precision can be calculated"
  end

  total_peps_passing_partition = num_tps + num_fps
  num_fps = adjust_fps_for_background(num_tps, num_fps, background)

  ## we must use the false_to_total_ratio to estimate how many are really
  ## incorrect!
  # FALSE/TOTAL = FALSE(found)/TOTAL(found)
  # TOTAL(found) = FALSE(found) * TOTAL/FALSE
  #              = FALSE(found) / (FALSE/TOTAL)
  # (to_f keeps an Integer count over an Integer ratio from truncating)
  total_false = num_fps.to_f / ratio

  # NOTE: the partition algorithm drops peptides that are transmembrane
  # under certain options. Thus, the total false estimate must be tempered
  # by this lower number of total peptides.
  adjusted_tps = total_peps_passing_partition.to_f - total_false
  [adjusted_tps, total_false]
end
|
59
|
-
|
60
|
-
# returns self
|
61
|
-
# assumes partition returns (tps, fps)
|
62
|
-
# Sets false_to_total_ratio to the fraction of peps that #partition puts in
# its second (false) group, as a Float. Returns self.
# Assumes partition returns (tps, fps).
def set_false_to_total_ratio(peps)
  true_hits, false_hits = partition(peps)
  hit_total = true_hits.size + false_hits.size
  self.false_to_total_ratio = false_hits.size.to_f / hit_total
  self
end
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
|
@@ -1,51 +0,0 @@
|
|
1
|
-
|
2
|
-
# calculates precision based on the Benjamini-Hochberg FDR method.
|
3
|
-
# @TODO: class should probably be renamed to reflect method used!
|
4
|
-
# or options given to specify different methods (i.e., q-value)??
|
5
|
-
class Validator::Probability
|
6
|
-
|
7
|
-
# the name of the method sent to each object to obtain its probability
attr_accessor :prob_method

# prob_method:: method name used to read a probability from each object
#               (default :probability)
def initialize(prob_method=:probability)
  @prob_method = prob_method
end
|
12
|
-
|
13
|
-
# objs should respond_to probability
|
14
|
-
# Returns the precision of objs, computed as
#   1.0 - SUM(1 - prob) / number of objs
# where prob is read from each object by sending it @prob_method.
# Returns 1.0 when objs is empty.
#
# objs:: a collection whose members respond to the configured probability
#        method
def precision(objs)
  return 1.0 if objs.size == 0

  sum_one_minus_prob = objs.inject(0.0) do |sum, obj|
    sum + (1.0 - obj.send(@prob_method))
  end
  1.0 - (sum_one_minus_prob / objs.size)
end
|
28
|
-
|
29
|
-
|
30
|
-
# objs should respond_to probability
|
31
|
-
# These should be added from high probability(1.0) to low (0.0)
|
32
|
-
# Adds objs to a running total kept on this validator and returns the
# precision over everything added so far:
#   1.0 - SUM(1 - prob) / total number of objs
#
# objs:: a single SpecID::Pep or SpecID::Prot, or a collection of objects
#        that respond to the configured probability method. These should be
#        added from high probability (1.0) to low (0.0).
#
# Returns 1.0 while nothing has been added yet, which matches what
# #precision returns for an empty collection (the original divided 0.0 by 0
# and returned NaN in that case).
def increment_precision(objs)
  objs = [objs] if objs.is_a?(SpecID::Pep) || objs.is_a?(SpecID::Prot)

  @total_objs ||= 0
  @current_sum_one_minus_prob ||= 0.0

  @total_objs += objs.size
  objs.each do |obj|
    @current_sum_one_minus_prob += 1.0 - obj.send(@prob_method)
  end

  return 1.0 if @total_objs == 0
  1.0 - (@current_sum_one_minus_prob / @total_objs)
end
|
46
|
-
|
47
|
-
|
48
|
-
alias_method :pephit_precision, :precision
|
49
|
-
alias_method :prothit_precision, :precision
|
50
|
-
alias_method :increment_pephits_precision, :increment_precision
|
51
|
-
end
|