mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
require 'validator/cmdline'
|
|
3
|
-
require 'spec_id'
|
|
4
|
-
|
|
5
|
-
module SpecID
|
|
6
|
-
module Precision
|
|
7
|
-
class Prob
|
|
8
|
-
class CmdlineParser
|
|
9
|
-
|
|
10
|
-
DEFAULTS = SpecID::Precision::Prob::PN_DEFAULTS.merge( { :output => [[:csv, nil]], } )
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
COMMAND_LINE = {
|
|
14
|
-
:sort_by_init => ['--sort_by_init', "sort the proteins based on init probability"],
|
|
15
|
-
:perc_qval => ['--perc_qval', "use percolator q-values to calculate precision"],
|
|
16
|
-
:to_qvalues => ['--to_qvalues', "transform probabilities into q-values",
|
|
17
|
-
"(includes pi_0 correction)",
|
|
18
|
-
"uses PROB [TYPE] if given and supercedes",
|
|
19
|
-
"the prob validation type",
|
|
20
|
-
"*NOTE: include all PeptideProphet results",
|
|
21
|
-
"(don't use any low prob cutoff) for",
|
|
22
|
-
"accurate results!"],
|
|
23
|
-
:prob => ['--prob [TYPE]', "use prophet probabilites to calculate precision",
|
|
24
|
-
"TYPE = nsp [default] prophet nsp",
|
|
25
|
-
" (nsp also should be used for PeptideProphet results)",
|
|
26
|
-
" = init (for ProteinProphet results) use initial",
|
|
27
|
-
"probability instead of nsp probability",
|
|
28
|
-
],
|
|
29
|
-
# OUTPUT
|
|
30
|
-
:proteins => ["--proteins", "includes proteins (and validation)"],
|
|
31
|
-
:output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
|
|
32
|
-
"can be used multiple times",
|
|
33
|
-
":FILENAME is the filename to use (defaults to STDOUT)",
|
|
34
|
-
"valid formats are:",
|
|
35
|
-
" csv (default)",
|
|
36
|
-
" to_plot",
|
|
37
|
-
" calc_bkg_to_plot",
|
|
38
|
-
" yaml",
|
|
39
|
-
#" protein_summary (need to implement)",
|
|
40
|
-
#" html_table (need to implement)"
|
|
41
|
-
],
|
|
42
|
-
|
|
43
|
-
# VALIDATION MODIFIERS:
|
|
44
|
-
:pephits => ["--pephits <file>.srg", "an srg file pointing to the srf files for",
|
|
45
|
-
"the given -prot.xml run",
|
|
46
|
-
"[this or --digestion must be used for applicable]",
|
|
47
|
-
"validators (validators depending on a",
|
|
48
|
-
"false/total ratio)]"],
|
|
49
|
-
}.merge( Validator::Cmdline::COMMAND_LINE )
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
# returns (spec_id_obj, options, option_parser_obj)
|
|
53
|
-
# Parses the command line in +args+. Recognized switches are removed from
# +args+ in place (OptionParser#parse!); the first remaining argument, if
# any, is handed to ::SpecID.new.
#
# returns [spec_id_obj, options, option_parser_obj]
# spec_id_obj is nil when no argument is left after option parsing (the
# validators and the default output are only prepared when a file is given).
def parse(args)
  opts = {
    :output => [],
    :sequest => {},
    :validators => [],
  }
  # tracks whether an --output without a filename was already seen
  @out_used = false

  option_parser = OptionParser.new do |op|
    # registers the switch described by COMMAND_LINE[arg] with a custom block
    def op.opt(arg, &block)
      on(*COMMAND_LINE[arg], &block)
    end

    # registers a switch whose handling is delegated to Validator::Cmdline::PrepArgs
    def op.val_opt(arg, opts)
      on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
    end

    # registers a switch whose value is stored verbatim under opts[arg]
    def op.exact_opt(opts, arg)
      on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
    end

    op.banner = "USAGE: #{File.basename($0)} [OPTS] <file>-prot.xml | <file>.sqg"
    op.separator ""
    op.separator " RETURNS: precision across the number of hits"
    op.separator " (based on probability or q-value)"
    op.separator " (optional) other validation of the results."
    op.separator ""

    op.separator "OUTPUT OPTIONS: "
    op.opt(:proteins) {|v| opts[:proteins] = true }
    op.opt(:output) do |output|
      # copied from rspec:
      # This funky regexp checks whether we have a FILE_NAME or not
      where = nil
      if (output =~ /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/) && ($2 != '')
        output = $1
        where = $2
      else
        raise "When using several --output options only one of them can be without a file" if @out_used
        @out_used = true
      end
      opts[:output] << [output, where]
    end

    op.separator "GENERAL OPTIONS:"
    op.separator ""
    op.opt(:sort_by_init) {|v| opts[:sort_by_init] = true }
    op.separator "VALIDATION OPTIONS: "
    op.separator " each option will calculate the precision"
    op.separator ""

    op.val_opt(:prob, opts)
    op.val_opt(:perc_qval, opts)
    op.val_opt(:to_qvalues, opts)
    op.val_opt(:decoy, opts)
    op.val_opt(:pephits, opts)
    op.val_opt(:digestion, opts)
    op.val_opt(:bias, opts)
    op.val_opt(:bad_aa, opts)
    op.val_opt(:bad_aa_est, opts)

    op.val_opt(:tmm, opts)
    op.val_opt(:fasta, opts)
    op.val_opt(:tps, opts)

    op.separator ""
    op.separator "VALIDATION MODIFIERS: "
    # presumably sets opts[:ties] = false via PrepArgs -- confirm in Validator::Cmdline
    op.val_opt(:false_on_tie, opts)
  end
  option_parser.parse!(args)

  spec_id_obj = nil
  if args.size > 0
    spec_id_obj = ::SpecID.new(args[0])

    # only fall back to the default when no option handler touched :ties
    # (an explicit false must be kept)
    opts[:ties] = Validator::Cmdline::DEFAULTS[:ties] if opts[:ties].nil?

    postfilter =
      if spec_id_obj.class == SQTGroup || spec_id_obj.class == Proph::PepSummary
        :top_per_scan
      else
        :top_per_aaseq_charge
      end

    opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], postfilter, spec_id_obj)

    if opts[:output].empty?
      # hand out copies of the default [format, filename] pairs: callers fill
      # in the output handle in place, which must never alter the constant
      opts[:output] = DEFAULTS[:output].map {|pair| pair.dup }
    end
  end

  [spec_id_obj, opts, option_parser]
end # parse
|
|
152
|
-
end # CmdlineParser
|
|
153
|
-
end # Prob
|
|
154
|
-
end # Precision
|
|
155
|
-
end # SpecID
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
require 'yaml'
|
|
2
|
-
require 'spec_id/precision/output'
|
|
3
|
-
require 'table'
|
|
4
|
-
require 'matrix'
|
|
5
|
-
|
|
6
|
-
module SpecID ; end
|
|
7
|
-
module SpecID::Precision ; end
|
|
8
|
-
class SpecID::Precision::Prob ; end
|
|
9
|
-
class SpecID::Precision::Prob::Output
|
|
10
|
-
include SpecID::Precision::Output
|
|
11
|
-
|
|
12
|
-
# returns array of data arrays and parallel labels
|
|
13
|
-
# Builds the table columns and their labels from +answer_hash+.
#
# Column order: count, score (q_values when that key is present, otherwise
# probabilities), peptide (aaseqs), modified_peptide (only when the key is
# present), charge, then one "<validator> (prob)" column per entry in
# answer_hash[:pephits_precision].
#
# returns [array of column arrays, parallel array of labels]
def to_cols_and_labels(answer_hash)
  score_key, score_label =
    if answer_hash.key?(:q_values)
      [:q_values, 'q_values']
    else
      [:probabilities, 'probability']
    end

  labels = ['count', score_label, 'peptide']
  data = [answer_hash[:count], answer_hash[score_key], answer_hash[:aaseqs]]

  if answer_hash.key?(:modified_peptides)
    data << answer_hash[:modified_peptides]
    labels << 'modified_peptide'
  end

  labels << 'charge'
  data << answer_hash[:charges]

  answer_hash[:pephits_precision].each do |entry|
    labels << "#{entry[:validator]} (prob)"
    data << entry[:values]
  end

  [data, labels]
end
|
|
43
|
-
|
|
44
|
-
# Writes +answer_hash+ to +handle+ as a tab-delimited table: the columns from
# to_cols_and_labels are transposed into rows and labelled with the column
# labels.
def csv(handle, answer_hash)
  columns, labels = to_cols_and_labels(answer_hash)
  rows = Matrix[*columns].transpose
  handle.puts(Table.new(rows, nil, labels).to_s("\t"))
end
|
|
49
|
-
|
|
50
|
-
# Writes an 'XYData' plot description of precision vs. number of hits to
# +handle+.
#
# Five header lines come first (data type, base name, title, x label,
# y label). Then, for each entry in answer_hash[:pephits_precision]: its
# :validator (series label), answer_hash[:count] (x values) and its :values
# (y values).
#
# The base name is the basename of handle.path with a trailing ".<word
# chars>" extension removed, or 'plot' when +handle+ has no #path.
def to_plot(handle, answer_hash)
  basename_noext =
    if handle.respond_to?(:path)
      File.basename(handle.path).sub(/\.(\w)+$/,'')
    else
      'plot'
    end

  header = [
    'XYData',
    basename_noext,
    'precision vs. num (aaseq+charge)',
    'num hits',
    'precision',
  ]
  header.each {|v| handle.puts v }

  answer_hash[:pephits_precision].each do |hash|
    handle.puts hash[:validator] # label
    handle.puts answer_hash[:count] # x vals
    handle.puts hash[:values] # y vals
  end
end
|
|
68
|
-
|
|
69
|
-
# Writes an 'XYData' plot description of calculated background vs. number of
# hits to +handle+.
#
# Five header lines come first (data type, base name, title, x label,
# y label). Then, for each entry in answer_hash[:params][:validators]: its
# :name (series label), answer_hash[:count] (x values) and its
# :calculated_backgrounds (y values).
#
# The base name is the basename of handle.path with a trailing ".<word
# chars>" extension removed, or 'calc_bkg_plot' when +handle+ has no #path.
def calc_bkg_to_plot(handle, answer_hash)
  basename_noext =
    if handle.respond_to?(:path)
      File.basename(handle.path).sub(/\.(\w)+$/,'')
    else
      'calc_bkg_plot'
    end

  header = [
    'XYData',
    basename_noext,
    'background vs. num (aaseq+charge)',
    'num hits',
    'background (false/total)',
  ]
  header.each {|v| handle.puts v }

  answer_hash[:params][:validators].each do |hash|
    handle.puts hash[:name] # label
    handle.puts answer_hash[:count] # x vals
    handle.puts hash[:calculated_backgrounds] # y vals
  end
end
|
|
87
|
-
|
|
88
|
-
# Writes the YAML serialization of +answer_hash+ to +handle+.
def yaml(handle, answer_hash)
  serialized = YAML.dump(answer_hash)
  handle.puts(serialized)
end
|
|
91
|
-
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
|
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
# note that we require 'spec_id/precision/prob/cmdline' below!
|
|
2
|
-
|
|
3
|
-
require 'spec_id/precision/prob/output'
|
|
4
|
-
|
|
5
|
-
module SpecID ; end
|
|
6
|
-
module SpecID::Precision ; end
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
# for probability based spec identifications (true probabilities, not the
|
|
10
|
-
# bioworks p-value (which they call probability)).
|
|
11
|
-
class SpecID::Precision::Prob
|
|
12
|
-
|
|
13
|
-
PN_DEFAULTS = {
|
|
14
|
-
:proteins => false,
|
|
15
|
-
:validators => [],
|
|
16
|
-
:sort_by_init => false,
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
require 'spec_id/precision/prob/cmdline'
|
|
20
|
-
|
|
21
|
-
# Command-line entry point: parses +args+ with CmdlineParser, runs
# precision_vs_num_hits on the parsed object and prints the answer once per
# requested [format, destination] output pair.
#
# When the parser yields no spec_id object, the option parser (usage text)
# is printed and nil is returned.
def precision_vs_num_hits_cmdline(args)
  (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
  if spec_id_obj.nil?
    puts option_parser
    return
  end

  final_answer = SpecID::Precision::Prob.new.precision_vs_num_hits(spec_id_obj, options)
  options[:output].each do |format, where|
    # fall back to $stdout without writing into the pair itself: the pairs
    # may be shared with the parser's default-output constant
    destination = where || $stdout
    SpecID::Precision::Prob::Output.new(format, destination).print(final_answer).close
  end
end
|
|
33
|
-
|
|
34
|
-
# this is the way I was doing it:
|
|
35
|
-
# adjusted = (1+R)*prec / (R*precision +1)
|
|
36
|
-
# # where R is the decoy_to_target ratio
|
|
37
|
-
|
|
38
|
-
# opts may include:
|
|
39
|
-
# :proteins => true|*false
|
|
40
|
-
# :validators => array of Validator objects
|
|
41
|
-
#
|
|
42
|
-
# This method will adjust the precision in the *probability* validators
|
|
43
|
-
# used in the decoy validator (both terms with pi_0 in the denominator go
|
|
44
|
-
# to zero if there is no decoy validator and the precision is not
|
|
45
|
-
# adjusted)
|
|
46
|
-
#
|
|
47
|
-
# adjusted = (1+(1/pi_0))*prec / ((precision/pi_0) +1)
|
|
48
|
-
# # where pi_0 is the ratio incorrect target hits to total decoy hits
|
|
49
|
-
#
|
|
50
|
-
# NOTE: if you have decoy data, you MUST pass in a decoy validator for the
|
|
51
|
-
# decoy pephits to be removed from other validator analyses!
|
|
52
|
-
#
|
|
53
|
-
# returns a hash of data
|
|
54
|
-
# :pephits_precision => [{validator => <name>, values => [<precision>,...]},... ]
|
|
55
|
-
# :params => :validators => [array of validators] (includes
|
|
56
|
-
# :calculated_backgrounds)
|
|
57
|
-
# :aaseqs => array of aaseqs
|
|
58
|
-
# :charges => array of charge
|
|
59
|
-
# :modified_peptides => array of modified sequence (only included if
|
|
60
|
-
# applicable)
|
|
61
|
-
#
|
|
62
|
-
# NOTE: For protein prophet, the results are given on a peptide+charge
|
|
63
|
-
# basis.
|
|
64
|
-
#
|
|
65
|
-
# TODO: implement this:
|
|
66
|
-
# prothits_precision => {validator => <name>, values => {worst => ,
|
|
67
|
-
# normal, normal_stdev } }
|
|
68
|
-
# Walks the peptide hits of +spec_id+ from best to worst and records, for
# every reported hit, the running precision of each validator.
#
# opts is merged over PN_DEFAULTS; keys read here:
#   :validators          => array of validator objects (not modified)
#   :sort_by_init        => order ProteinProphet hits by initial probability
#   :initial_probability => sets prob_method = :initial_probability on every
#                           Validator::Probability
#
# Ordering: by q_value (ascending) when a Validator::QValue is present;
# otherwise, when the first hit responds to :fval, by probability
# (descending) after dropping hits whose probability is -1.0; otherwise by
# the (nsp adjusted/initial) probability keys, descending.
#
# At most one Validator::Decoy is allowed (ArgumentError otherwise). When one
# is given, hits it does not report as normal are left out of the output, and
# probability precisions are adjusted with its pi_zero
# (ArgumentError if pi_zero == 0).
#
# returns a hash with :count, :aaseqs, :charges, :probabilities or :q_values,
# :pephits_precision, :params and (only if a modified peptide was seen)
# :modified_peptides. :count holds the 1-based position of each reported hit
# in the full ordered list (positions of skipped hits are not renumbered).
def precision_vs_num_hits(spec_id, opts={})
  opt = PN_DEFAULTS.merge(opts)

  out = {}
  num_pephits = [] # NOTE!: these are aaseq/aaseq_mod + charge for Prophet
  val_hash = Hash.new {|hash,key| hash[key] = [] }
  val_calc_bkg_hash = Hash.new {|hash,key| hash[key] = [] }
  pepstrings = []
  modified_peptides = []
  pepcharges = []
  probabilities = []
  found_modified_peptide = false

  # work on a copy so the caller's validator list is left intact
  # (a blockless #map returns an Enumerator, which has no #delete)
  validators = opt[:validators].dup
  decoy_vals = validators.select {|val| val.class == Validator::Decoy }
  if decoy_vals.size > 1
    raise(ArgumentError, "only one decoy validator allowed!")
  end
  decoy_val = decoy_vals.first
  pi_zero = decoy_val.pi_zero if decoy_val

  validators.delete(decoy_val)
  (probability_validators, other_validators) = validators.partition {|val| val.class == Validator::Probability }
  if opt[:initial_probability]
    probability_validators.each {|pv| pv.prob_method = :initial_probability }
  end

  # hits that respond to :fval are treated as PeptideProphet hits
  is_peptide_prophet = spec_id.peps.first.respond_to?(:fval)

  use_q_value = other_validators.any? {|v| v.class == Validator::QValue }

  ## ORDER THE PEPTIDE HITS:
  ordered_peps =
    if use_q_value
      spec_id.peps.sort_by {|v| v.q_value }
    elsif is_peptide_prophet
      spec_id.peps.reject {|v| v.probability == -1.0}.sort_by {|v| v.probability }.reverse
    elsif opt[:sort_by_init]
      spec_id.peps.sort_by{|v| [v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides] }.reverse
    else
      spec_id.peps.sort_by{|v| [v.nsp_adjusted_probability, v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides] }.reverse
    end

  ## WORK THROUGH EACH PEPTIDE:
  ordered_peps.each_with_index do |pep,i|
    # probability validators must see the entire set of normal and decoy
    # hits, so they are incremented before the decoy check
    last_prob_values = probability_validators.map {|val| val.increment_pephits_precision(pep) }

    decoy_precision = nil
    it_is_a_normal_pep =
      if decoy_val
        decoy_precision = decoy_val.increment_pephits_precision(pep)
        decoy_val.normal_peps_just_submitted.size > 0
      else
        true
      end

    # continue with ONLY normal peptides
    next unless it_is_a_normal_pep

    # UPDATE validators:
    val_hash[decoy_val].push(decoy_precision) if decoy_val
    probability_validators.zip(last_prob_values) do |val,prec|
      if decoy_val
        raise ArgumentError, "pi_zero in decoy validator must not == 0" if pi_zero == 0
        val_hash[val].push( ((1.0/pi_zero+1.0)*prec) / ((prec/pi_zero) + 1.0) )
      else
        val_hash[val] << prec
      end
    end
    other_validators.each do |val|
      val_hash[val] << val.increment_pephits_precision(pep)
      if val.is_a? Validator::DigestionBased
        val_calc_bkg_hash[val] << val.calculated_background
      end
    end

    # UPDATE other basic useful information:
    modified_pep_string =
      if pep.respond_to?(:mod_info)
        if pep.mod_info
          found_modified_peptide = true
          pep.mod_info.modified_peptide
        end
      elsif pep.sequence =~ /[^A-Z\-\.]/
        # anything besides capitals, '-' and '.' marks a modification
        found_modified_peptide = true
        pep.sequence
      end
    modified_peptides << modified_pep_string
    pepcharges << pep.charge
    pepstrings << pep.aaseq
    probabilities << pep.probability # this is the q_value if percolator
    num_pephits << (i+1)
  end

  out[:modified_peptides] = modified_peptides if found_modified_peptide
  if use_q_value
    out[:q_values] = probabilities
  else
    out[:probabilities] = probabilities
  end
  out[:count] = num_pephits
  out[:aaseqs] = pepstrings
  out[:charges] = pepcharges
  out[:pephits_precision] = opt[:validators].map do |val|
    {
      :validator => Validator::Validator_to_string[val.class.to_s],
      :values => val_hash[val],
    }
  end
  out[:params] = {}
  out[:params][:validators] = Validator.sensible_validator_hashes(opt[:validators]).zip(opt[:validators]).map do |hash,val|
    # replace the single (last) background with the per-hit series
    hash.delete(:calculated_background)
    hash[:calculated_backgrounds] = val_calc_bkg_hash[val]
    hash
  end
  out
end
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
require 'arrayclass'
|
|
3
|
-
require 'spec_id/sequest/pepxml'
|
|
4
|
-
require 'spec_id/parser/proph'
|
|
5
|
-
|
|
6
|
-
module Sequest ; end
|
|
7
|
-
class Sequest::PepXML ; end
|
|
8
|
-
class Sequest::PepXML::MSMSRunSummary ; end
|
|
9
|
-
class Sequest::PepXML::SearchHit ; end
|
|
10
|
-
|
|
11
|
-
module SpecID ; end
|
|
12
|
-
module SpecID::Prot ; end
|
|
13
|
-
module SpecID::Pep ; end
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
module Proph
|
|
18
|
-
|
|
19
|
-
class PepSummary
|
|
20
|
-
include SpecID
|
|
21
|
-
|
|
22
|
-
Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /
|
|
23
|
-
|
|
24
|
-
# inherits prots and peps
|
|
25
|
-
|
|
26
|
-
# the protein groups
|
|
27
|
-
# currently these are just xml nodes returned!
|
|
28
|
-
attr_accessor :peptideprophet_summary
|
|
29
|
-
attr_accessor :msms_run_summaries
|
|
30
|
-
attr_accessor :version
|
|
31
|
-
|
|
32
|
-
# Always true.
# NOTE(review): presumably tells callers that a higher probability is the
# better score for this summary type -- confirm against the SpecID interface.
def hi_prob_best
  true
end
|
|
33
|
-
|
|
34
|
-
# Returns the PeptideProphet version string found in +file+.
#
# Only the first 8 lines are inspected; the first line matching
# Filetype_and_version_re_new supplies the version (its first capture).
# Raises ArgumentError when none of those lines match.
def get_version(file)
  head = File.open(file) {|io| io.each_line.first(8) }

  version = nil
  head.each do |text|
    match = Filetype_and_version_re_new.match(text)
    next unless match
    version = match[1]
    break
  end

  raise(ArgumentError, "couldn't detect version in #{file}") unless version
  version
end
|
|
49
|
-
|
|
50
|
-
# The class this summary names as its search-hit type: PepSummary::Pep.
def search_hit_class
  hit_class = PepSummary::Pep
  hit_class
end
|
|
53
|
-
|
|
54
|
-
# With no +file+, creates an empty summary. Otherwise records the
# PeptideProphet version found in +file+ (see get_version, which raises
# ArgumentError when none is detected) and has SpecID::Parser::PepProph parse
# the file with this object passed as :spec_id.
def initialize(file=nil)
  return unless file

  @version = get_version(file)
  SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# this is a SpecID::Pep (by interface: not including stuff yet)
|
|
63
|
-
class PepSummary::Pep < Sequest::PepXML::SearchHit
|
|
64
|
-
# aaseq is defined in SearchHit
|
|
65
|
-
|
|
66
|
-
%w(probability fval ntt nmc massd prots q_value).each do |guy|
|
|
67
|
-
self.add_member(guy)
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# returns self
|
|
71
|
-
# Fills this hit from a pepXML search-hit +node+: the superclass reads its
# share first, then the probability attribute and the fval/ntt/nmc/massd
# parameters are taken from the first analysis_result/peptideprophet_result
# child. Parameters with any other name are ignored.
#
# returns self
def from_pepxml_node(node)
  super(node)

  analysis = node.find_first('child::analysis_result')
  prophet = analysis.find_first('child::peptideprophet_result')
  self.probability = prophet['probability'].to_f

  prophet.find('descendant::parameter').each do |param|
    case param['name']
    when 'fval' then self.fval = param['value'].to_f
    when 'ntt' then self.ntt = param['value'].to_i
    when 'nmc' then self.nmc = param['value'].to_i
    when 'massd' then self.massd = param['value'].to_f
    end
  end

  self
end
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))
|
|
94
|
-
|
|
95
|
-
class PepSummary::Prot
  # The first member (the protein name).
  def first_entry
    self[0]
  end

  # The first two members (name and protein_descr) joined by a single space.
  def reference
    self[0] + ' ' + self[1]
  end
end
|
|
99
|
-
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|