mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
|
|
2
|
+
require 'validator/cmdline'
|
|
3
|
+
require 'spec_id'
|
|
4
|
+
|
|
5
|
+
module SpecID
|
|
6
|
+
module Precision
|
|
7
|
+
class Prob
|
|
8
|
+
class CmdlineParser
|
|
9
|
+
|
|
10
|
+
# Default options for this command-line front end: the library defaults
# (PN_DEFAULTS) plus a single csv output with no destination given.
DEFAULTS = SpecID::Precision::Prob::PN_DEFAULTS.merge( { :output => [[:csv, nil]] } )

# Switch definitions, keyed by option name. Each value is the argument list
# handed to OptionParser#on (switch strings followed by help-text lines).
# The table is merged with Validator::Cmdline::COMMAND_LINE; on a key
# collision the Validator::Cmdline entry wins.
COMMAND_LINE = {
  :sort_by_init => ['--sort_by_init', "sort the proteins based on init probability"],
  :prob => ['--prob [TYPE]', "use prophet probabilities to calculate precision",
    "TYPE = *nsp|init"],
  # OUTPUT
  :proteins => ["--proteins", "includes proteins (and validation)"],
  :output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
    "can be used multiple times",
    ":FILENAME is the filename to use (defaults to STDOUT)",
    "valid formats are:",
    " csv (default)",
    " to_plot",
    " calc_bkg_to_plot",
    " yaml",
    #" protein_summary (need to implement)",
    #" html_table (need to implement)"
  ],

  # VALIDATION MODIFIERS:
  # NOTE(review): :hits_separate is described here but CmdlineParser#parse
  # does not appear to register it -- confirm whether that is intended.
  :hits_separate => ["--hits_separate", "target/decoy hits are normally together when choosing",
    "the top hit per peptide (in prefilter and postfilter)",
    "in BOTH catenated and separate searches. This flag",
    "separates them when finding the top hit per scan.",
    "[This option modifies behavior of --decoy options]"],

}.merge( Validator::Cmdline::COMMAND_LINE )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Parses the command line of the probability-based precision tool.
#
# args:: array of command-line strings. Recognized switches are removed
#        from it in place (OptionParser#parse!), leaving the file name(s).
#
# Returns [spec_id_obj, options, option_parser_obj].
# * spec_id_obj is nil when no file argument remains; in that case the
#   validators are not prepared and options[:output] is left empty.
# * options[:output] is an array of [format, destination] pairs; a nil
#   destination means no :FILENAME was given for that format.
#
# Raises RuntimeError when more than one --output is given without a
# file name.
def parse(args)
  opts = {}
  opts[:output] = []
  @out_used = false
  opts[:sequest] = {}
  opts[:validators] = []

  option_parser = OptionParser.new do |op|
    # Small helpers attached to this parser instance so the option table
    # (COMMAND_LINE) stays the single source of switch definitions.
    def op.opt(arg, &block)
      on(*COMMAND_LINE[arg], &block)
    end

    # Registers a switch whose handling is delegated to Validator::Cmdline.
    def op.val_opt(arg, opts)
      on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
    end

    # Registers a switch whose raw value is stored under its own key.
    def op.exact_opt(opts, arg)
      on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
    end

    op.banner = "USAGE: #{File.basename($0)} [OPTS] <file>-prot.xml"
    op.separator ""
    op.separator " RETURNS: precision across the number of hits (based on probability)"
    op.separator " (optional) other validation of the results."
    op.separator ""

    op.separator "OUTPUT OPTIONS: "
    op.opt(:proteins) {|v| opts[:proteins] = true }
    op.opt(:output) do |output|
      # Split "format:FILENAME"; the second capture is empty when no file
      # name follows the format (idea copied from rspec).
      where = nil
      md = /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/.match(output)
      if md && md[2] != ''
        output = md[1]
        where = md[2]
      else
        raise "When using several --output options only one of them can be without a file" if @out_used
        @out_used = true
      end
      opts[:output] << [output, where]
    end

    op.separator "GENERAL OPTIONS:"
    op.separator ""
    op.opt(:sort_by_init) {|v| opts[:sort_by_init] = true }
    op.separator "VALIDATION OPTIONS: "
    op.separator " each option will calculate the precision"
    op.separator ""

    op.val_opt(:prob, opts)
    op.val_opt(:decoy, opts)
    op.val_opt(:digestion, opts)
    op.val_opt(:bias, opts)
    op.val_opt(:bad_aa, opts)

    op.val_opt(:tmm, opts)
    op.val_opt(:tps, opts)

    op.separator ""
    op.separator "VALIDATION MODIFIERS: "
    op.val_opt(:false_on_tie, opts) # sets opts[:ties] = false
  end
  option_parser.parse!(args)

  # Without a file there is nothing to load or validate.
  return [nil, opts, option_parser] if args.empty?

  spec_id_obj =
    if args[0] =~ /\.srf\z/i
      # srf input: every remaining argument is handed over together
      ::SpecID.new(args)
    else
      # anything else: only the first remaining argument is used
      ::SpecID.new(args[0])
    end

  # opts[:ties] is nil unless a switch set it (false_on_tie sets it to false)
  opts[:ties] = Validator::Cmdline::DEFAULTS[:ties] if opts[:ties].nil?
  opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], spec_id_obj)

  if opts[:output].empty?
    # Hand out a copy: callers fill in the destination slot of each pair,
    # which must not write through to the DEFAULTS constant.
    opts[:output] = DEFAULTS[:output].map {|pair| pair.dup }
  end

  [spec_id_obj, opts, option_parser]
end # parse
|
|
131
|
+
end # CmdlineParser
|
|
132
|
+
end # Prob
|
|
133
|
+
end # Precision
|
|
134
|
+
end # SpecID
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
|
|
2
|
+
require 'spec_id/precision/output'
|
|
3
|
+
require 'table'
|
|
4
|
+
require 'matrix'
|
|
5
|
+
|
|
6
|
+
module SpecID ; end
|
|
7
|
+
module SpecID::Precision ; end
|
|
8
|
+
class SpecID::Precision::Prob ; end
|
|
9
|
+
class SpecID::Precision::Prob::Output
|
|
10
|
+
include SpecID::Precision::Output
|
|
11
|
+
|
|
12
|
+
# Flattens an answer hash into parallel arrays for tabular output.
#
# Returns [columns, labels]: columns is an array of data arrays and labels
# the matching header strings. The modified-peptide column is emitted only
# when the answer hash carries a :modified_peptides key. One column per
# entry of :pephits_precision is appended at the end.
def to_cols_and_labels(answer_hash)
  labels = ['count', 'probability', 'peptide']
  columns = [:count, :probabilities, :aaseqs].map {|key| answer_hash[key] }

  if answer_hash.key?(:modified_peptides)
    labels << 'modified_peptide'
    columns << answer_hash[:modified_peptides]
  end

  labels << 'charge'
  columns << answer_hash[:charges]

  answer_hash[:pephits_precision].each do |result|
    labels << "#{result[:validator]} (prob)"
    columns << result[:values]
  end

  [columns, labels]
end
|
|
37
|
+
|
|
38
|
+
# Writes the answer hash to +handle+ as a table with one row per hit.
# The columns from to_cols_and_labels are transposed into rows, and the
# Table is rendered with a tab ("\t") passed to to_s.
def csv(handle, answer_hash)
  columns, labels = to_cols_and_labels(answer_hash)
  rows = Matrix[*columns].transpose
  handle.puts(Table.new(rows, nil, labels).to_s("\t"))
end
|
|
43
|
+
|
|
44
|
+
# Writes precision-vs-number-of-hits series to +handle+, one value per line.
#
# Layout: five header lines (data type 'XYData', a base name, title,
# x label, y label), then for every entry of answer_hash[:pephits_precision]
# the validator label, the x values (answer_hash[:count]) and the y values.
#
# The base name is the handle's file name with its last extension removed;
# it falls back to 'plot' when the handle has no path (it does not respond
# to :path, or its path is nil).
def to_plot(handle, answer_hash)
  path = handle.path if handle.respond_to?(:path)
  basename_noext = path ? File.basename(path).sub(/\.(\w)+$/, '') : 'plot'

  header = ['XYData', basename_noext, 'precision vs. num (aaseq+charge)', 'num hits', 'precision']
  header.each {|line| handle.puts line }

  answer_hash[:pephits_precision].each do |hash|
    handle.puts hash[:validator] # label
    handle.puts answer_hash[:count] # x vals
    handle.puts hash[:values] # y vals
  end
end
|
|
62
|
+
|
|
63
|
+
# Writes calculated-background-vs-number-of-hits series to +handle+, one
# value per line.
#
# Layout: five header lines (data type 'XYData', a base name, title,
# x label, y label), then for every entry of
# answer_hash[:params][:validators] its :name, the x values
# (answer_hash[:count]) and its :calculated_backgrounds as y values.
#
# The base name is the handle's file name with its last extension removed;
# it falls back to 'calc_bkg_plot' when the handle has no path (it does not
# respond to :path, or its path is nil).
def calc_bkg_to_plot(handle, answer_hash)
  path = handle.path if handle.respond_to?(:path)
  basename_noext = path ? File.basename(path).sub(/\.(\w)+$/, '') : 'calc_bkg_plot'

  header = ['XYData', basename_noext, 'background vs. num (aaseq+charge)', 'num hits', 'background (false/total)']
  header.each {|line| handle.puts line }

  answer_hash[:params][:validators].each do |hash|
    handle.puts hash[:name] # label
    handle.puts answer_hash[:count] # x vals
    handle.puts hash[:calculated_backgrounds] # y vals
  end
end
|
|
81
|
+
|
|
82
|
+
# Writes the whole answer hash to +handle+ as a YAML document.
def yaml(handle, answer_hash)
  # to_yaml only exists once the yaml library is loaded, and this file does
  # not require it at the top; requiring here is idempotent.
  require 'yaml'
  handle.puts answer_hash.to_yaml
end
|
|
85
|
+
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# note that we require 'spec_id/precision/prob/cmdline' below!
|
|
2
|
+
|
|
3
|
+
require 'spec_id/precision/prob/output'
|
|
4
|
+
|
|
5
|
+
module SpecID ; end
|
|
6
|
+
module SpecID::Precision ; end
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# for probability based spec identifications (true probabilities, not the
|
|
10
|
+
# bioworks p-value (which they call probability)).
|
|
11
|
+
class SpecID::Precision::Prob
|
|
12
|
+
|
|
13
|
+
# Option defaults for precision_vs_num_hits; caller-supplied options are
# merged over these.
PN_DEFAULTS = {
  :proteins     => false,
  :validators   => [],
  :sort_by_init => false
}
|
|
18
|
+
|
|
19
|
+
require 'spec_id/precision/prob/cmdline'
|
|
20
|
+
|
|
21
|
+
# Command-line entry point: parses +args+, computes precision vs. number of
# hits and prints the result once per requested output.
#
# Prints the option summary and returns nil when the parser yields no
# spec_id object (no input file was given).
def precision_vs_num_hits_cmdline(args)
  spec_id_obj, options, option_parser = CmdlineParser.new.parse(args)
  if spec_id_obj.nil?
    puts option_parser
    return
  end

  final_answer = SpecID::Precision::Prob.new.precision_vs_num_hits(spec_id_obj, options)

  # Each entry is [format, destination]. A missing destination means STDOUT;
  # it is substituted locally so the option arrays (which may be shared
  # defaults) are never modified.
  options[:output].each do |format, where|
    SpecID::Precision::Prob::Output.new(format, where || $stdout).print(final_answer).close
  end
end
|
|
33
|
+
|
|
34
|
+
# Walks the peptide hits of +spec_id+ from best to worst and records, after
# each non-decoy hit, the running precision reported by every validator.
#
# opts may include:
#   :proteins => true|*false
#   :validators => array of Validator objects
#   :sort_by_init => true|*false (order by initial probability instead of
#                    nsp adjusted probability)
#   :initial_probability => when truthy, probability validators are switched
#                    to prob_method :initial_probability
# NOTE: if you have decoy data, you MUST pass in a decoy validator for the
# decoy pephits to be removed from other validator analyses!
# (precision based on peptide probabilities is adjusted to account for
# the decoy peptides being present: Precision(no_decoy) = (2*Prec)/(Prec+1)
# which is derived from the 50/50 rule for decoy vs. embedded false hits)
#
# returns a hash of data
#   :pephits_precision => [{validator => <name>, values => [<precision>,...]},... ]
#   :params => :validators => [array of validators] (includes
#                             :calculated_backgrounds)
#   :count => array of the rank of each reported hit
#   :probabilities => array of probability
#   :aaseqs => array of aaseqs
#   :charges => array of charge
#   :modified_peptides => array of modified sequence (only included if
#                         applicable)
#
# Raises ArgumentError when more than one decoy validator is given.
#
# TODO: implement this guy:
#   prothits_precision => {validator => <name>, values => {worst => ,
#                          normal, normal_stdev } }
def precision_vs_num_hits(spec_id, opts={})
  opt = PN_DEFAULTS.merge(opts)

  out = {}
  num_pephits = [] # NOTE!: these are aaseq/aaseq_mod + charge (not really a pephit, but BEST)
  val_hash = Hash.new {|hash,key| hash[key] = [] }
  val_calc_bkg_hash = Hash.new {|hash,key| hash[key] = [] }
  pepstrings = []
  modified_peptides = []
  pepcharges = []
  probabilities = []
  found_modified_peptide = false

  # Work on a copy so the caller's array is left alone. (A block-less #map
  # returns an Enumerator on Ruby >= 1.9, which has no #delete.)
  validators = opt[:validators].dup

  # do we need to deal with decoy peptides?
  decoy_vals = validators.select {|val| val.class == Validator::Decoy }
  raise(ArgumentError, "only one decoy validator allowed!") if decoy_vals.size > 1
  decoy_val = decoy_vals.first
  validators.delete(decoy_val)

  (probability_validators, other_validators) = validators.partition {|val| val.class == Validator::Probability }
  if opt[:initial_probability]
    probability_validators.each {|pv| pv.prob_method = :initial_probability }
  end

  # Sort keys shared by both orderings; booleans become 1/0 so the key
  # arrays can be compared.
  tie_breakers = lambda do |v|
    [v.initial_probability, v.n_instances, ( v.is_nondegenerate_evidence ? 1 : 0 ), v.n_enzymatic_termini, ( v.is_contributing_evidence ? 1 : 0 ), v.n_sibling_peptides]
  end
  ordered_peps =
    if opt[:sort_by_init]
      spec_id.peps.sort_by {|v| tie_breakers.call(v) }.reverse
    else
      spec_id.peps.sort_by {|v| [v.nsp_adjusted_probability] + tie_breakers.call(v) }.reverse
    end

  ordered_peps.each_with_index do |pep,i|
    # probability validators must work on the entire set of normal and decoy
    last_prob_values = probability_validators.map do |val|
      val.increment_pephits_precision(pep)
    end

    decoy_precision = nil
    it_is_a_normal_pep = true
    if decoy_val
      # get the decoy precision
      decoy_precision = decoy_val.increment_pephits_precision(pep)
      # continue with ONLY normal peptides
      it_is_a_normal_pep = (decoy_val.normal_peps_just_submitted.size > 0)
    end
    next unless it_is_a_normal_pep

    # UPDATE validators:
    val_hash[decoy_val] << decoy_precision if decoy_val
    # NOTE(review): the 2p/(p+1) adjustment is applied whether or not a
    # decoy validator was supplied -- confirm that this is intended.
    probability_validators.zip(last_prob_values) do |val,prec|
      val_hash[val] << ( (prec * 2.0) / (prec + 1.0) )
    end
    other_validators.each do |val|
      val_hash[val] << val.increment_pephits_precision(pep)
      if val.is_a? Validator::DigestionBased
        val_calc_bkg_hash[val] << val.calculated_background
      end
    end

    # UPDATE other basic useful information:
    modified_pep_string =
      if pep.mod_info
        found_modified_peptide = true
        pep.mod_info.modified_peptide
      else
        nil
      end
    modified_peptides << modified_pep_string
    pepcharges << pep.charge
    pepstrings << pep.aaseq
    probabilities << pep.probability
    # NOTE(review): this is the rank among ALL hits (skipped decoy hits
    # still advance i), not a running count of normal hits -- confirm.
    num_pephits << (i+1)
  end

  out[:modified_peptides] = modified_peptides if found_modified_peptide
  out[:probabilities] = probabilities
  out[:count] = num_pephits
  out[:aaseqs] = pepstrings
  out[:charges] = pepcharges
  out[:pephits_precision] = opt[:validators].map do |val|
    hsh = {}
    hsh[:validator] = Validator::Validator_to_string[val.class.to_s]
    hsh[:values] = val_hash[val]
    hsh
  end
  out[:params] = {}
  out[:params][:validators] = Validator.sensible_validator_hashes(opt[:validators]).zip(opt[:validators]).map do |hash,val|
    # the single background value is replaced by the per-hit series
    hash.delete(:calculated_background)
    hash[:calculated_backgrounds] = val_calc_bkg_hash[val]
    hash
  end
  out
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
|
|
2
|
+
require 'array_class'
require 'spec_id/sequest/pepxml'
require 'spec_id/parser/proph'
|
|
6
|
+
|
|
7
|
+
module Sequest ; end
|
|
8
|
+
class Sequest::PepXML ; end
|
|
9
|
+
class Sequest::PepXML::MSMSRunSummary ; end
|
|
10
|
+
class Sequest::PepXML::SearchHit ; end
|
|
11
|
+
|
|
12
|
+
module SpecID ; end
|
|
13
|
+
module SpecID::Prot ; end
|
|
14
|
+
module SpecID::Pep ; end
|
|
15
|
+
|
|
16
|
+
module Proph
|
|
17
|
+
|
|
18
|
+
class PepSummary < Sequest::PepXML::MSMSRunSummary
|
|
19
|
+
# MSMSRunSummary is a SpecID object!
|
|
20
|
+
|
|
21
|
+
Filetype_and_version_re_new = /version="PeptideProphet v([\d\.]+) /
|
|
22
|
+
|
|
23
|
+
# inherits prots and peps
|
|
24
|
+
|
|
25
|
+
# the protein groups
|
|
26
|
+
# currently these are just xml nodes returned!
|
|
27
|
+
attr_accessor :peptideprophet_summary
|
|
28
|
+
attr_accessor :spectrum_queries
|
|
29
|
+
attr_accessor :version
|
|
30
|
+
|
|
31
|
+
# Always true for this class.
# NOTE(review): presumably tells callers that a higher probability means a
# better hit -- confirm against the code that reads it.
def hi_prob_best
  true
end
|
|
32
|
+
|
|
33
|
+
# Reads at most the first 8 lines of +file+ and returns the version string
# captured by Filetype_and_version_re_new from the first line that matches.
#
# Raises ArgumentError when none of those lines matches.
def get_version(file)
  version = nil
  File.open(file) do |io|
    lines_read = 0
    while version.nil? && lines_read < 8
      lines_read += 1
      line = io.gets
      # gets returns nil past the end of the file; nothing can match then
      version = line[Filetype_and_version_re_new, 1] if line
    end
  end
  raise(ArgumentError, "couldn't detect version in #{file}") unless version
  version
end
|
|
48
|
+
|
|
49
|
+
# Returns the class used for the search hits of this summary
# (PepSummary::Pep).
def search_hit_class
  PepSummary::Pep
end
|
|
52
|
+
|
|
53
|
+
# Creates an empty summary. When +file+ is given, its version is detected
# with get_version and the file is parsed into this object by
# SpecID::Parser::PepProph.
def initialize(file=nil)
  @prots = nil
  return unless file

  @version = get_version(file)
  #@prot_groups = ProtSummary::Parser.new.parse_file(file)
  SpecID::Parser::PepProph.new(:spec_id).parse(file, :spec_id => self)
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
class PepSummary::Pep < Sequest::PepXML::SearchHit
|
|
64
|
+
%w(probability fval ntt nmc massd).each do |guy|
|
|
65
|
+
self.add_member(guy)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Fills this hit from a pepxml search_hit +node+: the superclass loads its
# fields first, then the PeptideProphet probability and the fval/ntt/nmc/
# massd parameters are read from the node's
# analysis_result/peptideprophet_result child.
#
# returns self
def from_pepxml_node(node, spec_query)
  super(node, spec_query)
  #pp_n = node.find_first('descendant::peptideprophet_result')
  analysis = node.find_first('child::analysis_result')
  prophet = analysis.find_first('child::peptideprophet_result')
  self.probability = prophet['probability'].to_f

  # parameter name => conversion applied to its 'value' attribute
  conversions = { 'fval' => :to_f, 'ntt' => :to_i, 'nmc' => :to_i, 'massd' => :to_f }
  prophet.find('descendant::parameter').each do |param|
    name = param['name']
    conversion = conversions[name]
    # parameters with any other name are ignored
    send("#{name}=", param['value'].send(conversion)) if conversion
  end
  self
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
|