mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
require 'validator/cmdline'
|
|
2
|
+
require 'spec_id'
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
module SpecID
  module Precision
    class Filter
      # Turns the command-line arguments of the filter/validate tool into an
      # options hash and, when input files are given, a SpecID object.
      class CmdlineParser

        # Filter defaults plus the default output: a text table with no
        # filename (the help text below says a missing filename means STDOUT).
        DEFAULTS = SpecID::Precision::Filter::FV_DEFAULTS.merge( { :output => [[:text_table,nil]], } )

        # Maps each option key to the argument list passed to OptionParser#on.
        # Validator options are merged in from Validator::Cmdline::COMMAND_LINE.
        COMMAND_LINE = {
          # SEQUEST
          :xcorr1 => ["-1", "--xcorr1 N", Float, "xcorr at +1 charge default: #{DEFAULTS[:sequest][:xcorr1]}"],
          :xcorr2 => ["-2", "--xcorr2 N", Float, "xcorr at +2 charge default: #{DEFAULTS[:sequest][:xcorr2]}"],
          :xcorr3 => ["-3", "--xcorr3 N", Float, "xcorr at +3 charge default: #{DEFAULTS[:sequest][:xcorr3]}"],

          :deltacn => ["-d", "--deltacn N", Float, ">= deltacn default: #{DEFAULTS[:sequest][:deltacn]}"],
          :ppm => ["-p", "--ppm N", Float, "<= ppm default: #{DEFAULTS[:sequest][:ppm]}",
            "if bioworks.xml, then ppm = deltamass*10^6/mass"],
          :no_deltacnstar => ["--no_deltacnstar", "Do not pass deltacn of top hit with no 2nd hit",
            "(these are set at 1.1 by bioworks)"],

          # OUTPUT
          :proteins => ["--proteins", "includes proteins (and validation)"],
          :output => ["-o", "--output format[:FILENAME]", "format to output filtering results.",
            "can be used multiple times",
            ":FILENAME is the filename to use (defaults to STDOUT)",
            "valid formats are:",
            " text_table (default)",
            " yaml",
            #" protein_summary (need to implement)",
            #" html_table (need to implement)"
          ],

          # VALIDATION MODIFIERS:
          :hits_separate => ["--hits_separate", "target/decoy hits are normally together when choosing",
            "the top hit per peptide (in prefilter and postfilter)",
            "in BOTH catenated and separate searches. This flag",
            "separates them when finding the top hit per scan.",
            "[This option modifies behavior of --decoy options]"],

          # OTHER:
          :prefilter => ["--prefilter", "finds the top hit per file+scan+charge and removes",
            "others. Speeds up filtering with '--interactive'."],
          :postfilter => ["--postfilter ARG", "ARG = top_per_scan | top_per_aaseq[_charge]",
            "'top_per_scan' hashes on filename+scan.",
            "'top_per_aaseq' hashes only on aaseq",
            "'top_per_aaseq_charge' hashes on aaseq+charge."],
          :top_hit_by => ["--top_hit_by ARG", "ARG = xcorr | probability (xcorr default)"],

          :interactive => ["-i", "--interactive [FILENAME]", "interactive filtering",
            "FILENAME given, then the interactive commands are",
            "read out of that file. NOTE: The flag without the",
            "filename must not be placed in front of an input",
            "file argument! e.g., -i bioworks.xml # -> bad!",
            "e.g., bioworks.xml -i # -> ok!"],

          :interactive_verbose => ["--interactive_verbose", "give help and hints in interactive mode"],

        }.merge( Validator::Cmdline::COMMAND_LINE )

        # Parses +args+ destructively: OptionParser#parse! removes the
        # recognized switches, leaving only the input file names.
        #
        # returns (spec_id_obj, options, option_parser_obj)
        #
        # spec_id_obj is nil when no input files remain; in that case the
        # validators are not prepared and opts[:output] stays empty.
        def parse(args)
          opts = { :output => [], :sequest => {}, :validators => [] }
          # remembers whether an --output without a filename was already seen
          @out_used = false

          option_parser = OptionParser.new do |op|
            # registers the option described by COMMAND_LINE[arg] with a custom handler
            def op.opt(arg, &block)
              on(*COMMAND_LINE[arg], &block)
            end

            # registers a validator option; the handler lives in Validator::Cmdline::PrepArgs
            def op.val_opt(arg, opts)
              on(*COMMAND_LINE[arg]) {|ar| Validator::Cmdline::PrepArgs[arg].call(ar, opts) }
            end

            # registers an option whose value is stored verbatim under its own key
            def op.exact_opt(opts, arg)
              on(*COMMAND_LINE[arg]) {|v| opts[arg] = v}
            end

            op.banner = "USAGE: #{File.basename($0)} [OPTS] <bioworks.xml | bioworks.srg | .srf ....srf>"
            op.separator ""
            op.separator " EXPECTS: the multiconsensus XML export of Bioworks 3.X (bioworks.xml) -or- *.srf files"
            op.separator " grouped together (bioworks.srg) [type 'srf_group.rb' at the cmd line]"

            op.separator " multiple .srf files may also be entered."
            op.separator " RETURNS: the number of peptides/proteins ID'd at given thresholds with"
            op.separator " (optional) validation of the results."
            op.separator ""

            #op.separator("** 'dcn*' is the number of peptides with deltacn == 1.1")
            #op.separator(" (these are peptides who are the only hit with xcorr > 0)")
            op.separator "SEQUEST OPTIONS: "
            [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm].each do |key|
              op.exact_opt(opts[:sequest], key)
            end
            op.opt(:no_deltacnstar) {|v| opts[:sequest][:include_deltacnstar] = false}
            op.separator ""
            op.separator "OUTPUT OPTIONS: "
            op.opt(:proteins) {|v| opts[:proteins] = true }
            op.opt(:output) do |output|
              # copied from rspec:
              # This funky regexp checks whether we have a FILE_NAME or not
              where = nil
              if (output =~ /([a-zA-Z_]+(?:::[a-zA-Z_]+)*):?(.*)/) && ($2 != '')
                output = $1
                where = $2
              else
                raise "When using several --output options only one of them can be without a file" if @out_used
                @out_used = true
              end
              opts[:output] << [output, where]
            end

            op.separator ""
            op.separator "VALIDATION OPTIONS: "
            op.separator " each option will calculate the precision"
            op.separator ""

            op.val_opt(:decoy, opts)
            op.val_opt(:digestion, opts)
            op.val_opt(:bias, opts)
            op.val_opt(:bad_aa, opts)

            op.val_opt(:tmm, opts)
            op.val_opt(:tps, opts)

            op.separator ""
            op.separator "VALIDATION MODIFIERS: "
            op.val_opt(:false_on_tie, opts) # sets opts[:ties] = false

            op.opt(:hits_separate) { opts[:hits_together] = false } # :top_hits_together

            op.separator ""
            op.separator "OTHER OPTIONS: "
            # FILENAME is optional, so OptionParser yields nil when the flag is
            # given alone. Store true in that case; storing the nil would make
            # the flag indistinguishable from "not given" below.
            op.opt(:interactive) {|v| opts[:interactive] = v.nil? ? true : v }
            op.opt(:interactive_verbose) {|v| opts[:interactive_verbose] = v }

            op.opt(:top_hit_by) {|v| opts[:top_hit_by] = v.to_sym}
            op.opt(:postfilter) {|v| opts[:postfilter] = v.to_sym}
            op.opt(:prefilter) {|v| opts[:prefilter] = true }

            #op.on("--yaml", "spits out yaml-ized data") {|v| opts[:tabulate = v }
            #op.on("--combined_score", "shows the combined score") {|v| opts[:combined_score = v }
            #op.on("--marshal", "will write marshaled data or read existing") {|v| opts[:marshal = v }
            #op.on("--log <file>", "also writes all output to file") {|v| opts[:log = v }
            ### NEED TO IMPLEMENT THIS:
            ##op.on("--protein_summary", "writes passing proteins to .summary.html files") {|v| opts[:protein_summary = v }
            #op.on("-z", "--occams_razor", "will show minimal set of proteins") {|v| opts[:occams_razor = v }
          end
          option_parser.parse!(args)

          # prepare interactive object if necessary:
          # a String is the commands file; true means read commands interactively
          if (requested = opts[:interactive])
            commands_file = requested.is_a?(String) ? requested : nil
            opts[:interactive] = SpecID::Precision::Filter::Interactive.new(commands_file, opts[:interactive_verbose])
          end

          # user-supplied sequest thresholds override the defaults
          opts[:sequest] = DEFAULTS[:sequest].merge(opts[:sequest])

          spec_id_obj = nil
          if args.size > 0
            # a leading .srf means every remaining argument is an .srf file
            spec_id_obj = (args[0] =~ /\.srf$/i) ? ::SpecID.new(args) : ::SpecID.new(args[0])

            # opts[:ties] is nil (unset) or false (--false_on_tie was given)
            opts[:ties] = Validator::Cmdline::DEFAULTS[:ties] if opts[:ties].nil?

            # prepare validators
            opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], spec_id_obj)

            if opts[:output].empty?
              # copy the pairs so later in-place edits of opts[:output] cannot
              # alter the DEFAULTS constant
              opts[:output] = DEFAULTS[:output].map {|pair| pair.dup }
            end
          end

          [spec_id_obj, opts, option_parser]
        end # parse
      end # CmdlineParser
    end # Filter
  end # Precision
end # SpecID
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
|
|
2
|
+
module SpecID ; end
module SpecID::Precision ; end

class SpecID::Precision::Filter
  # Collects replacement filter values from the user, either typed at the
  # terminal or read line by line from a file of prepared commands.
  class Interactive
    attr_accessor :file
    attr_accessor :verbose

    # file:: optional path to a file containing the interactive commands,
    #        one reply per line; when nil, replies are read from standard input
    # verbose:: when true, prints current values, help and error hints
    def initialize(file=nil, verbose=false)
      @verbose = verbose
      if file
        @file = file
        @lines = IO.readlines(file)
      else
        @lines = nil
      end
    end

    # Prints a separator line when verbose. Neither argument is used.
    def passing(opts, answer)
      puts "****************************************************" if @verbose
    end

    # Reads replies until one parses, then writes the new values into
    # +opts_to_change+ under +changing_keys+.
    #
    # shortcut_map:: maps each key to the shortcut name the user types
    # casting_map:: maps each key to a proc that converts the typed string
    #
    # Returns true after updating opts_to_change, or nil when the user quits
    # ('q') or the input is exhausted.
    def filter_args(opts_to_change, changing_keys, shortcut_map, casting_map)
      shortcut_order = changing_keys.map {|key| shortcut_map[key] }
      casting_array = changing_keys.map {|key| casting_map[key] }
      base_args = opts_to_change.values_at( *changing_keys )
      if @verbose
        current_values = changing_keys.map {|key| "#{shortcut_map[key]}:#{opts_to_change[key]}" }
        out(current_values.join(" "))
      end
      loop do
        answer = prep_reply(next_reply, base_args, shortcut_order, casting_array)
        return nil if answer.nil?
        if answer == false
          # unusable reply: show the help (if verbose) and ask again
          out(interactive_help(changing_keys, shortcut_map)) if @verbose
          next
        end
        answer.zip(changing_keys) {|newval, key| opts_to_change[key] = newval }
        return true
      end
    end

    # Writes +string+ to standard output.
    def out(string)
      puts string
    end

    # Builds the multi-line help text describing the accepted reply formats.
    def interactive_help(changing_keys, shortcut_map)
      shortcuts = changing_keys.map {|key| shortcut_map[key] }
      as_array = shortcuts.map {|sc| "<#{sc}>" }
      as_hash = shortcuts.map {|sc| "#{sc}:<#{sc}>" }
      border = "******************************************************************************"
      help = []
      help << border
      help << "INTERACTIVE FILTERING HELP:"
      help << "enter as an array of values : #{as_array.join(' ')}"
      help << "or as keys and values : #{as_hash.join(' ')}"
      help << "or some of the keys and values : #{as_hash.last}"
      if changing_keys.size >= 3
        help << "or mix array and keys/values : #{as_array[0]} #{as_array[1]} #{as_hash.last}"
      end
      help << "etc..."
      help << "<enter> to (re)run current values"
      help << "'q' to quit"
      help << border
      help.join("\n")
    end

    # Interprets one reply (assumed already chomped).
    #
    # reply:: the text the user entered
    # base:: the current values, in the same order as shortcut_order
    # shortcut_order:: shortcut names, one per value
    # casting_array:: procs that convert each value, one per value
    #
    # Returns:
    # * nil if the reply is 'q'
    # * +base+ itself if the reply is blank
    # * false if the reply is nil, names an unknown shortcut, supplies more
    #   positional values than there are casts, or a cast fails
    # * otherwise a new array of cast values
    #
    # +base+ is never modified, so a rejected reply cannot leak half-applied
    # values into the next attempt.
    def prep_reply(reply, base, shortcut_order, casting_array)
      return nil if reply == 'q'
      return false unless reply
      return base if reply =~ /^\s*$/

      positional = []
      keyed = {}
      # strip first: a leading blank would otherwise yield an empty first token
      reply.strip.split(/\s+/).each do |token|
        if token.include? ':'
          (k,v) = token.split(':')
          keyed[k] = v
        else
          positional << token
        end
      end

      values = base.dup
      positional.each_with_index {|val, i| values[i] = val }
      keyed.each do |k,v|
        index = shortcut_order.index(k)
        if index.nil?
          out("BAD ARG: #{k}:#{v}") if @verbose
          return false
        end
        values[index] = v
      end

      values.zip(casting_array).map do |v,cast_proc|
        begin
          # cast_proc is nil when more values were given than there are casts
          cast_proc.call(v)
        rescue NoMethodError, ArgumentError, TypeError
          out "BAD ARG: #{v}" if @verbose
          return false
        end
      end
    end

    # Next reply from the prepared command lines or from standard input.
    # Exhausted input (no lines left, or EOF) is treated as 'q'.
    # $stdin is read explicitly because bare Kernel#gets reads from any file
    # names still present in ARGV.
    def next_reply
      line = @lines ? @lines.shift : $stdin.gets
      line ? line.chomp : 'q'
    end
    private :next_reply

  end
end
|
|
134
|
+
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
require 'table'
|
|
2
|
+
require 'spec_id/precision/output'
|
|
3
|
+
|
|
4
|
+
module SpecID ; end
module SpecID::Precision ; end
class SpecID::Precision::Filter ; end

# Writes filtering results (hit counts and validator precisions) as a text
# table or as YAML.
class SpecID::Precision::Filter::Output
  include SpecID::Precision::Output

  # abbreviations used in the protein row headings of the text table
  ProtPrecAbbr = {
    :normal => 'nrm',
    :normal_stdev => 'nrm_std',
    :worst => 'worst',
  }

  GTE = '>='
  LTE = '<='
  # comparison operator associated with each sequest threshold name
  MSial_operator = {
    'xcorr1' => GTE,
    'xcorr2' => GTE,
    'xcorr3' => GTE,
    'deltacn' => GTE,
    'ppm' => LTE,
  }

  # takes a hash {:normal => x, :normal_stdev => y :worst => z }
  # and returns a string
  def protein_precision_to_s(hash)
    "#{hash[:worst]}--#{hash[:normal]}+/-#{hash[:normal_stdev]}"
  end

  # Returns the run parameters as a printable string: keys are converted to
  # strings, values tidied by cleanup_params_hash, and the result formatted
  # by hash_as_string (not defined in this class; presumably supplied by the
  # included SpecID::Precision::Output -- verify).
  def params_as_string(params_hash)
    hash = SpecID::Precision::Output.symbol_keys_to_string(params_hash)
    cleanup_params_hash(hash)
    hash_as_string(hash)
  end

  # Writes the parameters followed by a table shaped like:
  #
  #             num   tps   tmm   badAA   decoy
  #     peps
  #     prots
  #
  # fh:: object responding to puts
  # answer:: hash with :params, :pephits, :pephits_precision and, optionally,
  #          :prothits and :prothits_precision
  def text_table(fh, answer)
    validators = answer[:params][:validators]

    col_headings = ['num']
    if validators
      val_strings = validators.map {|val| Validator::Validator_to_string[val.class.to_s] }
      col_headings.push( *val_strings )
    end

    # the peptide row: hit count, then one precision per validator
    row_headings = ['peps']
    pep_row = [answer[:pephits].size]
    if validators
      validators.zip( answer[:pephits_precision] ) {|_val, precision| pep_row << precision }
    end
    data_rows = [pep_row]

    # the protein rows: one per precision estimate
    if answer[:prothits]
      [:worst, :normal, :normal_stdev].each do |guy|
        row_headings << "prots(#{ProtPrecAbbr[guy]})"
        # only the first protein row shows the count; the others show a ditto mark
        prot_row = [ (guy == :worst) ? answer[:prothits].size : '"' ]
        answer[:prothits_precision].each {|precision| prot_row.push(precision[guy]) }
        data_rows << prot_row
      end
    end

    params_string = params_as_string(answer[:params])
    table = Table.new( data_rows, row_headings, col_headings )
    fh.puts params_string
    fh.puts ""
    fh.puts( table.to_formatted_string )
    fh.puts ""
  end

  # Writes the parameters, hit counts and per-validator precisions as YAML.
  #
  # fh:: object responding to print
  # answer:: same structure as for text_table
  def yaml(fh, answer)
    # text_table tolerates missing validators; do the same here
    validators = answer[:params][:validators] || []

    final_output = { :params => answer[:params].dup }
    final_output[:pephits] = answer[:pephits].size
    if answer[:prothits]
      final_output[:prothits_precision] = validators.zip( answer[:prothits_precision] ).map do |val, precision|
        {'validator' => Validator::Validator_to_string[val.class.to_s], 'values' => precision }
      end
      final_output[:prothits] = answer[:prothits].size
    end
    final_output[:pephits_precision] = validators.zip( answer[:pephits_precision] ).map do |val, precision|
      { 'validator' => Validator::Validator_to_string[val.class.to_s], 'value' => precision }
    end

    final_output_as_strings = SpecID::Precision::Output.symbol_keys_to_string(final_output)
    cleanup_params_hash(final_output_as_strings['params'])
    fh.print(final_output_as_strings.to_yaml )
  end

  # Rewrites values of a string-keyed params hash, in place, into forms that
  # print sensibly. The nested [format, filename] pairs under 'output' are
  # copied rather than edited, so the caller's arrays are left untouched.
  #
  # returns nil
  def cleanup_params_hash(hash)
    # each [format, filename] pair becomes "format => filename"
    if hash['output']
      hash['output'] = hash['output'].map do |output|
        pair = output.dup
        pair[1] = 'STDOUT' if pair[1].nil?
        pair.join(" => ")
      end
    end

    %w(postfilter top_hit_by).each do |st|
      hash[st] = hash[st].to_s
    end

    # replace the interactive object with its commands file, or true if none
    if hash['interactive']
      hash['interactive'] = hash['interactive'].file || true
    end

    if hash['decoy'] && hash['decoy']['regexp']
      hash['decoy']['regexp'] = hash['decoy']['regexp'].inspect
    end

    if (vals = hash['validators'])
      hash['validators'] = Validator.sensible_validator_hashes(vals)
    end
    nil
  end

end
|