protk 1.2.6.pre5 → 1.3.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +84 -45
- data/bin/add_retention_times.rb +9 -5
- data/bin/augustus_to_proteindb.rb +7 -11
- data/bin/interprophet.rb +28 -46
- data/bin/make_decoy.rb +16 -48
- data/bin/mascot_search.rb +57 -71
- data/bin/mascot_to_pepxml.rb +13 -26
- data/bin/msgfplus_search.rb +70 -107
- data/bin/omssa_search.rb +52 -109
- data/bin/peptide_prophet.rb +44 -119
- data/bin/pepxml_to_table.rb +24 -27
- data/bin/protein_prophet.rb +22 -82
- data/bin/protxml_to_gff.rb +22 -519
- data/bin/protxml_to_table.rb +2 -16
- data/bin/sixframe.rb +10 -32
- data/bin/tandem_search.rb +30 -403
- data/bin/tandem_to_pepxml.rb +43 -0
- data/bin/unimod_to_loc.rb +1 -1
- data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
- data/ext/decoymaker/extconf.rb +3 -0
- data/lib/protk/constants.rb +16 -2
- data/lib/protk/data/default_config.yml +2 -1
- data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
- data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
- data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
- data/lib/protk/data/tandem_params.xml +17 -54
- data/lib/protk/fastadb.rb +2 -2
- data/lib/protk/prophet_tool.rb +1 -1
- data/lib/protk/protxml_to_gff_tool.rb +474 -0
- data/lib/protk/search_tool.rb +58 -103
- data/lib/protk/setup_rakefile.rake +9 -5
- data/lib/protk/tandem_search_tool.rb +256 -0
- data/lib/protk/tool.rb +85 -104
- data/lib/protk.rb +1 -6
- metadata +24 -103
- data/bin/annotate_ids.rb +0 -59
- data/bin/asapratio.rb +0 -27
- data/bin/blastxml_to_table.rb +0 -119
- data/bin/correct_omssa_retention_times.rb +0 -27
- data/bin/feature_finder.rb +0 -95
- data/bin/file_convert.rb +0 -164
- data/bin/generate_omssa_loc.rb +0 -42
- data/bin/gffmerge.rb +0 -208
- data/bin/libra.rb +0 -70
- data/bin/toppas_pipeline.rb +0 -84
- data/bin/uniprot_annotation.rb +0 -141
- data/bin/xls_to_table.rb +0 -52
- data/bin/xpress.rb +0 -27
- data/ext/protk/decoymaker/extconf.rb +0 -3
- data/ext/protk/simplealign/extconf.rb +0 -3
- data/lib/protk/biotools_excel_converter.rb +0 -60
- data/lib/protk/eupathdb_gene_information_table.rb +0 -158
- data/lib/protk/gapped_aligner.rb +0 -264
- data/lib/protk/protein_annotator.rb +0 -646
- data/lib/protk/spreadsheet_extensions.rb +0 -79
- data/lib/protk/xtandem_defaults.rb +0 -11
data/bin/peptide_prophet.rb
CHANGED
@@ -17,102 +17,34 @@ input_stager = nil
|
|
17
17
|
|
18
18
|
# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
|
19
19
|
#
|
20
|
-
prophet_tool=ProphetTool.new([
|
20
|
+
prophet_tool=ProphetTool.new([
|
21
|
+
:glyco,
|
22
|
+
:explicit_output,
|
23
|
+
:over_write,
|
24
|
+
:maldi,
|
25
|
+
:prefix,
|
26
|
+
:database])
|
21
27
|
prophet_tool.option_parser.banner = "Run PeptideProphet on a set of pep.xml input files.\n\nUsage: peptide_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
prophet_tool.
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
prophet_tool.
|
30
|
-
prophet_tool.
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
prophet_tool.
|
35
|
-
prophet_tool.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
prophet_tool.
|
40
|
-
|
41
|
-
|
42
|
-
end
|
43
|
-
|
44
|
-
prophet_tool.options.usert = false
|
45
|
-
prophet_tool.option_parser.on( '--usert',"Use hydrophobicity / RT information" ) do
|
46
|
-
prophet_tool.options.usert = true
|
47
|
-
end
|
48
|
-
|
49
|
-
prophet_tool.options.accurate_mass = false
|
50
|
-
prophet_tool.option_parser.on( '--accurate-mass',"Use accurate mass binning" ) do
|
51
|
-
prophet_tool.options.accurate_mass = true
|
52
|
-
end
|
53
|
-
|
54
|
-
prophet_tool.options.no_ntt = false
|
55
|
-
prophet_tool.option_parser.on( '--no-ntt',"Don't use NTT model" ) do
|
56
|
-
prophet_tool.options.no_ntt = true
|
57
|
-
end
|
58
|
-
|
59
|
-
prophet_tool.options.no_nmc = false
|
60
|
-
prophet_tool.option_parser.on( '--no-nmc',"Don't use NMC model" ) do
|
61
|
-
prophet_tool.options.no_nmc = true
|
62
|
-
end
|
63
|
-
|
64
|
-
prophet_tool.options.usegamma = false
|
65
|
-
prophet_tool.option_parser.on( '--usegamma',"Use Gamma distribution to model the negatives" ) do
|
66
|
-
prophet_tool.options.usegamma = true
|
67
|
-
end
|
68
|
-
|
69
|
-
prophet_tool.options.use_only_expect = false
|
70
|
-
prophet_tool.option_parser.on( '--use-only-expect',"Only use Expect Score as the discriminant" ) do
|
71
|
-
prophet_tool.options.use_only_expect = true
|
72
|
-
end
|
73
|
-
|
74
|
-
prophet_tool.options.force_fit = false
|
75
|
-
prophet_tool.option_parser.on( '--force-fit',"Force fitting of mixture model and bypass checks" ) do
|
76
|
-
prophet_tool.options.force_fit = true
|
77
|
-
end
|
78
|
-
|
79
|
-
prophet_tool.options.allow_alt_instruments=false
|
80
|
-
prophet_tool.option_parser.on( '--allow-alt-instruments',"Warning instead of exit with error if instrument types between runs is different" ) do
|
81
|
-
prophet_tool.options.allow_alt_instruments = true
|
82
|
-
end
|
83
|
-
|
84
|
-
prophet_tool.options.one_ata_time = false
|
85
|
-
prophet_tool.option_parser.on( '-F', '--one-ata-time', 'Create a separate pproph output file for each analysis' ) do
|
86
|
-
prophet_tool.options.one_ata_time = true
|
87
|
-
end
|
88
|
-
|
89
|
-
prophet_tool.options.decoy_prefix="decoy"
|
90
|
-
prophet_tool.option_parser.on( '--decoy-prefix prefix', 'Prefix for decoy sequences') do |prefix|
|
91
|
-
prophet_tool.options.decoy_prefix = prefix
|
92
|
-
end
|
93
|
-
|
94
|
-
prophet_tool.options.no_decoys = false
|
95
|
-
prophet_tool.option_parser.on( '--no-decoy', 'Don\'t use decoy sequences to pin down the negative distribution') do
|
96
|
-
prophet_tool.options.no_decoys = true
|
97
|
-
end
|
98
|
-
|
99
|
-
prophet_tool.options.experiment_label=nil
|
100
|
-
prophet_tool.option_parser.on('--experiment-label label','used to commonly label all spectra belonging to one experiment (required by iProphet)') do |label|
|
101
|
-
prophet_tool.options.experiment_label = label
|
102
|
-
end
|
103
|
-
|
104
|
-
prophet_tool.options.override_database=nil
|
105
|
-
prophet_tool.option_parser.on( '--override-database database', 'Manually specify database') do |database|
|
106
|
-
prophet_tool.options.override_database = database
|
107
|
-
end
|
108
|
-
|
109
|
-
exit unless prophet_tool.check_options
|
110
|
-
|
111
|
-
if ( ARGV[0].nil? )
|
112
|
-
puts "You must supply an input file"
|
113
|
-
puts prophet_tool.option_parser
|
114
|
-
exit
|
115
|
-
end
|
28
|
+
@output_suffix="_pproph"
|
29
|
+
prophet_tool.options.database=nil
|
30
|
+
|
31
|
+
prophet_tool.add_boolean_option(:useicat,false,['--useicat',"Use icat information"])
|
32
|
+
prophet_tool.add_boolean_option(:phospho,false,['--phospho',"Use phospho information"])
|
33
|
+
prophet_tool.add_boolean_option(:usepi,false,['--usepi',"Use pI information"])
|
34
|
+
prophet_tool.add_boolean_option(:usert,false,['--usert',"Use hydrophobicity / RT information"])
|
35
|
+
prophet_tool.add_boolean_option(:accurate_mass,false,['--accurate-mass',"Use accurate mass binning"])
|
36
|
+
prophet_tool.add_boolean_option(:no_ntt,false,['--no-ntt',"Don't use NTT model"])
|
37
|
+
prophet_tool.add_boolean_option(:no_nmc,false,['--no-nmc',"Don't use NMC model"])
|
38
|
+
prophet_tool.add_boolean_option(:usegamma,false,['--usegamma',"Use Gamma distribution to model the negatives"])
|
39
|
+
prophet_tool.add_boolean_option(:use_only_expect,false,['--use-only-expect',"Only use Expect Score as the discriminant"])
|
40
|
+
prophet_tool.add_boolean_option(:force_fit,false,['--force-fit',"Force fitting of mixture model and bypass checks"])
|
41
|
+
prophet_tool.add_boolean_option(:allow_alt_instruments,false,['--allow-alt-instruments',"Warning instead of exit with error if instrument types between runs is different"])
|
42
|
+
prophet_tool.add_boolean_option(:one_ata_time,false,['-F', '--one-ata-time', 'Create a separate pproph output file for each analysis'])
|
43
|
+
prophet_tool.add_value_option(:decoy_prefix,"decoy",['--decoy-prefix prefix', 'Prefix for decoy sequences'])
|
44
|
+
prophet_tool.add_boolean_option(:no_decoys,false,['--no-decoy', 'Don\'t use decoy sequences to pin down the negative distribution'])
|
45
|
+
prophet_tool.add_value_option(:experiment_label,nil,['--experiment-label label','used to commonly label all spectra belonging to one experiment (required by iProphet)'])
|
46
|
+
|
47
|
+
exit unless prophet_tool.check_options(true)
|
116
48
|
|
117
49
|
throw "When --output and -F options are set only one file at a time can be run" if ( ARGV.length> 1 ) && ( prophet_tool.explicit_output!=nil ) && (prophet_tool.one_ata_time!=nil)
|
118
50
|
|
@@ -133,10 +65,11 @@ inputs.each {|file_name|
|
|
133
65
|
name=file_name.chomp
|
134
66
|
|
135
67
|
engine=prophet_tool.extract_engine(name)
|
136
|
-
if prophet_tool.
|
137
|
-
db_path = prophet_tool.
|
68
|
+
if prophet_tool.database
|
69
|
+
db_path = prophet_tool.database_info.path
|
138
70
|
else
|
139
71
|
db_path=prophet_tool.extract_db(name)
|
72
|
+
throw "Unable to find database #{db_path} used for searching. Specify database path using -d option" unless File.exist?(db_path)
|
140
73
|
end
|
141
74
|
|
142
75
|
|
@@ -157,10 +90,10 @@ inputs=file_info.collect do |info|
|
|
157
90
|
end
|
158
91
|
throw "All files to be analyzed must have been searched with the same database and search engine" unless (info[1][:engine]==engine) && (info[1][:database])
|
159
92
|
|
160
|
-
retname=
|
161
|
-
if ( info[0]=~/\.dat$/)
|
162
|
-
|
163
|
-
end
|
93
|
+
retname= info[0]
|
94
|
+
# if ( info[0]=~/\.dat$/)
|
95
|
+
# retname=info[0]
|
96
|
+
# end
|
164
97
|
|
165
98
|
retname
|
166
99
|
|
@@ -219,9 +152,7 @@ def generate_command(genv,prophet_tool,inputs,output,database,engine)
|
|
219
152
|
|
220
153
|
if prophet_tool.useicat
|
221
154
|
cmd << " -Oi "
|
222
|
-
|
223
|
-
|
224
|
-
if prophet_tool.nouseicat
|
155
|
+
else
|
225
156
|
cmd << " -Of"
|
226
157
|
end
|
227
158
|
|
@@ -255,9 +186,7 @@ def run_peptide_prophet(genv,prophet_tool,cmd,output_path,engine)
|
|
255
186
|
if ( !prophet_tool.over_write && Pathname.new(output_path).exist? )
|
256
187
|
genv.log("Skipping analysis on existing file #{output_path}",:warn)
|
257
188
|
else
|
258
|
-
|
259
|
-
job_params={:jobid=>engine, :vmem=>"900mb", :queue => "lowmem"}
|
260
|
-
code=prophet_tool.run(cmd,genv,job_params,jobscript_path)
|
189
|
+
code=prophet_tool.run(cmd,genv)
|
261
190
|
throw "Command failed with exit code #{code}" unless code==0
|
262
191
|
end
|
263
192
|
end
|
@@ -265,27 +194,23 @@ end
|
|
265
194
|
|
266
195
|
cmd=""
|
267
196
|
if ( prophet_tool.one_ata_time )
|
268
|
-
|
269
|
-
|
270
|
-
output_file_name=
|
197
|
+
|
198
|
+
inputs.each do |input|
|
199
|
+
output_file_name=Tool.default_output_path(input,".pep.xml",prophet_tool.output_prefix,@output_suffix)
|
271
200
|
|
272
201
|
cmd=generate_command(genv,prophet_tool,input,output_file_name,database,engine)
|
202
|
+
run_peptide_prophet(genv,prophet_tool,cmd,output_file_name,engine)
|
203
|
+
end
|
273
204
|
|
274
|
-
run_peptide_prophet(genv,prophet_tool,cmd,output_file_base_name,engine)
|
275
|
-
|
276
|
-
|
277
|
-
}
|
278
205
|
else
|
206
|
+
|
279
207
|
if (prophet_tool.explicit_output==nil)
|
280
|
-
output_file_name="
|
208
|
+
output_file_name=Tool.default_output_path(inputs,".pep.xml",prophet_tool.output_prefix,@output_suffix)
|
281
209
|
else
|
282
|
-
|
283
210
|
output_file_name=prophet_tool.explicit_output
|
284
|
-
|
285
211
|
end
|
212
|
+
|
286
213
|
cmd=generate_command(genv,prophet_tool,inputs,output_file_name,database,engine)
|
287
|
-
puts cmd
|
288
|
-
%x['ls']
|
289
214
|
run_peptide_prophet(genv,prophet_tool,cmd,output_file_name,engine)
|
290
215
|
|
291
216
|
end
|
data/bin/pepxml_to_table.rb
CHANGED
@@ -19,26 +19,17 @@ include LibXML
|
|
19
19
|
tool=Tool.new([:explicit_output])
|
20
20
|
tool.option_parser.banner = "Convert a pepXML file to a tab delimited table.\n\nUsage: pepxml_to_table.rb [options] file1.pep.xml"
|
21
21
|
|
22
|
-
exit unless tool.check_options
|
23
|
-
|
24
|
-
if ( ARGV[0].nil? )
|
25
|
-
puts "You must supply an input file"
|
26
|
-
puts tool.option_parser
|
27
|
-
exit
|
28
|
-
end
|
29
|
-
|
30
|
-
# Obtain a global environment object
|
31
|
-
#genv=Constants.new
|
22
|
+
exit unless tool.check_options(true)
|
32
23
|
|
33
24
|
input_file=ARGV[0]
|
34
25
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
26
|
+
if tool.explicit_output
|
27
|
+
output_fh=File.new("#{tool.explicit_output}",'w')
|
28
|
+
else
|
29
|
+
output_fh=$stdout
|
30
|
+
end
|
40
31
|
|
41
|
-
output_fh.write "protein\tpeptide\tassumed_charge\tcalc_neutral_pep_mass\tneutral_mass\tretention_time\tstart_scan\tend_scan\tsearch_engine\tpeptideprophet_prob\tinterprophet_prob\n"
|
32
|
+
output_fh.write "protein\tpeptide\tassumed_charge\tcalc_neutral_pep_mass\tneutral_mass\tretention_time\tstart_scan\tend_scan\tsearch_engine\traw_score\tpeptideprophet_prob\tinterprophet_prob\n"
|
42
33
|
|
43
34
|
XML::Error.set_handler(&XML::Error::QUIET_HANDLER)
|
44
35
|
|
@@ -68,18 +59,24 @@ spectrum_queries.each do |query|
|
|
68
59
|
start_scan=query.attributes['start_scan']
|
69
60
|
end_scan=query.attributes['end_scan']
|
70
61
|
|
71
|
-
|
72
|
-
|
62
|
+
run_summary_node=query.parent
|
63
|
+
# puts run_summary_node
|
64
|
+
search_summary_node=run_summary_node.find("./#{pepxml_ns_prefix}search_summary",pepxml_ns)[0]
|
65
|
+
# puts search_summary_node.attributes.each { |e| puts e }
|
66
|
+
search_engine=search_summary_node.attributes['search_engine']
|
67
|
+
|
68
|
+
# search_engine=""
|
73
69
|
|
74
|
-
search_engine=query.parent.attributes['search_engine']
|
75
70
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
71
|
+
raw_score=""
|
72
|
+
case search_engine
|
73
|
+
when /[Tt]andem/
|
74
|
+
search_score_nodes=top_search_hit.find("./#{pepxml_ns_prefix}search_score[@name=\"expect\"]",[pepxml_ns])
|
75
|
+
raw_score=search_score_nodes[0].attributes['value']
|
76
|
+
when /MS\-GF/
|
77
|
+
search_score_nodes=top_search_hit.find("./#{pepxml_ns_prefix}search_score[@name=\"EValue\"]",[pepxml_ns])
|
78
|
+
raw_score=search_score_nodes[0].attributes['value']
|
79
|
+
end
|
83
80
|
|
84
81
|
|
85
82
|
pp_result=top_search_hit.find("./#{pepxml_ns_prefix}analysis_result/#{pepxml_ns_prefix}peptideprophet_result/@probability",pepxml_ns)
|
@@ -90,7 +87,7 @@ spectrum_queries.each do |query|
|
|
90
87
|
peptide_prophet_prob=pp_result[0].value if ( pp_result.length>0 )
|
91
88
|
interprophet_prob=ip_result[0].value if ( ip_result.length>0)
|
92
89
|
|
93
|
-
output_fh.write "#{protein}\t#{peptide}\t#{assumed_charge}\t#{calc_neutral_pep_mass}\t#{neutral_mass}\t#{retention_time}\t#{start_scan}\t#{end_scan}\t#{search_engine}\t#{peptide_prophet_prob}\t#{interprophet_prob}\n"
|
90
|
+
output_fh.write "#{protein}\t#{peptide}\t#{assumed_charge}\t#{calc_neutral_pep_mass}\t#{neutral_mass}\t#{retention_time}\t#{start_scan}\t#{end_scan}\t#{search_engine}\t#{raw_score}\t#{peptide_prophet_prob}\t#{interprophet_prob}\n"
|
94
91
|
|
95
92
|
end
|
96
93
|
|
data/bin/protein_prophet.rb
CHANGED
@@ -17,98 +17,41 @@ for_galaxy = GalaxyUtil.for_galaxy?
|
|
17
17
|
|
18
18
|
# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
|
19
19
|
#
|
20
|
-
prophet_tool=ProphetTool.new([:glyco,:explicit_output,:over_write,:
|
20
|
+
prophet_tool=ProphetTool.new([:glyco,:explicit_output,:over_write,:prefix])
|
21
21
|
prophet_tool.option_parser.banner = "Run ProteinProphet on a set of pep.xml input files.\n\nUsage: protein_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
|
22
|
-
prophet_tool.options.output_suffix="_protproph"
|
23
22
|
|
24
|
-
|
25
|
-
prophet_tool.option_parser.on( '--iprophet-input',"Inputs are from iProphet" ) do
|
26
|
-
prophet_tool.options.iproph = true
|
27
|
-
end
|
28
|
-
|
29
|
-
prophet_tool.options.nooccam = false
|
30
|
-
prophet_tool.option_parser.on( '--no-occam',"Do not attempt to derive the simplest protein list explaining observed peptides" ) do
|
31
|
-
prophet_tool.options.nooccam = true
|
32
|
-
end
|
33
|
-
|
34
|
-
prophet_tool.options.groupwts = false
|
35
|
-
prophet_tool.option_parser.on( '--group-wts',"Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold" ) do
|
36
|
-
prophet_tool.options.groupwts = true
|
37
|
-
end
|
38
|
-
|
39
|
-
prophet_tool.options.normprotlen = false
|
40
|
-
prophet_tool.option_parser.on( '--norm-protlen',"Normalize NSP using Protein Length" ) do
|
41
|
-
prophet_tool.options.normprotlen = true
|
42
|
-
end
|
23
|
+
@output_suffix="_protproph"
|
43
24
|
|
44
|
-
prophet_tool.
|
45
|
-
prophet_tool.
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
prophet_tool.
|
50
|
-
prophet_tool.
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
prophet_tool.
|
55
|
-
prophet_tool.
|
56
|
-
prophet_tool.options.allpeps = true
|
57
|
-
end
|
58
|
-
|
59
|
-
prophet_tool.options.unmapped = false
|
60
|
-
prophet_tool.option_parser.on( '--unmapped',"Report results for unmapped proteins" ) do
|
61
|
-
prophet_tool.options.unmapped = true
|
62
|
-
end
|
63
|
-
|
64
|
-
prophet_tool.options.instances = false
|
65
|
-
prophet_tool.option_parser.on( '--instances',"Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment" ) do
|
66
|
-
prophet_tool.options.instances = true
|
67
|
-
end
|
68
|
-
|
69
|
-
prophet_tool.options.delude = false
|
70
|
-
prophet_tool.option_parser.on( '--delude',"Do NOT use peptide degeneracy information when assessing proteins" ) do
|
71
|
-
prophet_tool.options.delude = true
|
72
|
-
end
|
73
|
-
|
74
|
-
prophet_tool.options.minprob = 0.05
|
75
|
-
prophet_tool.option_parser.on( '--minprob mp',"Minimum peptide prophet probability for peptides to be considered" ) do |mp|
|
76
|
-
prophet_tool.options.minprob = mp
|
77
|
-
end
|
78
|
-
|
79
|
-
prophet_tool.options.minindep = 0
|
80
|
-
prophet_tool.option_parser.on( '--minindep mp',"Minimum percentage of independent peptides required for a protein" ) do |mp|
|
81
|
-
prophet_tool.options.minindep = mp
|
82
|
-
end
|
83
|
-
|
84
|
-
exit unless prophet_tool.check_options
|
85
|
-
|
86
|
-
if ( ARGV[0].nil? )
|
87
|
-
puts "You must supply an input file"
|
88
|
-
puts prophet_tool.option_parser
|
89
|
-
exit
|
90
|
-
end
|
25
|
+
prophet_tool.add_boolean_option(:iproph,false,['--iprophet-input',"Inputs are from iProphet"])
|
26
|
+
prophet_tool.add_boolean_option(:nooccam,false,['--no-occam',"Do not attempt to derive the simplest protein list explaining observed peptides"])
|
27
|
+
prophet_tool.add_boolean_option(:groupwts,false,['--group-wts',"Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold"])
|
28
|
+
prophet_tool.add_boolean_option(:normprotlen,false,['--norm-protlen',"Normalize NSP using Protein Length"])
|
29
|
+
prophet_tool.add_boolean_option(:logprobs,false,['--log-prob',"Use the log of probability in the confidence calculations"])
|
30
|
+
prophet_tool.add_boolean_option(:confem,false,['--confem',"Use the EM to compute probability given the confidence"])
|
31
|
+
prophet_tool.add_boolean_option(:allpeps,false,['--allpeps',"Consider all possible peptides in the database in the confidence model"])
|
32
|
+
prophet_tool.add_boolean_option(:unmapped,false,['--unmapped',"Report results for unmapped proteins"])
|
33
|
+
prophet_tool.add_boolean_option(:instances,false,['--instances',"Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment"])
|
34
|
+
prophet_tool.add_boolean_option(:delude,false,['--delude',"Do NOT use peptide degeneracy information when assessing proteins"])
|
35
|
+
prophet_tool.add_value_option(:minprob,0.05,['--minprob mp',"Minimum peptide prophet probability for peptides to be considered"])
|
36
|
+
prophet_tool.add_value_option(:minindep,0,['--minindep mp',"Minimum percentage of independent peptides required for a protein"])
|
91
37
|
|
38
|
+
exit unless prophet_tool.check_options(true)
|
92
39
|
|
93
40
|
# Obtain a global environment object
|
94
41
|
genv=Constants.new
|
95
42
|
|
96
|
-
|
97
|
-
output_file="#{prophet_tool.output_prefix}interact#{prophet_tool.output_suffix}.prot.xml"
|
98
|
-
else
|
99
|
-
output_file=prophet_tool.explicit_output
|
100
|
-
end
|
43
|
+
inputs = ARGV.collect {|file_name| file_name.chomp }
|
101
44
|
|
102
|
-
|
45
|
+
if ( prophet_tool.explicit_output )
|
46
|
+
output_file=prophet_tool.explicit_output
|
47
|
+
else
|
48
|
+
output_file=Tool.default_output_path(inputs,".prot.xml",prophet_tool.output_prefix,@output_suffix)
|
49
|
+
end
|
103
50
|
|
104
51
|
if ( !Pathname.new(output_file).exist? || prophet_tool.over_write )
|
105
52
|
|
106
53
|
cmd="ProteinProphet NOPLOT "
|
107
54
|
|
108
|
-
inputs = ARGV.collect {|file_name|
|
109
|
-
file_name.chomp
|
110
|
-
}
|
111
|
-
|
112
55
|
if for_galaxy
|
113
56
|
inputs = inputs.collect {|ip| GalaxyUtil.stage_pepxml(ip) }
|
114
57
|
end
|
@@ -122,10 +65,7 @@ if ( !Pathname.new(output_file).exist? || prophet_tool.over_write )
|
|
122
65
|
|
123
66
|
# Run the analysis
|
124
67
|
#
|
125
|
-
|
126
|
-
job_params={:jobid=>"protproph", :vmem=>"900mb", :queue => "lowmem"}
|
127
|
-
genv.log("Running #{cmd}",:info)
|
128
|
-
code = prophet_tool.run(cmd,genv,job_params,jobscript_path)
|
68
|
+
code = prophet_tool.run(cmd,genv)
|
129
69
|
throw "Command failed with exit code #{code}" unless code==0
|
130
70
|
else
|
131
71
|
genv.log("Protein Prophet output file #{output_file} already exists. Run with -r option to replace",:warn)
|