protk 1.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +85 -0
- data/bin/annotate_ids.rb +59 -0
- data/bin/big_search.rb +41 -0
- data/bin/correct_omssa_retention_times.rb +27 -0
- data/bin/feature_finder.rb +76 -0
- data/bin/file_convert.rb +157 -0
- data/bin/generate_omssa_loc.rb +42 -0
- data/bin/interprophet.rb +91 -0
- data/bin/make_decoy.rb +64 -0
- data/bin/manage_db.rb +123 -0
- data/bin/mascot_search.rb +187 -0
- data/bin/mascot_to_pepxml.rb +44 -0
- data/bin/msgfplus_search.rb +191 -0
- data/bin/omssa_search.rb +205 -0
- data/bin/peptide_prophet.rb +245 -0
- data/bin/pepxml_to_table.rb +78 -0
- data/bin/protein_prophet.rb +140 -0
- data/bin/protk_setup.rb +31 -0
- data/bin/repair_run_summary.rb +113 -0
- data/bin/tandem_search.rb +292 -0
- data/bin/template_search.rb +144 -0
- data/bin/unimod_to_loc.rb +118 -0
- data/bin/xls_to_table.rb +46 -0
- data/ext/protk/extconf.rb +3 -0
- data/ext/protk/protk.c +235 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +210 -0
- data/lib/protk/biotools_excel_converter.rb +60 -0
- data/lib/protk/command_runner.rb +84 -0
- data/lib/protk/constants.rb +296 -0
- data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
- data/lib/protk/data/apt-get_packages.yaml +47 -0
- data/lib/protk/data/brew_packages.yaml +10 -0
- data/lib/protk/data/default_config.yml +20 -0
- data/lib/protk/data/predefined_db.crap.yaml +19 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
- data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
- data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
- data/lib/protk/data/tandem_params.xml +56 -0
- data/lib/protk/data/taxonomy_template.xml +9 -0
- data/lib/protk/data/unimod.xml +16780 -0
- data/lib/protk/eupathdb_gene_information_table.rb +158 -0
- data/lib/protk/galaxy_stager.rb +24 -0
- data/lib/protk/galaxy_util.rb +9 -0
- data/lib/protk/manage_db_rakefile.rake +484 -0
- data/lib/protk/manage_db_tool.rb +181 -0
- data/lib/protk/mascot_util.rb +63 -0
- data/lib/protk/omssa_util.rb +57 -0
- data/lib/protk/plasmodb.rb +50 -0
- data/lib/protk/prophet_tool.rb +85 -0
- data/lib/protk/protein_annotator.rb +646 -0
- data/lib/protk/protxml.rb +137 -0
- data/lib/protk/randomize.rb +7 -0
- data/lib/protk/search_tool.rb +182 -0
- data/lib/protk/setup_rakefile.rake +245 -0
- data/lib/protk/setup_tool.rb +19 -0
- data/lib/protk/spreadsheet_extensions.rb +78 -0
- data/lib/protk/swissprot_database.rb +38 -0
- data/lib/protk/tool.rb +182 -0
- data/lib/protk/xtandem_defaults.rb +11 -0
- data/lib/protk.rb +18 -0
- metadata +256 -0
data/bin/omssa_search.rb
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 14/12/2010
|
5
|
+
#
|
6
|
+
# Runs an MS/MS search using the OMSSA search engine
|
7
|
+
#
|
8
|
+
$VERBOSE=nil
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/command_runner'
|
12
|
+
require 'protk/search_tool'
|
13
|
+
|
14
|
+
|
15
|
+
# Configure the command-line options specific to the OMSSA search tool.
# The generic search options are supplied by SearchTool itself.
#
search_tool = SearchTool.new({
  :msms_search => true,
  :background => false,
  :glyco => true,
  :database => true,
  :explicit_output => true,
  :over_write => true,
  :msms_search_detailed_options => true
})

parser = search_tool.option_parser
settings = search_tool.options

parser.banner = "Run an OMSSA msms search on a set of mgf input files.\n\nUsage: omssa_search.rb [options] file1.mgf file2.mgf ..."
settings.output_suffix = "_omssa"

# Retention times are added by default; -R switches the post-processing off.
settings.add_retention_times = true
parser.on('-R', '--no-add-retention-times', "Don't post process the output to add retention times") do
  settings.add_retention_times = false
end

# Values given on the command line are stored as the raw strings handed over by the parser.
settings.max_hit_expect = 1
parser.on('--max-hit-expect exp', 'Expect values less than this are considered to be hits') do |value|
  settings.max_hit_expect = value
end

settings.intensity_cut_off = 0.0005
parser.on('--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity') do |value|
  settings.intensity_cut_off = value
end

parser.parse!
|
38
|
+
|
39
|
+
# Environment with global constants
#
genv = Constants.new

# Helper scripts that post-process the OMSSA output. They are looked up in the
# same directory as this script.
#
script_dir = File.dirname(__FILE__)
rt_correct_bin = "#{script_dir}/correct_omssa_retention_times.rb"
repair_script_bin = "#{script_dir}/repair_run_summary.rb"

# The database argument is used as a file path when such a file exists;
# otherwise it is resolved through SearchTool#current_database.
#
explicit_db = Pathname.new(search_tool.database)
current_db = if explicit_db.exist?
  explicit_db.realpath.to_s
else
  search_tool.current_database :fasta
end

fragment_tol = search_tool.fragment_tol

# A single explicit output path cannot receive the results of several inputs.
# `raise` (rather than `throw`) reports the message as a normal error.
if ARGV.length > 1 && !search_tool.explicit_output.nil?
  raise "When --output is set only one file at a time can be run"
end
|
60
|
+
|
61
|
+
# Normalises a comma separated option value: trims whitespace around each
# entry and drops empty entries.
normalise_list = lambda do |raw|
  raw.split(",").map { |entry| entry.strip }.reject { |entry| entry.empty? }.join(",")
end

# True when an option value was actually supplied. Galaxy passes the literal
# text "None" for unset values, so that is treated as "not supplied".
supplied = lambda do |raw|
  raw != "" && raw !~ /None/
end

# Run the search engine on each input file
#
ARGV.each do |filename|
  input_name = filename.chomp

  if search_tool.explicit_output.nil?
    output_path = "#{search_tool.output_base_path(input_name)}.pep.xml"
  else
    output_path = search_tool.explicit_output
  end

  # Searches are always performed on mgf files. Files provided by galaxy
  # carry a .dat extension, which must be left in place.
  #
  input_ext = Pathname.new(filename).extname == ".dat" ? ".dat" : ".mgf"
  input_path = "#{search_tool.input_base_path(input_name)}#{input_ext}"

  # Only proceed if the output file is not present or we have opted to over-write it
  #
  if !search_tool.over_write && Pathname.new(output_path).exist?
    genv.log("Skipping search on existing file #{output_path}",:warn)
    next
  end

  # The basic command
  #
  cmd = "#{genv.omssacl} -d #{current_db} -fm #{input_path} -op #{output_path} -w"

  # Missed cleavages
  #
  cmd << " -v #{search_tool.missed_cleavages}"

  # Precursor tolerance
  #
  cmd << " -teppm" if search_tool.precursor_tolu == "ppm"
  cmd << " -te #{search_tool.precursor_tol}"

  # Fragment ion tolerance (always in Da)
  #
  cmd << " -to #{fragment_tol}"

  # Set the search type (monoisotopic vs average masses) and whether to use strict monoisotopic masses
  #
  if search_tool.precursor_search_type == "monoisotopic"
    if search_tool.strict_monoisotopic_mass
      cmd << " -tem 0"
    else
      cmd << " -tem 4 -ti #{search_tool.num_peaks_for_multi_isotope_search}"
    end
  else
    cmd << " -tem 1"
  end

  # Enzyme. Nothing is passed for Trypsin.
  #
  cmd << " -e #{search_tool.enzyme}" if search_tool.enzyme != "Trypsin"

  # Variable modifications.
  # The previous test `!value =~ /None/` negated the string before matching,
  # so it was never true and explicit modifications were silently ignored.
  #
  var_mods_spec = search_tool.var_mods.to_s
  if supplied.call(var_mods_spec)
    var_mods = normalise_list.call(var_mods_spec)
    cmd << " -mv #{var_mods}" unless var_mods.empty?
  elsif search_tool.glyco
    cmd << " -mv 119 "
  end

  # Fixed modifications. When none are given explicitly, fall back to the
  # carbamidomethyl / methionine oxidation switches. The ids are joined with
  # commas, and -mf is omitted entirely when no id is selected.
  #
  fix_mods_spec = search_tool.fix_mods.to_s
  if supplied.call(fix_mods_spec)
    fix_mods = normalise_list.call(fix_mods_spec)
    cmd << " -mf #{fix_mods}" unless fix_mods.empty?
  elsif search_tool.has_modifications
    default_fix_mods = []
    default_fix_mods << "3" if search_tool.carbamidomethyl
    default_fix_mods << "1" if search_tool.methionine_oxidation
    cmd << " -mf #{default_fix_mods.join(",")}" unless default_fix_mods.empty?
  end

  # Ion series to search
  #
  searched_ions_spec = search_tool.searched_ions.to_s
  if supplied.call(searched_ions_spec)
    searched_ions = normalise_list.call(searched_ions_spec)
    cmd << " -i #{searched_ions}" unless searched_ions.empty?
  end

  # Infer precursor charges or respect charges in input file
  #
  cmd << " -zcc 1" if search_tool.respect_precursor_charges

  # Max expect
  #
  cmd << " -he #{search_tool.max_hit_expect}"

  # Intensity cut-off
  #
  cmd << " -ci #{search_tool.intensity_cut_off}"

  # Up to here we've formulated the omssa command. The rest is cleanup
  p "Running:#{cmd}"

  # Add retention time corrections
  #
  if search_tool.options.add_retention_times
    cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
  end

  # Correct the pepXML file
  #
  cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
  genv.log("Running repair script command #{cmd}",:info)

  # Run the search
  #
  job_params = {
    :jobid => search_tool.jobid_from_filename(filename),
    :queue => "lowmem",
    :vmem => "900mb"
  }
  search_tool.run(cmd,genv,job_params)
end
|
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 18/1/2011
|
5
|
+
#
|
6
|
+
# A wrapper for PeptideProphet
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/command_runner'
|
12
|
+
require 'protk/prophet_tool'
|
13
|
+
|
14
|
+
# Configure the command-line options specific to the PeptideProphet wrapper.
# The remaining options are supplied by ProphetTool.
#
prophet_tool = ProphetTool.new({ :glyco => true, :explicit_output => true, :maldi => true })
parser = prophet_tool.option_parser
settings = prophet_tool.options

parser.banner = "Run PeptideProphet on a set of pep.xml input files.\n\nUsage: peptide_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
settings.output_suffix = "_pproph"

# Every entry is a boolean switch: the named setting starts out false and is
# set to true when the switch appears on the command line. The order is the
# order in which the switches are registered with the parser.
boolean_switches = [
  [:useicat,               ['--useicat', "Use icat information"]],
  [:nouseicat,             ['--no-useicat', "Do not use icat information"]],
  [:phospho,               ['--phospho', "Use phospho information"]],
  [:usepi,                 ['--usepi', "Use pI information"]],
  [:usert,                 ['--usert', "Use hydrophobicity / RT information"]],
  [:accurate_mass,         ['--accurate-mass', "Use accurate mass binning"]],
  [:no_ntt,                ['--no-ntt', "Don't use NTT model"]],
  [:no_nmc,                ['--no-nmc', "Don't use NMC model"]],
  [:usegamma,              ['--usegamma', "Use Gamma distribution to model the negatives"]],
  [:use_only_expect,       ['--use-only-expect', "Only use Expect Score as the discriminant"]],
  [:force_fit,             ['--force-fit', "Force fitting of mixture model and bypass checks"]],
  [:allow_alt_instruments, ['--allow-alt-instruments', "Warning instead of exit with error if instrument types between runs is different"]],
  [:one_ata_time,          ['-F', '--one-ata-time', 'Create a separate pproph output file for each analysis']]
]

boolean_switches.each do |setting, switch_args|
  settings.send("#{setting}=", false)
  parser.on(*switch_args) do
    settings.send("#{setting}=", true)
  end
end

parser.parse!
|
87
|
+
|
88
|
+
# With -F every input produces its own output file, so one explicit --output
# path cannot serve several inputs. one_ata_time defaults to false (never nil),
# so it is tested for truthiness rather than compared with nil.
raise "When --output and -F options are set only one file at a time can be run" if ( ARGV.length > 1 ) && !prophet_tool.explicit_output.nil? && prophet_tool.one_ata_time
|
89
|
+
|
90
|
+
# Global environment object
genv = Constants.new

# Ask ProphetTool which search engine and database produced each input file.
# The result maps the (chomped) input file name to a hash with :engine and
# :database entries.
#
genv.log("Determining search engine and database used to create input files ...",:info)
file_info = ARGV.each_with_object({}) do |file_name, collected|
  input_name = file_name.chomp
  collected[input_name] = {
    :engine => prophet_tool.extract_engine(input_name),
    :database => prophet_tool.extract_db(input_name)
  }
end
|
107
|
+
|
108
|
+
# Check that all searches were performed with the same engine and database,
# and build the list of input paths handed to xinteract.
#
# The engine and database of the first file become the reference values;
# every file (including the first) is then compared against them. The database
# is compared for equality: previously it was only tested for truthiness, so a
# mismatch could never be detected.
#
engine = nil
database = nil
inputs = file_info.collect do |input_name, details|
  engine = details[:engine] if engine.nil?
  database = details[:database] if database.nil?

  unless details[:engine] == engine && details[:database] == database
    raise "All files to be analyzed must have been searched with the same database and search engine"
  end

  if input_name =~ /\.dat$/
    # .dat inputs are passed through under their own name
    input_name
  else
    "#{prophet_tool.input_base_path(input_name,".pep.xml")}.pep.xml"
  end
end
|
130
|
+
|
131
|
+
# Builds the xinteract command line for a PeptideProphet run.
#
# genv         - object responding to #xinteract (the xinteract executable)
# prophet_tool - object answering the boolean settings queried below
#                (glyco, phospho, usepi, ..., maldi)
# inputs       - a single input path or an Array of input paths
# output       - output file name, passed as -N<output>
# database     - database path, passed as -D<database>
# engine       - search engine name; "omssa" and "phenyx" add "-Op -P"
#
# Returns the command as a String. Every flag is emitted with a leading space
# so that adjacent flags can never fuse into one token (previously " -Of" was
# followed directly by "-Op"/"-ddecoy" when the maldi flags were absent).
def generate_command(genv,prophet_tool,inputs,output,database,engine)
  cmd = "#{genv.xinteract} -N#{output} -l7 -eT -D#{database} "

  # Boolean settings and the xinteract flag each one enables, in emission order.
  flag_table = [
    [:glyco,                 "-Og"],
    [:phospho,               "-OH"],
    [:usepi,                 "-OI"],
    [:usert,                 "-OR"],
    [:accurate_mass,         "-OA"],
    [:no_ntt,                "-ON"],
    [:no_nmc,                "-OM"],
    [:usegamma,              "-OG"],
    [:use_only_expect,       "-OE"],
    [:force_fit,             "-OF"],
    [:allow_alt_instruments, "-Ow"],
    [:useicat,               "-Oi"],
    [:nouseicat,             "-Of"]
  ]

  flag_table.each do |setting, flag|
    cmd << " #{flag} " if prophet_tool.send(setting)
  end

  # NOTE(review): "-T3" sits in a run of -I<n> flags and may be a typo for
  # "-I3"; left unchanged pending confirmation against the xinteract usage.
  cmd << " -I2 -T3 -I4 -I5 -I6 -I7 " if prophet_tool.maldi

  cmd << " -Op -P" if engine == "omssa" || engine == "phenyx"
  cmd << " -ddecoy "

  input_list = inputs.is_a?(Array) ? inputs.join(" ") : inputs
  cmd << " #{input_list}"

  cmd
end
|
206
|
+
|
207
|
+
# Runs a prepared PeptideProphet command unless its output already exists.
#
# genv         - environment object; receives a :warn log entry when the run is skipped
# prophet_tool - object responding to #over_write and #run
# cmd          - the command to run
# output_path  - path of the expected output file; also the stem of the job script path
# engine       - search engine name, used as the job id
#
# Raises RuntimeError when the command finishes with a non-zero exit code.
def run_peptide_prophet(genv,prophet_tool,cmd,output_path,engine)
  # Existing results are only replaced when over-writing was requested.
  if !prophet_tool.over_write && File.exist?(output_path)
    genv.log("Skipping analysis on existing file #{output_path}",:warn)
    return
  end

  jobscript_path = "#{output_path}.pbs.sh"
  job_params = { :jobid => engine, :vmem => "900mb", :queue => "lowmem" }
  code = prophet_tool.run(cmd,genv,job_params,jobscript_path)

  # `raise` reports the failure as an ordinary error with this message;
  # `throw` would only produce an "uncaught throw" error.
  raise "Command failed with exit code #{code}" unless code == 0
end
|
217
|
+
|
218
|
+
|
219
|
+
# Run PeptideProphet, either once per input file (-F) or once over all inputs.
#
if prophet_tool.one_ata_time
  inputs.each do |input|
    output_file_name = "#{prophet_tool.output_prefix}#{input}_#{engine}_interact#{prophet_tool.output_suffix}.pep.xml"

    cmd = generate_command(genv,prophet_tool,input,output_file_name,database,engine)

    # The output path handed over here must be the file named in the command
    # (an undefined variable was referenced here before).
    run_peptide_prophet(genv,prophet_tool,cmd,output_file_name,engine)
  end
else
  if prophet_tool.explicit_output.nil?
    output_file_name = "#{prophet_tool.output_prefix}#{engine}_interact#{prophet_tool.output_suffix}.pep.xml"
  else
    output_file_name = prophet_tool.explicit_output
  end

  cmd = generate_command(genv,prophet_tool,inputs,output_file_name,database,engine)

  run_peptide_prophet(genv,prophet_tool,cmd,output_file_name,engine)
end
|
244
|
+
|
245
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 18/1/2011
|
5
|
+
#
|
6
|
+
# Convert a pepXML file to a tab delimited table
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'libxml'
|
11
|
+
require 'protk/constants'
|
12
|
+
require 'protk/command_runner'
|
13
|
+
require 'protk/tool'
|
14
|
+
|
15
|
+
include LibXML
|
16
|
+
|
17
|
+
# Setup specific command-line options for this tool. Other options are inherited from Tool
#
tool=Tool.new({:explicit_output=>true})
tool.option_parser.banner = "Convert a pepXML file to a tab delimited table.\n\nUsage: pepxml_to_table.rb [options] file1.pep.xml"

tool.option_parser.parse!

input_file = ARGV[0]

# Default output name is the input name with ".txt" appended
output_file = tool.explicit_output.nil? ? "#{input_file}.txt" : tool.explicit_output

# Namespace declaration passed with every XPath query
pepxml_ns = 'xmlns:http://regis-web.systemsbiology.net/pepXML'

# The block form closes the output file even when parsing fails part-way.
File.open("#{output_file}",'w') do |output_fh|

  output_fh.write "protein\tpeptide\tassumed_charge\tcalc_neutral_pep_mass\tneutral_mass\tretention_time\tstart_scan\tend_scan\tsearch_engine\tpeptideprophet_prob\tinterprophet_prob\n"

  pepxml_parser = XML::Parser.file("#{input_file}")
  pepxml_doc = pepxml_parser.parse

  spectrum_queries = pepxml_doc.find('//xmlns:spectrum_query', pepxml_ns)

  spectrum_queries.each do |query|

    retention_time = query.attributes['retention_time_sec']
    neutral_mass = query.attributes['precursor_neutral_mass']
    assumed_charge = query.attributes['assumed_charge']
    start_scan = query.attributes['start_scan']
    end_scan = query.attributes['end_scan']

    # Only the first search hit of each query is reported
    top_search_hit = query.find('./xmlns:search_result/xmlns:search_hit', pepxml_ns)[0]

    # A query without any search hit has nothing to report
    next if top_search_hit.nil?

    peptide = top_search_hit.attributes['peptide']
    protein = top_search_hit.attributes['protein']
    calc_neutral_pep_mass = top_search_hit.attributes['calc_neutral_pep_mass']

    search_score_names = top_search_hit.find('./xmlns:search_score/@name', pepxml_ns).collect { |s| s.to_s }

    # Guess the search engine from the names of the scores it reported.
    # grep returns an Array, which is truthy even when empty, so each result
    # must be tested for content.
    search_engine = ""
    if search_score_names.length == 2 && !search_score_names.grep(/^name.*=.*pvalue/).empty?
      search_engine = "omssa"
    elsif !search_score_names.grep(/^name.*=.*ionscore/).empty?
      search_engine = "mascot"
    elsif !search_score_names.grep(/^name.*=.*hyperscore/).empty?
      search_engine = "x!tandem"
    end

    pp_result = top_search_hit.find('./xmlns:analysis_result/xmlns:peptideprophet_result/@probability', pepxml_ns)
    ip_result = top_search_hit.find('./xmlns:analysis_result/xmlns:interprophet_result/@probability', pepxml_ns)

    # Probabilities stay blank when the corresponding analysis is absent
    peptide_prophet_prob = ""
    interprophet_prob = ""
    peptide_prophet_prob = pp_result[0].value if pp_result.length > 0
    interprophet_prob = ip_result[0].value if ip_result.length > 0

    output_fh.write "#{protein}\t#{peptide}\t#{assumed_charge}\t#{calc_neutral_pep_mass}\t#{neutral_mass}\t#{retention_time}\t#{start_scan}\t#{end_scan}\t#{search_engine}\t#{peptide_prophet_prob}\t#{interprophet_prob}\n"

  end

end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 17/1/2011
|
5
|
+
#
|
6
|
+
# Runs the Protein Prophet tool on a set of pep.xml files. Accepts input from peptide_prophet or interprophet.
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/command_runner'
|
12
|
+
require 'protk/prophet_tool'
|
13
|
+
require 'protk/galaxy_stager'
|
14
|
+
require 'protk/galaxy_util'
|
15
|
+
|
16
|
+
# When invoked from galaxy, wrap the first argument in a GalaxyStager (with a
# .pep.xml extension) and force the output to a fixed file name.
for_galaxy = GalaxyUtil.for_galaxy

if for_galaxy
  original_input_file = ARGV.first
  original_input_path = Pathname.new("#{original_input_file}")
  input_stager = GalaxyStager.new("#{original_input_file}", :extension => '.pep.xml')
  ARGV.push("-o", "protein_prophet_results.prot.xml")
end
|
26
|
+
|
27
|
+
# Configure the command-line options specific to the ProteinProphet wrapper.
# The remaining options are supplied by ProphetTool.
#
prophet_tool = ProphetTool.new({ :glyco => true, :explicit_output => true })
parser = prophet_tool.option_parser
settings = prophet_tool.options

parser.banner = "Run ProteinProphet on a set of pep.xml input files.\n\nUsage: protein_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
settings.output_suffix = "_protproph"

# Boolean switches: each setting starts out false and becomes true when its
# switch is given. The order is the order of registration with the parser.
boolean_switches = [
  [:iproph,      '--iprophet-input', "Inputs are from iProphet"],
  [:nooccam,     '--no-occam',       "Do not attempt to derive the simplest protein list explaining observed peptides"],
  [:groupwts,    '--group-wts',      "Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold"],
  [:normprotlen, '--norm-protlen',   "Normalize NSP using Protein Length"],
  [:logprobs,    '--log-prob',       "Use the log of probability in the confidence calculations"],
  [:confem,      '--confem',         "Use the EM to compute probability given the confidence"],
  [:allpeps,     '--allpeps',        "Consider all possible peptides in the database in the confidence model"],
  [:unmapped,    '--unmapped',       "Report results for unmapped proteins"],
  [:instances,   '--instances',      "Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment"],
  [:delude,      '--delude',         "Do NOT use peptide degeneracy information when assessing proteins"]
]

boolean_switches.each do |setting, switch, description|
  settings.send("#{setting}=", false)
  parser.on(switch, description) do
    settings.send("#{setting}=", true)
  end
end

# Thresholds: a value given on the command line is stored as the raw string
# handed over by the parser.
settings.minprob = 0.05
parser.on('--minprob mp', "Minimum peptide prophet probability for peptides to be considered") do |value|
  settings.minprob = value
end

settings.minindep = 0
parser.on('--minindep mp', "Minimum percentage of independent peptides required for a protein") do |value|
  settings.minindep = value
end

parser.parse!
|
94
|
+
|
95
|
+
|
96
|
+
# Obtain a global environment object
genv = Constants.new

if prophet_tool.explicit_output.nil?
  output_file = "#{prophet_tool.output_prefix}interact#{prophet_tool.output_suffix}.prot.xml"
else
  output_file = prophet_tool.explicit_output
end

p output_file

if !Pathname.new(output_file).exist? || prophet_tool.over_write

  inputs = ARGV.collect { |file_name| file_name.chomp }

  # ProteinProphet takes the inputs first, then the output file, then keywords
  cmd = "#{genv.proteinprophet} "
  cmd << " #{inputs.join(" ")} #{output_file}"

  cmd << " GLYC " if prophet_tool.glyco

  # Pass the remaining analysis options on to ProteinProphet; previously they
  # were parsed from the command line but never added to the command.
  # NOTE(review): keyword spellings follow the ProteinProphet usage text as
  # recalled — confirm against the installed TPP version.
  keyword_table = [
    [:iproph,      "IPROPHET"],
    [:nooccam,     "NOOCCAM"],
    [:groupwts,    "GROUPWTS"],
    [:normprotlen, "NORMPROTLEN"],
    [:logprobs,    "LOGPROBS"],
    [:confem,      "CONFEM"],
    [:allpeps,     "ALLPEPS"],
    [:unmapped,    "UNMAPPED"],
    [:instances,   "INSTANCES"],
    [:delude,      "DELUDE"]
  ]
  keyword_table.each do |setting, keyword|
    cmd << " #{keyword} " if prophet_tool.options.send(setting)
  end

  # Threshold values are appended directly to their keyword
  cmd << " MINPROB#{prophet_tool.options.minprob}"
  cmd << " MININDEP#{prophet_tool.options.minindep}"

  # Run the analysis
  #
  jobscript_path = "#{output_file}.pbs.sh"
  job_params = { :jobid => "protproph", :vmem => "900mb", :queue => "lowmem" }
  genv.log("Running #{cmd}",:info)
  code = prophet_tool.run(cmd,genv,job_params,jobscript_path)

  # `raise` reports the failure as an ordinary error with this message
  raise "Command failed with exit code #{code}" unless code == 0
else
  genv.log("Protein Prophet output file #{output_file} already exists. Run with -r option to replace",:warn)
end
|
131
|
+
|
132
|
+
# For galaxy runs, have the stager restore the references to the peptide
# prophet xml in the results file so that downstream tools like libra can
# find it.
input_stager.restore_references("protein_prophet_results.prot.xml") if for_galaxy
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
|
data/bin/protk_setup.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 20/10/2012
|
5
|
+
#
|
6
|
+
# Post-install setup for protk.
|
7
|
+
# Installs third party tools
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/setup_tool'
|
12
|
+
require 'yaml'
|
13
|
+
require 'pp'
|
14
|
+
|
15
|
+
|
16
|
+
# Configure the command-line options for this tool; the options themselves
# come from SetupTool. A banner is only supplied when none is set already.
#
tool = SetupTool.new
parser = tool.option_parser
parser.banner = "Post install tasks for protk.\nUsage: protk_setup.rb [options] toolname" if parser.banner == ""

parser.parse!

# Instantiate the global environment before installing anything
# NOTE(review): presumably this creates the install directory if it does not
# exist yet — confirm in Constants.
#
env = Constants.new

# Every remaining argument names a tool to install
ARGV.each { |toolname| tool.install toolname }
|