protk 1.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 14/12/2010
5
+ #
6
+ # Runs an MS/MS search using the OMSSA search engine
7
+ #
8
+ $VERBOSE=nil
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/search_tool'
13
+
14
+
15
+ # Setup specific command-line options for this tool. Other options are inherited from SearchTool
16
+ #
17
+ search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
18
+ search_tool.option_parser.banner = "Run an OMSSA msms search on a set of mgf input files.\n\nUsage: omssa_search.rb [options] file1.mgf file2.mgf ..."
19
+ search_tool.options.output_suffix="_omssa"
20
+
21
+ search_tool.options.add_retention_times=true
22
+ search_tool.option_parser.on( '-R', '--no-add-retention-times', 'Don\'t post process the output to add retention times' ) do
23
+ search_tool.options.add_retention_times=false
24
+ end
25
+
26
+ search_tool.options.max_hit_expect=1
27
+ search_tool.option_parser.on( '--max-hit-expect exp', 'Expect values less than this are considered to be hits' ) do |exp|
28
+ search_tool.options.max_hit_expect=exp
29
+ end
30
+
31
+ search_tool.options.intensity_cut_off=0.0005
32
+ search_tool.option_parser.on( '--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity' ) do |co|
33
+ search_tool.options.intensity_cut_off=co
34
+ end
35
+
36
+
37
+ search_tool.option_parser.parse!
38
+
39
+ # Environment with global constants
40
+ #
41
+ genv=Constants.new
42
+
43
+ # Set search engine specific parameters on the SearchTool object
44
+ #
45
+ rt_correct_bin="#{File.dirname(__FILE__)}/correct_omssa_retention_times.rb"
46
+ repair_script_bin="#{File.dirname(__FILE__)}/repair_run_summary.rb"
47
+
48
+ case
49
+ when Pathname.new(search_tool.database).exist? # It's an explicitly named db
50
+ current_db=Pathname.new(search_tool.database).realpath.to_s
51
+ else
52
+ current_db=search_tool.current_database :fasta
53
+ end
54
+
55
+ fragment_tol = search_tool.fragment_tol
56
+ precursor_tol = search_tool.precursor_tol
57
+
58
+
# --output names a single result file, so it cannot be combined with several inputs.
# raise is used rather than throw: throw is meant for catch/throw control flow, not errors.
raise "When --output is set only one file at a time can be run" if ARGV.length > 1 && !search_tool.explicit_output.nil?
60
+
61
+ # Run the search engine on each input file
62
+ #
63
+ ARGV.each do |filename|
64
+
65
+ if ( search_tool.explicit_output!=nil)
66
+ output_path=search_tool.explicit_output
67
+ else
68
+ output_path="#{search_tool.output_base_path(filename.chomp)}.pep.xml"
69
+ end
70
+
71
+ # We always perform searches on mgf files so
72
+ #
73
+ input_path="#{search_tool.input_base_path(filename.chomp)}.mgf"
74
+ input_ext=Pathname.new(filename).extname
75
+
76
+ if ( input_ext==".dat" )
77
+ # This is a file provided by galaxy so we need to leave the .dat extension
78
+ input_path="#{search_tool.input_base_path(filename.chomp)}.dat"
79
+ end
80
+
81
+
82
+ # Only proceed if the output file is not present or we have opted to over-write it
83
+ #
84
+ if ( search_tool.over_write || !Pathname.new(output_path).exist? )
85
+
86
+ # The basic command
87
+ #
88
+ cmd= "#{genv.omssacl} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
89
+
90
+ #Missed cleavages
91
+ #
92
+ cmd << " -v #{search_tool.missed_cleavages}"
93
+
94
+ # Precursor tolerance
95
+ #
96
+ if ( search_tool.precursor_tolu=="ppm")
97
+ cmd << " -teppm"
98
+ end
99
+ cmd << " -te #{search_tool.precursor_tol}"
100
+
101
+ # Fragment ion tolerance
102
+ #
103
+ cmd << " -to #{fragment_tol}" #Always in Da
104
+
105
+ # Set the search type (monoisotopic vs average masses) and whether to use strict monoisotopic masses
106
+ #
107
+ if ( search_tool.precursor_search_type=="monoisotopic")
108
+ if ( search_tool.strict_monoisotopic_mass )
109
+ cmd << " -tem 0"
110
+ else
111
+ cmd << " -tem 4 -ti #{search_tool.num_peaks_for_multi_isotope_search}"
112
+ end
113
+ else
114
+ cmd << " -tem 1"
115
+ end
116
+
117
+ # Enzyme
118
+ #
119
+ if ( search_tool.enzyme!="Trypsin")
120
+ cmd << " -e #{search_tool.enzyme}"
121
+ end
122
+
# Variable Modifications
#
# An explicit comma separated list is passed to omssacl with -mv. An empty value, or one
# containing "None" (which is what galaxy sends), counts as "no list given"; in that case
# only the glyco option can add a variable modification.
#
# `!~` is used deliberately: `!value =~ /None/` negates the string before matching, so the
# list was never honoured.
var_mods_spec = search_tool.var_mods.to_s
if !var_mods_spec.empty? && var_mods_spec !~ /None/
  var_mods = var_mods_spec.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.join(",")
  cmd << " -mv #{var_mods}" unless var_mods.empty?
elsif search_tool.glyco
  # Add options related to peptide modifications
  cmd << " -mv 119 "
end
137
+
# Fixed Modifications
#
# An explicit comma separated list is passed to omssacl with -mf. An empty value, or one
# containing "None", falls back to the individual modification switches on the tool.
#
# `!~` is used deliberately: `!value =~ /None/` negates the string before matching, so the
# list was never honoured.
fix_mods_spec = search_tool.fix_mods.to_s
if !fix_mods_spec.empty? && fix_mods_spec !~ /None/
  fix_mods = fix_mods_spec.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.join(",")
  cmd << " -mf #{fix_mods}" unless fix_mods.empty?
elsif search_tool.has_modifications
  # NOTE(review): the ids below are appended space separated, whereas the explicit list
  # above is comma separated - confirm which form omssacl expects.
  cmd << " -mf "
  cmd << "3 " if search_tool.carbamidomethyl
  cmd << "1 " if search_tool.methionine_oxidation
end
156
+
# Searched ions
#
# A comma separated list is passed to omssacl with -i. Nothing is added for an empty value
# or one containing "None".
#
# `!~` is used deliberately: `!value =~ /None/` negates the string before matching, so the
# option was never emitted.
searched_ions_spec = search_tool.searched_ions.to_s
if !searched_ions_spec.empty? && searched_ions_spec !~ /None/
  searched_ions = searched_ions_spec.split(",").map { |ion| ion.strip }.reject { |ion| ion.empty? }.join(",")
  cmd << " -i #{searched_ions}" unless searched_ions.empty?
end
164
+
165
+ # Infer precursor charges or respect charges in input file
166
+ #
167
+ if ( search_tool.respect_precursor_charges )
168
+ cmd << " -zcc 1"
169
+ end
170
+
171
+
172
+ # Max expect
173
+ #
174
+ cmd << " -he #{search_tool.max_hit_expect}"
175
+
176
+ # Intensity cut-off
177
+ cmd << " -ci #{search_tool.intensity_cut_off}"
178
+
179
+ # Up to here we've formulated the omssa command. The rest is cleanup
180
+ p "Running:#{cmd}"
181
+
182
+ # Add retention time corrections
183
+ #
184
+ if (search_tool.options.add_retention_times)
185
+ cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
186
+ end
187
+
188
+ # Correct the pepXML file
189
+ #
190
+ cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
191
+ genv.log("Running repair script command #{cmd}",:info)
192
+
193
+ # Run the search
194
+ #
195
+ job_params= {:jobid => search_tool.jobid_from_filename(filename) }
196
+ job_params[:queue]="lowmem"
197
+ job_params[:vmem]="900mb"
198
+ search_tool.run(cmd,genv,job_params)
199
+
200
+
201
+ else
202
+ genv.log("Skipping search on existing file #{output_path}",:warn)
203
+ end
204
+
205
+ end
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 18/1/2011
5
+ #
6
+ # A wrapper for PeptideProphet
7
+ #
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/prophet_tool'
13
+
14
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
15
+ #
16
+ prophet_tool=ProphetTool.new({:glyco=>true,:explicit_output=>true,:maldi=>true})
17
+ prophet_tool.option_parser.banner = "Run PeptideProphet on a set of pep.xml input files.\n\nUsage: peptide_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
18
+ prophet_tool.options.output_suffix="_pproph"
19
+
20
+ prophet_tool.options.useicat = false
21
+ prophet_tool.option_parser.on( '--useicat',"Use icat information" ) do
22
+ prophet_tool.options.useicat = true
23
+ end
24
+
25
+ prophet_tool.options.nouseicat = false
26
+ prophet_tool.option_parser.on( '--no-useicat',"Do not use icat information" ) do
27
+ prophet_tool.options.nouseicat = true
28
+ end
29
+
30
+ prophet_tool.options.phospho = false
31
+ prophet_tool.option_parser.on( '--phospho',"Use phospho information" ) do
32
+ prophet_tool.options.phospho = true
33
+ end
34
+
35
+ prophet_tool.options.usepi = false
36
+ prophet_tool.option_parser.on( '--usepi',"Use pI information" ) do
37
+ prophet_tool.options.usepi = true
38
+ end
39
+
40
+ prophet_tool.options.usert = false
41
+ prophet_tool.option_parser.on( '--usert',"Use hydrophobicity / RT information" ) do
42
+ prophet_tool.options.usert = true
43
+ end
44
+
45
+ prophet_tool.options.accurate_mass = false
46
+ prophet_tool.option_parser.on( '--accurate-mass',"Use accurate mass binning" ) do
47
+ prophet_tool.options.accurate_mass = true
48
+ end
49
+
50
+ prophet_tool.options.no_ntt = false
51
+ prophet_tool.option_parser.on( '--no-ntt',"Don't use NTT model" ) do
52
+ prophet_tool.options.no_ntt = true
53
+ end
54
+
55
+ prophet_tool.options.no_nmc = false
56
+ prophet_tool.option_parser.on( '--no-nmc',"Don't use NMC model" ) do
57
+ prophet_tool.options.no_nmc = true
58
+ end
59
+
60
+ prophet_tool.options.usegamma = false
61
+ prophet_tool.option_parser.on( '--usegamma',"Use Gamma distribution to model the negatives" ) do
62
+ prophet_tool.options.usegamma = true
63
+ end
64
+
65
+ prophet_tool.options.use_only_expect = false
66
+ prophet_tool.option_parser.on( '--use-only-expect',"Only use Expect Score as the discriminant" ) do
67
+ prophet_tool.options.use_only_expect = true
68
+ end
69
+
70
+ prophet_tool.options.force_fit = false
71
+ prophet_tool.option_parser.on( '--force-fit',"Force fitting of mixture model and bypass checks" ) do
72
+ prophet_tool.options.force_fit = true
73
+ end
74
+
75
+ prophet_tool.options.allow_alt_instruments=false
76
+ prophet_tool.option_parser.on( '--allow-alt-instruments',"Warning instead of exit with error if instrument types between runs is different" ) do
77
+ prophet_tool.options.allow_alt_instruments = true
78
+ end
79
+
80
+ prophet_tool.options.one_ata_time = false
81
+ prophet_tool.option_parser.on( '-F', '--one-ata-time', 'Create a separate pproph output file for each analysis' ) do
82
+ prophet_tool.options.one_ata_time = true
83
+ end
84
+
85
+
86
+ prophet_tool.option_parser.parse!
87
+
# Abort when several inputs are combined with both --output and -F.
# one_ata_time defaults to false, so its truthiness is tested; comparing it with nil was
# always true and rejected multi-file runs that only used --output.
raise "When --output and -F options are set only one file at a time can be run" if ARGV.length > 1 && !prophet_tool.explicit_output.nil? && prophet_tool.one_ata_time
89
+
90
+ # Obtain a global environment object
91
+ genv=Constants.new
92
+
93
+
94
+ # Interrogate all the input files to obtain the database and search engine from them
95
+ #
96
+ genv.log("Determining search engine and database used to create input files ...",:info)
97
+ file_info={}
98
+ ARGV.each {|file_name|
99
+ name=file_name.chomp
100
+
101
+ engine=prophet_tool.extract_engine(name)
102
+ db_path=prophet_tool.extract_db(name)
103
+
104
+
105
+ file_info[name]={:engine=>engine , :database=>db_path }
106
+ }
107
+
# Check that all searches were performed with the same engine and database
#
# The engine and database of the first file become the reference values that every other
# file is compared against. The block also maps each file name to the path handed to
# xinteract: names ending in .dat are kept as they are, all others are normalised to
# "<base>.pep.xml".
engine = nil
database = nil
inputs = file_info.map do |name, info|
  engine = info[:engine] if engine.nil?
  database = info[:database] if database.nil?

  same_engine = info[:engine] == engine
  # A missing database is still rejected, and the value must now also match the reference.
  same_database = info[:database] && info[:database] == database
  raise "All files to be analyzed must have been searched with the same database and search engine" unless same_engine && same_database

  if name =~ /\.dat$/
    name
  else
    "#{prophet_tool.input_base_path(name, ".pep.xml")}.pep.xml"
  end
end
130
+
# Builds the xinteract command line used to run PeptideProphet.
#
# genv         - environment object; only +xinteract+ (the executable) is read from it.
# prophet_tool - tool object whose option readers (glyco, phospho, ...) switch flags on.
# inputs       - a single input path or an Array of input paths.
# output       - output path, passed with -N.
# database     - database path, passed with -D.
# engine       - search engine name; "omssa" and "phenyx" additionally get "-Op -P".
#
# Returns the complete command as a String.
def generate_command(genv, prophet_tool, inputs, output, database, engine)
  cmd = "#{genv.xinteract} -N#{output} -l7 -eT -D#{database} "

  # Option reader on prophet_tool paired with the xinteract flag it switches on,
  # listed in the order in which the flags are appended.
  option_flags = [
    [:glyco,                 " -Og "],
    [:phospho,               " -OH "],
    [:usepi,                 " -OI "],
    [:usert,                 " -OR "],
    [:accurate_mass,         " -OA "],
    [:no_ntt,                " -ON "],
    [:no_nmc,                " -OM "],
    [:usegamma,              " -OG "],
    [:use_only_expect,       " -OE "],
    [:force_fit,             " -OF "],
    [:allow_alt_instruments, " -Ow "],
    [:useicat,               " -Oi "],
    # The trailing space matters: the decoy flags below are appended without a leading one.
    [:nouseicat,             " -Of "],
    # NOTE(review): -T3 sits between -I2 and -I4; possibly meant to be -I3 - confirm
    # against the xinteract usage text.
    [:maldi,                 " -I2 -T3 -I4 -I5 -I6 -I7 "]
  ]
  option_flags.each do |reader, flag|
    cmd << flag if prophet_tool.public_send(reader)
  end

  cmd << "-Op -P " if engine == "omssa" || engine == "phenyx"
  cmd << "-ddecoy "

  input_list = inputs.is_a?(Array) ? inputs.join(" ") : inputs
  cmd << " #{input_list}"

  cmd
end
206
+
# Runs a PeptideProphet command unless its output file is already present.
#
# genv         - environment object; used for logging and handed on to prophet_tool.run.
# prophet_tool - tool object; +over_write+ decides whether an existing output is replaced
#                and +run+ executes the command.
# cmd          - command line to execute.
# output_path  - expected output file; the job script is written to "<output_path>.pbs.sh".
# engine       - search engine name, used as the job id.
#
# Raises RuntimeError when the command finishes with a non-zero exit code.
def run_peptide_prophet(genv, prophet_tool, cmd, output_path, engine)
  if !prophet_tool.over_write && Pathname.new(output_path).exist?
    genv.log("Skipping analysis on existing file #{output_path}", :warn)
    return
  end

  jobscript_path = "#{output_path}.pbs.sh"
  job_params = { :jobid => engine, :vmem => "900mb", :queue => "lowmem" }
  code = prophet_tool.run(cmd, genv, job_params, jobscript_path)
  # raise rather than throw: throw is meant for catch/throw control flow, not for errors.
  raise "Command failed with exit code #{code}" unless code == 0
end
217
+
218
+
# Run PeptideProphet either once per input (-F) or once over all inputs together.
cmd = ""
if prophet_tool.one_ata_time
  inputs.each do |input|
    output_file_name = "#{prophet_tool.output_prefix}#{input}_#{engine}_interact#{prophet_tool.output_suffix}.pep.xml"

    cmd = generate_command(genv, prophet_tool, input, output_file_name, database, engine)

    # The existence check and job script name use the same path that is given to xinteract.
    run_peptide_prophet(genv, prophet_tool, cmd, output_file_name, engine)
  end
else
  output_file_name =
    if prophet_tool.explicit_output.nil?
      "#{prophet_tool.output_prefix}#{engine}_interact#{prophet_tool.output_suffix}.pep.xml"
    else
      prophet_tool.explicit_output
    end

  cmd = generate_command(genv, prophet_tool, inputs, output_file_name, database, engine)

  run_peptide_prophet(genv, prophet_tool, cmd, output_file_name, engine)
end
244
+
245
+
@@ -0,0 +1,78 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 18/1/2011
5
+ #
6
+ # Convert a pepXML file to a tab delimited table
7
+ #
8
+ #
9
+
10
+ require 'libxml'
11
+ require 'protk/constants'
12
+ require 'protk/command_runner'
13
+ require 'protk/tool'
14
+
15
+ include LibXML
16
+
17
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
18
+ #
19
+ tool=Tool.new({:explicit_output=>true})
20
+ tool.option_parser.banner = "Convert a pepXML file to a tab delimited table.\n\nUsage: pepxml_to_table.rb [options] file1.pep.xml"
21
+
22
+ tool.option_parser.parse!
23
+
24
+ # Obtain a global environment object
25
+ #genv=Constants.new
26
+
27
+ input_file=ARGV[0]
28
+
29
+ output_file="#{input_file}.txt"
30
+
31
+ output_file = tool.explicit_output if tool.explicit_output!=nil
32
+
33
+ output_fh=File.new("#{output_file}",'w')
34
+
35
+ output_fh.write "protein\tpeptide\tassumed_charge\tcalc_neutral_pep_mass\tneutral_mass\tretention_time\tstart_scan\tend_scan\tsearch_engine\tpeptideprophet_prob\tinterprophet_prob\n"
36
+
37
+ pepxml_parser=XML::Parser.file("#{input_file}")
38
+ pepxml_doc=pepxml_parser.parse
39
+
40
+ spectrum_queries=pepxml_doc.find('//xmlns:spectrum_query','xmlns:http://regis-web.systemsbiology.net/pepXML')
41
+
42
+ spectrum_queries.each do |query|
43
+
44
+ retention_time=query.attributes['retention_time_sec']
45
+ neutral_mass=query.attributes['precursor_neutral_mass']
46
+ assumed_charge=query.attributes['assumed_charge']
47
+
48
+ top_search_hit=query.find('./xmlns:search_result/xmlns:search_hit','xmlns:http://regis-web.systemsbiology.net/pepXML')[0]
49
+ peptide=top_search_hit.attributes['peptide']
50
+ protein=top_search_hit.attributes['protein']
51
+ calc_neutral_pep_mass=top_search_hit.attributes['calc_neutral_pep_mass']
52
+ start_scan=query.attributes['start_scan']
53
+ end_scan=query.attributes['end_scan']
54
+
55
+ search_engine=""
56
+ search_score_names=top_search_hit.find('./xmlns:search_score/@name','xmlns:http://regis-web.systemsbiology.net/pepXML').collect {|s| s.to_s}
57
+
# Guess the search engine from the names of the search_score attributes on the top hit.
# grep returns an Array, and even an empty Array is truthy, so each match is tested with
# any?. search_engine keeps its previous value when no pattern matches.
if search_score_names.length == 2 && search_score_names.grep(/^name.*=.*pvalue/).any?
  search_engine = "omssa"
elsif search_score_names.grep(/^name.*=.*ionscore/).any?
  search_engine = "mascot"
elsif search_score_names.grep(/^name.*=.*hyperscore/).any?
  search_engine = "x!tandem"
end
65
+
66
+ pp_result=top_search_hit.find('./xmlns:analysis_result/xmlns:peptideprophet_result/@probability','xmlns:http://regis-web.systemsbiology.net/pepXML')
67
+ ip_result=top_search_hit.find('./xmlns:analysis_result/xmlns:interprophet_result/@probability','xmlns:http://regis-web.systemsbiology.net/pepXML')
68
+
69
+ peptide_prophet_prob=""
70
+ interprophet_prob=""
71
+ peptide_prophet_prob=pp_result[0].value if ( pp_result.length>0 )
72
+ interprophet_prob=ip_result[0].value if ( ip_result.length>0)
73
+
74
+ output_fh.write "#{protein}\t#{peptide}\t#{assumed_charge}\t#{calc_neutral_pep_mass}\t#{neutral_mass}\t#{retention_time}\t#{start_scan}\t#{end_scan}\t#{search_engine}\t#{peptide_prophet_prob}\t#{interprophet_prob}\n"
75
+
76
+ end
77
+
78
+ output_fh.close
@@ -0,0 +1,140 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 17/1/2011
5
+ #
6
+ # Runs the Protein Prophet tool on a set of pep.xml files. Accepts input from peptide_prophet or interprophet.
7
+ #
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/prophet_tool'
13
+ require 'protk/galaxy_stager'
14
+ require 'protk/galaxy_util'
15
+
16
+ for_galaxy = GalaxyUtil.for_galaxy
17
+
18
+ if for_galaxy
19
+ # Stage files for galaxy
20
+ original_input_file = ARGV[0]
21
+ original_input_path = Pathname.new("#{original_input_file}")
22
+ input_stager = GalaxyStager.new("#{original_input_file}", :extension => '.pep.xml')
23
+ ARGV.push("-o")
24
+ ARGV.push("protein_prophet_results.prot.xml")
25
+ end
26
+
27
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
28
+ #
29
+ prophet_tool=ProphetTool.new({:glyco=>true,:explicit_output=>true})
30
+ prophet_tool.option_parser.banner = "Run ProteinProphet on a set of pep.xml input files.\n\nUsage: protein_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
31
+ prophet_tool.options.output_suffix="_protproph"
32
+
33
+ prophet_tool.options.iproph = false
34
+ prophet_tool.option_parser.on( '--iprophet-input',"Inputs are from iProphet" ) do
35
+ prophet_tool.options.iproph = true
36
+ end
37
+
38
+ prophet_tool.options.nooccam = false
39
+ prophet_tool.option_parser.on( '--no-occam',"Do not attempt to derive the simplest protein list explaining observed peptides" ) do
40
+ prophet_tool.options.nooccam = true
41
+ end
42
+
43
+ prophet_tool.options.groupwts = false
44
+ prophet_tool.option_parser.on( '--group-wts',"Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold" ) do
45
+ prophet_tool.options.groupwts = true
46
+ end
47
+
48
+ prophet_tool.options.normprotlen = false
49
+ prophet_tool.option_parser.on( '--norm-protlen',"Normalize NSP using Protein Length" ) do
50
+ prophet_tool.options.normprotlen = true
51
+ end
52
+
53
+ prophet_tool.options.logprobs = false
54
+ prophet_tool.option_parser.on( '--log-prob',"Use the log of probability in the confidence calculations" ) do
55
+ prophet_tool.options.logprobs = true
56
+ end
57
+
58
+ prophet_tool.options.confem = false
59
+ prophet_tool.option_parser.on( '--confem',"Use the EM to compute probability given the confidence" ) do
60
+ prophet_tool.options.confem = true
61
+ end
62
+
63
+ prophet_tool.options.allpeps = false
64
+ prophet_tool.option_parser.on( '--allpeps',"Consider all possible peptides in the database in the confidence model" ) do
65
+ prophet_tool.options.allpeps = true
66
+ end
67
+
68
+ prophet_tool.options.unmapped = false
69
+ prophet_tool.option_parser.on( '--unmapped',"Report results for unmapped proteins" ) do
70
+ prophet_tool.options.unmapped = true
71
+ end
72
+
73
+ prophet_tool.options.instances = false
74
+ prophet_tool.option_parser.on( '--instances',"Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment" ) do
75
+ prophet_tool.options.instances = true
76
+ end
77
+
78
+ prophet_tool.options.delude = false
79
+ prophet_tool.option_parser.on( '--delude',"Do NOT use peptide degeneracy information when assessing proteins" ) do
80
+ prophet_tool.options.delude = true
81
+ end
82
+
83
+ prophet_tool.options.minprob = 0.05
84
+ prophet_tool.option_parser.on( '--minprob mp',"Minimum peptide prophet probability for peptides to be considered" ) do |mp|
85
+ prophet_tool.options.minprob = mp
86
+ end
87
+
88
+ prophet_tool.options.minindep = 0
89
+ prophet_tool.option_parser.on( '--minindep mp',"Minimum percentage of independent peptides required for a protein" ) do |mp|
90
+ prophet_tool.options.minindep = mp
91
+ end
92
+
93
+ prophet_tool.option_parser.parse!
94
+
95
+
96
+ # Obtain a global environment object
97
+ genv=Constants.new
98
+
99
+ if ( prophet_tool.explicit_output==nil )
100
+ output_file="#{prophet_tool.output_prefix}interact#{prophet_tool.output_suffix}.prot.xml"
101
+ else
102
+ output_file=prophet_tool.explicit_output
103
+ end
104
+
105
+ p output_file
106
+
107
+ if ( !Pathname.new(output_file).exist? || prophet_tool.over_write )
108
+
109
+ cmd="#{genv.proteinprophet} "
110
+
111
+ inputs = ARGV.collect {|file_name|
112
+ file_name.chomp
113
+ }
114
+
115
+ cmd << " #{inputs.join(" ")} #{output_file}"
116
+
117
+ if ( prophet_tool.glyco )
118
+ cmd << " GLYC "
119
+ end
120
+
121
+ # Run the analysis
122
+ #
123
+ jobscript_path="#{output_file}.pbs.sh"
124
+ job_params={:jobid=>"protproph", :vmem=>"900mb", :queue => "lowmem"}
125
+ genv.log("Running #{cmd}",:info)
126
+ code = prophet_tool.run(cmd,genv,job_params,jobscript_path)
127
+ throw "Command failed with exit code #{code}" unless code==0
128
+ else
129
+ genv.log("Protein Prophet output file #{output_file} already exists. Run with -r option to replace",:warn)
130
+ end
131
+
132
+ if for_galaxy
133
+ # Restore references to peptide prophet xml so downstream tools like
134
+ # libra can find it.
135
+ input_stager.restore_references("protein_prophet_results.prot.xml")
136
+ end
137
+
138
+
139
+
140
+
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 20/10/2012
5
+ #
6
+ # Post-install setup for protk.
7
+ # Installs third party tools
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/setup_tool'
12
+ require 'yaml'
13
+ require 'pp'
14
+
15
+
16
+ # Setup specific command-line options for this tool. Other options are inherited from Tool
17
+ #
18
+ tool=SetupTool.new
19
+ if ( tool.option_parser.banner=="")
20
+ tool.option_parser.banner = "Post install tasks for protk.\nUsage: protk_setup.rb [options] toolname"
21
+ end
22
+
23
+ tool.option_parser.parse!
24
+
25
+ # Create install directory if it doesn't already exist
26
+ #
27
+ env=Constants.new
28
+
29
+ ARGV.each do |toolname|
30
+ tool.install toolname
31
+ end