protk 1.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 14/12/2010
5
+ #
6
+ # Runs an MS/MS search using the OMSSA search engine
7
+ #
8
+ $VERBOSE=nil
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/search_tool'
13
+
14
+
15
+ # Setup specific command-line options for this tool. Other options are inherited from SearchTool
16
+ #
17
# Tool-specific command-line options for the OMSSA wrapper.
# Anything not declared here is inherited from SearchTool.
#
search_tool = SearchTool.new({
  :msms_search => true,
  :background => false,
  :glyco => true,
  :database => true,
  :explicit_output => true,
  :over_write => true,
  :msms_search_detailed_options => true
})
search_tool.option_parser.banner = "Run an OMSSA msms search on a set of mgf input files.\n\nUsage: omssa_search.rb [options] file1.mgf file2.mgf ..."
search_tool.options.output_suffix = "_omssa"

# Retention times are added by a post-processing step unless -R is given.
search_tool.options.add_retention_times = true
search_tool.option_parser.on('-R', '--no-add-retention-times', 'Don\'t post process the output to add retention times') do
  search_tool.options.add_retention_times = false
end

# Forwarded verbatim to omssacl as "-he"; the parsed value stays a String.
search_tool.options.max_hit_expect = 1
search_tool.option_parser.on('--max-hit-expect exp', 'Expect values less than this are considered to be hits') do |exp|
  search_tool.options.max_hit_expect = exp
end

# Forwarded verbatim to omssacl as "-ci"; the parsed value stays a String.
search_tool.options.intensity_cut_off = 0.0005
search_tool.option_parser.on('--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity') do |co|
  search_tool.options.intensity_cut_off = co
end

search_tool.option_parser.parse!

# Environment with global constants
#
genv = Constants.new

# Helper scripts that live next to this one and are chained after the search.
#
rt_correct_bin = "#{File.dirname(__FILE__)}/correct_omssa_retention_times.rb"
repair_script_bin = "#{File.dirname(__FILE__)}/repair_run_summary.rb"

# A database argument that names an existing file is used directly (resolved
# to its real path); otherwise it is looked up through the tool as a fasta db.
if Pathname.new(search_tool.database).exist?
  current_db = Pathname.new(search_tool.database).realpath.to_s
else
  current_db = search_tool.current_database :fasta
end

fragment_tol = search_tool.fragment_tol
precursor_tol = search_tool.precursor_tol

# A single explicit output path cannot receive the results of several searches.
# raise (not throw) is used so the failure is reported as an ordinary error
# instead of an uncaught catch/throw tag.
if ARGV.length > 1 && !search_tool.explicit_output.nil?
  raise "When --output is set only one file at a time can be run"
end
60
+
61
+ # Run the search engine on each input file
62
+ #
63
# True when +raw+ carries a real, user-supplied comma separated list.
# Empty (or nil) values and values containing "None" (which is what Galaxy
# sends for "nothing selected") count as not given.
def omssa_list_given?(raw)
  text = raw.to_s
  !text.empty? && text !~ /None/
end

# Normalises a comma separated list: strips whitespace around each entry,
# drops blank entries and re-joins with commas. Returns "" when nothing is left.
def omssa_clean_list(raw)
  raw.to_s.split(",").collect { |item| item.strip }.reject { |item| item.empty? }.join(",")
end

# Run the search engine on each input file
#
ARGV.each do |filename|

  if search_tool.explicit_output != nil
    output_path = search_tool.explicit_output
  else
    output_path = "#{search_tool.output_base_path(filename.chomp)}.pep.xml"
  end

  # Searches are always run on mgf files, except that files handed over by
  # galaxy keep their .dat extension.
  input_path = "#{search_tool.input_base_path(filename.chomp)}.mgf"
  if Pathname.new(filename).extname == ".dat"
    input_path = "#{search_tool.input_base_path(filename.chomp)}.dat"
  end

  # Only proceed if the output file is not present or we have opted to over-write it
  #
  if search_tool.over_write || !Pathname.new(output_path).exist?

    # The basic command
    cmd = "#{genv.omssacl} -d #{current_db} -fm #{input_path} -op #{output_path} -w"

    # Missed cleavages
    cmd << " -v #{search_tool.missed_cleavages}"

    # Precursor tolerance (optionally expressed in ppm)
    cmd << " -teppm" if search_tool.precursor_tolu == "ppm"
    cmd << " -te #{search_tool.precursor_tol}"

    # Fragment ion tolerance, always in Da
    cmd << " -to #{fragment_tol}"

    # Search type (monoisotopic vs average masses) and whether to use strict monoisotopic masses
    if search_tool.precursor_search_type == "monoisotopic"
      if search_tool.strict_monoisotopic_mass
        cmd << " -tem 0"
      else
        cmd << " -tem 4 -ti #{search_tool.num_peaks_for_multi_isotope_search}"
      end
    else
      cmd << " -tem 1"
    end

    # Enzyme (omitted for Trypsin)
    cmd << " -e #{search_tool.enzyme}" if search_tool.enzyme != "Trypsin"

    # Variable modifications.
    # The original test was written `!x =~ /None/`, which Ruby parses as
    # `(!x) =~ /None/`; that never matched, so an explicit list was ignored.
    if omssa_list_given?(search_tool.var_mods)
      var_mods = omssa_clean_list(search_tool.var_mods)
      cmd << " -mv #{var_mods}" if var_mods != ""
    elsif search_tool.glyco
      # No explicit list: fall back to the glyco preset.
      cmd << " -mv 119 "
    end

    # Fixed modifications
    if omssa_list_given?(search_tool.fix_mods)
      fix_mods = omssa_clean_list(search_tool.fix_mods)
      cmd << " -mf #{fix_mods}" if fix_mods != ""
    elsif search_tool.has_modifications
      # Preset ids are joined with commas and the flag is only emitted when at
      # least one id is present, so "-mf" can never swallow the next option.
      # NOTE(review): assumes omssacl wants a comma delimited id list - confirm.
      preset_ids = []
      preset_ids << "3" if search_tool.carbamidomethyl
      preset_ids << "1" if search_tool.methionine_oxidation
      cmd << " -mf #{preset_ids.join(",")}" unless preset_ids.empty?
    end

    # Ion series to search
    if omssa_list_given?(search_tool.searched_ions)
      searched_ions = omssa_clean_list(search_tool.searched_ions)
      cmd << " -i #{searched_ions}" if searched_ions != ""
    end

    # Infer precursor charges or respect charges in input file
    cmd << " -zcc 1" if search_tool.respect_precursor_charges

    # Max expect
    cmd << " -he #{search_tool.max_hit_expect}"

    # Intensity cut-off
    cmd << " -ci #{search_tool.intensity_cut_off}"

    # Up to here we've formulated the omssa command. The rest is cleanup
    p "Running:#{cmd}"

    # Add retention time corrections
    if search_tool.options.add_retention_times
      cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
    end

    # Correct the pepXML file
    cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
    genv.log("Running repair script command #{cmd}",:info)

    # Run the search
    job_params = {:jobid => search_tool.jobid_from_filename(filename)}
    job_params[:queue] = "lowmem"
    job_params[:vmem] = "900mb"
    search_tool.run(cmd,genv,job_params)

  else
    genv.log("Skipping search on existing file #{output_path}",:warn)
  end

end
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 18/1/2011
5
+ #
6
+ # A wrapper for PeptideProphet
7
+ #
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/prophet_tool'
13
+
14
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
15
+ #
16
# Tool-specific command-line options for the PeptideProphet wrapper.
# Anything not declared here is inherited from ProphetTool.
#
prophet_tool = ProphetTool.new({:glyco => true, :explicit_output => true, :maldi => true})
prophet_tool.option_parser.banner = "Run PeptideProphet on a set of pep.xml input files.\n\nUsage: peptide_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
prophet_tool.options.output_suffix = "_pproph"

# Each switch below is a boolean that starts out false and is flipped to true
# when the corresponding flag appears on the command line.

prophet_tool.options.useicat = false
prophet_tool.option_parser.on('--useicat', "Use icat information") do
  prophet_tool.options.useicat = true
end

prophet_tool.options.nouseicat = false
prophet_tool.option_parser.on('--no-useicat', "Do not use icat information") do
  prophet_tool.options.nouseicat = true
end

prophet_tool.options.phospho = false
prophet_tool.option_parser.on('--phospho', "Use phospho information") do
  prophet_tool.options.phospho = true
end

prophet_tool.options.usepi = false
prophet_tool.option_parser.on('--usepi', "Use pI information") do
  prophet_tool.options.usepi = true
end

prophet_tool.options.usert = false
prophet_tool.option_parser.on('--usert', "Use hydrophobicity / RT information") do
  prophet_tool.options.usert = true
end

prophet_tool.options.accurate_mass = false
prophet_tool.option_parser.on('--accurate-mass', "Use accurate mass binning") do
  prophet_tool.options.accurate_mass = true
end

prophet_tool.options.no_ntt = false
prophet_tool.option_parser.on('--no-ntt', "Don't use NTT model") do
  prophet_tool.options.no_ntt = true
end

prophet_tool.options.no_nmc = false
prophet_tool.option_parser.on('--no-nmc', "Don't use NMC model") do
  prophet_tool.options.no_nmc = true
end

prophet_tool.options.usegamma = false
prophet_tool.option_parser.on('--usegamma', "Use Gamma distribution to model the negatives") do
  prophet_tool.options.usegamma = true
end

prophet_tool.options.use_only_expect = false
prophet_tool.option_parser.on('--use-only-expect', "Only use Expect Score as the discriminant") do
  prophet_tool.options.use_only_expect = true
end

prophet_tool.options.force_fit = false
prophet_tool.option_parser.on('--force-fit', "Force fitting of mixture model and bypass checks") do
  prophet_tool.options.force_fit = true
end

prophet_tool.options.allow_alt_instruments = false
prophet_tool.option_parser.on('--allow-alt-instruments', "Warning instead of exit with error if instrument types between runs is different") do
  prophet_tool.options.allow_alt_instruments = true
end

prophet_tool.options.one_ata_time = false
prophet_tool.option_parser.on('-F', '--one-ata-time', 'Create a separate pproph output file for each analysis') do
  prophet_tool.options.one_ata_time = true
end

prophet_tool.option_parser.parse!

# Several inputs are only a problem when BOTH --output and -F are in effect.
# The -F flag defaults to false (never nil), so it is tested for truthiness;
# comparing it against nil made this guard fire even without -F.
if ARGV.length > 1 && !prophet_tool.explicit_output.nil? && prophet_tool.one_ata_time
  raise "When --output and -F options are set only one file at a time can be run"
end

# Obtain a global environment object
genv = Constants.new

# Interrogate all the input files to obtain the database and search engine from them
#
genv.log("Determining search engine and database used to create input files ...",:info)
file_info = {}
ARGV.each do |file_name|
  name = file_name.chomp
  engine = prophet_tool.extract_engine(name)
  db_path = prophet_tool.extract_db(name)
  file_info[name] = {:engine => engine, :database => db_path}
end

# Check that all searches were performed with the same engine and database.
# The first file seen defines the reference engine/database; every file must
# match both. The result is the list of input paths handed to xinteract.
#
engine = nil
database = nil
inputs = file_info.collect do |name, info|
  engine = info[:engine] if engine.nil?
  database = info[:database] if database.nil?

  unless info[:engine] == engine && info[:database] == database
    raise "All files to be analyzed must have been searched with the same database and search engine"
  end

  # Names ending in .dat are passed through untouched; everything else is
  # normalised to a .pep.xml path.
  retname = "#{prophet_tool.input_base_path(name,".pep.xml")}.pep.xml"
  retname = name if name =~ /\.dat$/
  retname
end
130
+
131
# Builds the xinteract command line for a PeptideProphet run.
#
# genv         - environment object; only its +xinteract+ value (the executable) is read
# prophet_tool - tool object whose boolean option readers select the flags to add
# inputs       - a single input path or an Array of input paths
# output       - output file name, passed as -N
# database     - database path, passed as -D
# engine       - search engine name; "omssa" and "phenyx" get "-Op -P" added
#
# Returns the command as a String. Every optional flag is appended with a
# space on both sides so neighbouring flags can never run together (the
# "-Of" flag previously had no trailing space and fused with what followed).
def generate_command(genv,prophet_tool,inputs,output,database,engine)
  cmd = "#{genv.xinteract} -N#{output} -l7 -eT -D#{database} "

  cmd << " -Og " if prophet_tool.glyco
  cmd << " -OH " if prophet_tool.phospho
  cmd << " -OI " if prophet_tool.usepi
  cmd << " -OR " if prophet_tool.usert
  cmd << " -OA " if prophet_tool.accurate_mass
  cmd << " -ON " if prophet_tool.no_ntt
  cmd << " -OM " if prophet_tool.no_nmc
  cmd << " -OG " if prophet_tool.usegamma
  cmd << " -OE " if prophet_tool.use_only_expect
  cmd << " -OF " if prophet_tool.force_fit
  cmd << " -Ow " if prophet_tool.allow_alt_instruments
  cmd << " -Oi " if prophet_tool.useicat
  cmd << " -Of " if prophet_tool.nouseicat

  # NOTE(review): "-T3" sits in a run of -I<n> options and may be a typo for
  # "-I3"; left exactly as found - confirm against the xinteract usage text.
  cmd << " -I2 -T3 -I4 -I5 -I6 -I7 " if prophet_tool.maldi

  if engine == "omssa" || engine == "phenyx"
    cmd << "-Op -P -ddecoy "
  else
    cmd << "-ddecoy "
  end

  if inputs.is_a?(Array)
    cmd << " #{inputs.join(" ")}"
  else
    cmd << " #{inputs}"
  end

  cmd
end
206
+
207
# Runs a prepared PeptideProphet command unless its output already exists.
#
# genv         - environment object; used for logging and handed on to the runner
# prophet_tool - tool object; +over_write+ decides whether existing output is
#                replaced and +run+ executes the command
# cmd          - the command line to execute
# output_path  - expected output file; also the stem of the job script name
# engine       - search engine name, used as the job id
#
# Raises RuntimeError when the command finishes with a non-zero exit code.
# (raise replaces the earlier throw, which is meant for catch/throw control
# flow and surfaced as an uncaught-throw error.)
def run_peptide_prophet(genv,prophet_tool,cmd,output_path,engine)
  if !prophet_tool.over_write && Pathname.new(output_path).exist?
    return genv.log("Skipping analysis on existing file #{output_path}",:warn)
  end

  jobscript_path = "#{output_path}.pbs.sh"
  job_params = {:jobid => engine, :vmem => "900mb", :queue => "lowmem"}
  code = prophet_tool.run(cmd,genv,job_params,jobscript_path)
  raise "Command failed with exit code #{code}" unless code == 0
end
217
+
218
+
219
# Either run PeptideProphet once per input (-F) or once over all inputs.
cmd = ""
if prophet_tool.one_ata_time
  inputs.each do |input|
    # One output file per input, named after that input.
    output_file_name = "#{prophet_tool.output_prefix}#{input}_#{engine}_interact#{prophet_tool.output_suffix}.pep.xml"

    cmd = generate_command(genv,prophet_tool,input,output_file_name,database,engine)

    # The output path passed here is the name computed above; the previous
    # code referenced an undefined variable and failed with a NameError.
    run_peptide_prophet(genv,prophet_tool,cmd,output_file_name,engine)
  end
else
  # A single combined analysis: honour --output when given, otherwise derive
  # the name from the engine.
  if prophet_tool.explicit_output == nil
    output_file_name = "#{prophet_tool.output_prefix}#{engine}_interact#{prophet_tool.output_suffix}.pep.xml"
  else
    output_file_name = prophet_tool.explicit_output
  end

  cmd = generate_command(genv,prophet_tool,inputs,output_file_name,database,engine)

  run_peptide_prophet(genv,prophet_tool,cmd,output_file_name,engine)
end
244
+
245
+
@@ -0,0 +1,78 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 18/1/2011
5
+ #
6
+ # Convert a pepXML file to a tab delimited table
7
+ #
8
+ #
9
+
10
+ require 'libxml'
11
+ require 'protk/constants'
12
+ require 'protk/command_runner'
13
+ require 'protk/tool'
14
+
15
+ include LibXML
16
+
17
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
18
+ #
19
# Namespace declaration handed to every XPath query against the pepXML document.
PEPXML_NS = 'xmlns:http://regis-web.systemsbiology.net/pepXML'

# Guesses the search engine from the names of a hit's search_score attributes.
#
# score_names - Array of Strings, one per search_score "name" attribute.
#               NOTE(review): the patterns expect each string to look like
#               "name = <value>", presumably what the attribute node's to_s
#               yields - confirm against libxml-ruby.
#
# Returns "omssa", "mascot", "x!tandem" or "" when nothing matches.
# Array#grep always returns an Array (truthy even when empty), so each match
# is tested for emptiness rather than used directly as a condition.
def pepxml_search_engine(score_names)
  if score_names.length == 2 && !score_names.grep(/^name.*=.*pvalue/).empty?
    "omssa"
  elsif !score_names.grep(/^name.*=.*ionscore/).empty?
    "mascot"
  elsif !score_names.grep(/^name.*=.*hyperscore/).empty?
    "x!tandem"
  else
    ""
  end
end

# Setup specific command-line options for this tool. Other options are inherited from Tool
#
tool = Tool.new({:explicit_output => true})
tool.option_parser.banner = "Convert a pepXML file to a tab delimited table.\n\nUsage: pepxml_to_table.rb [options] file1.pep.xml"

tool.option_parser.parse!

input_file = ARGV[0]

# Without an input there is nothing to convert; stop before creating a stray ".txt" file.
abort "pepxml_to_table.rb: no input file given" if input_file.nil?

# Default output sits next to the input; --output overrides it.
output_file = "#{input_file}.txt"
output_file = tool.explicit_output if tool.explicit_output != nil

# The block form closes the output file even if parsing fails part-way.
File.open("#{output_file}", 'w') do |output_fh|

  output_fh.write "protein\tpeptide\tassumed_charge\tcalc_neutral_pep_mass\tneutral_mass\tretention_time\tstart_scan\tend_scan\tsearch_engine\tpeptideprophet_prob\tinterprophet_prob\n"

  pepxml_parser = XML::Parser.file("#{input_file}")
  pepxml_doc = pepxml_parser.parse

  spectrum_queries = pepxml_doc.find('//xmlns:spectrum_query', PEPXML_NS)

  spectrum_queries.each do |query|

    retention_time = query.attributes['retention_time_sec']
    neutral_mass = query.attributes['precursor_neutral_mass']
    assumed_charge = query.attributes['assumed_charge']
    start_scan = query.attributes['start_scan']
    end_scan = query.attributes['end_scan']

    # Only the first search hit of each query is reported. A query without
    # any hit has nothing to report and is skipped.
    top_search_hit = query.find('./xmlns:search_result/xmlns:search_hit', PEPXML_NS)[0]
    next if top_search_hit.nil?

    peptide = top_search_hit.attributes['peptide']
    protein = top_search_hit.attributes['protein']
    calc_neutral_pep_mass = top_search_hit.attributes['calc_neutral_pep_mass']

    search_score_names = top_search_hit.find('./xmlns:search_score/@name', PEPXML_NS).collect { |s| s.to_s }
    search_engine = pepxml_search_engine(search_score_names)

    pp_result = top_search_hit.find('./xmlns:analysis_result/xmlns:peptideprophet_result/@probability', PEPXML_NS)
    ip_result = top_search_hit.find('./xmlns:analysis_result/xmlns:interprophet_result/@probability', PEPXML_NS)

    # Probabilities stay blank when the corresponding analysis is absent.
    peptide_prophet_prob = ""
    interprophet_prob = ""
    peptide_prophet_prob = pp_result[0].value if pp_result.length > 0
    interprophet_prob = ip_result[0].value if ip_result.length > 0

    output_fh.write "#{protein}\t#{peptide}\t#{assumed_charge}\t#{calc_neutral_pep_mass}\t#{neutral_mass}\t#{retention_time}\t#{start_scan}\t#{end_scan}\t#{search_engine}\t#{peptide_prophet_prob}\t#{interprophet_prob}\n"

  end

end
@@ -0,0 +1,140 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 17/1/2011
5
+ #
6
+ # Runs the Protein Prophet tool on a set of pep.xml files. Accepts input from peptide_prophet or interprophet.
7
+ #
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/prophet_tool'
13
+ require 'protk/galaxy_stager'
14
+ require 'protk/galaxy_util'
15
+
16
for_galaxy = GalaxyUtil.for_galaxy

if for_galaxy
  # Stage files for galaxy and force a fixed output name.
  original_input_file = ARGV[0]
  input_stager = GalaxyStager.new("#{original_input_file}", :extension => '.pep.xml')
  ARGV.push("-o")
  ARGV.push("protein_prophet_results.prot.xml")
end

# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
#
prophet_tool = ProphetTool.new({:glyco => true, :explicit_output => true})
prophet_tool.option_parser.banner = "Run ProteinProphet on a set of pep.xml input files.\n\nUsage: protein_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
prophet_tool.options.output_suffix = "_protproph"

prophet_tool.options.iproph = false
prophet_tool.option_parser.on('--iprophet-input', "Inputs are from iProphet") do
  prophet_tool.options.iproph = true
end

prophet_tool.options.nooccam = false
prophet_tool.option_parser.on('--no-occam', "Do not attempt to derive the simplest protein list explaining observed peptides") do
  prophet_tool.options.nooccam = true
end

prophet_tool.options.groupwts = false
prophet_tool.option_parser.on('--group-wts', "Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold") do
  prophet_tool.options.groupwts = true
end

prophet_tool.options.normprotlen = false
prophet_tool.option_parser.on('--norm-protlen', "Normalize NSP using Protein Length") do
  prophet_tool.options.normprotlen = true
end

prophet_tool.options.logprobs = false
prophet_tool.option_parser.on('--log-prob', "Use the log of probability in the confidence calculations") do
  prophet_tool.options.logprobs = true
end

prophet_tool.options.confem = false
prophet_tool.option_parser.on('--confem', "Use the EM to compute probability given the confidence") do
  prophet_tool.options.confem = true
end

prophet_tool.options.allpeps = false
prophet_tool.option_parser.on('--allpeps', "Consider all possible peptides in the database in the confidence model") do
  prophet_tool.options.allpeps = true
end

prophet_tool.options.unmapped = false
prophet_tool.option_parser.on('--unmapped', "Report results for unmapped proteins") do
  prophet_tool.options.unmapped = true
end

prophet_tool.options.instances = false
prophet_tool.option_parser.on('--instances', "Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment") do
  prophet_tool.options.instances = true
end

prophet_tool.options.delude = false
prophet_tool.option_parser.on('--delude', "Do NOT use peptide degeneracy information when assessing proteins") do
  prophet_tool.options.delude = true
end

prophet_tool.options.minprob = 0.05
prophet_tool.option_parser.on('--minprob mp', "Minimum peptide prophet probability for peptides to be considered") do |mp|
  prophet_tool.options.minprob = mp
end

prophet_tool.options.minindep = 0
prophet_tool.option_parser.on('--minindep mp', "Minimum percentage of independent peptides required for a protein") do |mp|
  prophet_tool.options.minindep = mp
end

prophet_tool.option_parser.parse!

# Obtain a global environment object
genv = Constants.new

if prophet_tool.explicit_output == nil
  output_file = "#{prophet_tool.output_prefix}interact#{prophet_tool.output_suffix}.prot.xml"
else
  output_file = prophet_tool.explicit_output
end

p output_file

if !Pathname.new(output_file).exist? || prophet_tool.over_write

  cmd = "#{genv.proteinprophet} "

  inputs = ARGV.collect { |file_name| file_name.chomp }

  cmd << " #{inputs.join(" ")} #{output_file}"

  cmd << " GLYC " if prophet_tool.glyco

  # The switches declared above were parsed but never reached the command
  # line, so they had no effect. Each one is now forwarded when set.
  # NOTE(review): keyword spellings follow the ProteinProphet usage text -
  # confirm against the installed TPP version.
  cmd << " IPROPHET " if prophet_tool.options.iproph
  cmd << " NOOCCAM " if prophet_tool.options.nooccam
  cmd << " GROUPWTS " if prophet_tool.options.groupwts
  cmd << " NORMPROTLEN " if prophet_tool.options.normprotlen
  cmd << " LOGPROBS " if prophet_tool.options.logprobs
  cmd << " CONFEM " if prophet_tool.options.confem
  cmd << " ALLPEPS " if prophet_tool.options.allpeps
  cmd << " UNMAPPED " if prophet_tool.options.unmapped
  cmd << " INSTANCES " if prophet_tool.options.instances
  cmd << " DELUDE " if prophet_tool.options.delude

  # Thresholds are only forwarded when they differ from the defaults set
  # above, which keeps the default invocation exactly as it was.
  cmd << " MINPROB#{prophet_tool.options.minprob} " if prophet_tool.options.minprob.to_s != "0.05"
  cmd << " MININDEP#{prophet_tool.options.minindep} " if prophet_tool.options.minindep.to_s != "0"

  # Run the analysis
  #
  jobscript_path = "#{output_file}.pbs.sh"
  job_params = {:jobid => "protproph", :vmem => "900mb", :queue => "lowmem"}
  genv.log("Running #{cmd}",:info)
  code = prophet_tool.run(cmd,genv,job_params,jobscript_path)
  # raise (not throw) so a failed run is reported as an ordinary error.
  raise "Command failed with exit code #{code}" unless code == 0
else
  genv.log("Protein Prophet output file #{output_file} already exists. Run with -r option to replace",:warn)
end

if for_galaxy
  # Restore references to peptide prophet xml so downstream tools like
  # libra can find it.
  input_stager.restore_references("protein_prophet_results.prot.xml")
end
137
+
138
+
139
+
140
+
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 20/10/2012
5
+ #
6
+ # Post-install setup for protk.
7
+ # Installs third party tools
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/setup_tool'
12
+ require 'yaml'
13
+ require 'pp'
14
+
15
+
16
+ # Setup specific command-line options for this tool. Other options are inherited from Tool
17
+ #
18
# Build the setup tool. A banner is only supplied when the tool has not
# already provided one of its own.
#
tool = SetupTool.new
banner_missing = (tool.option_parser.banner == "")
tool.option_parser.banner = "Post install tasks for protk.\nUsage: protk_setup.rb [options] toolname" if banner_missing

tool.option_parser.parse!

# The environment object is constructed for its side effects only.
# NOTE(review): the original note says this creates the install directory if
# it doesn't already exist - not verifiable from this script.
env = Constants.new

# Every remaining argument names a third party tool to install.
ARGV.each { |toolname| tool.install toolname }