protk 1.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/README.md +85 -0
  2. data/bin/annotate_ids.rb +59 -0
  3. data/bin/big_search.rb +41 -0
  4. data/bin/correct_omssa_retention_times.rb +27 -0
  5. data/bin/feature_finder.rb +76 -0
  6. data/bin/file_convert.rb +157 -0
  7. data/bin/generate_omssa_loc.rb +42 -0
  8. data/bin/interprophet.rb +91 -0
  9. data/bin/make_decoy.rb +64 -0
  10. data/bin/manage_db.rb +123 -0
  11. data/bin/mascot_search.rb +187 -0
  12. data/bin/mascot_to_pepxml.rb +44 -0
  13. data/bin/msgfplus_search.rb +191 -0
  14. data/bin/omssa_search.rb +205 -0
  15. data/bin/peptide_prophet.rb +245 -0
  16. data/bin/pepxml_to_table.rb +78 -0
  17. data/bin/protein_prophet.rb +140 -0
  18. data/bin/protk_setup.rb +31 -0
  19. data/bin/repair_run_summary.rb +113 -0
  20. data/bin/tandem_search.rb +292 -0
  21. data/bin/template_search.rb +144 -0
  22. data/bin/unimod_to_loc.rb +118 -0
  23. data/bin/xls_to_table.rb +46 -0
  24. data/ext/protk/extconf.rb +3 -0
  25. data/ext/protk/protk.c +235 -0
  26. data/lib/protk/big_search_rakefile.rake +16 -0
  27. data/lib/protk/big_search_tool.rb +23 -0
  28. data/lib/protk/bio_sptr_extensions.rb +210 -0
  29. data/lib/protk/biotools_excel_converter.rb +60 -0
  30. data/lib/protk/command_runner.rb +84 -0
  31. data/lib/protk/constants.rb +296 -0
  32. data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
  33. data/lib/protk/data/apt-get_packages.yaml +47 -0
  34. data/lib/protk/data/brew_packages.yaml +10 -0
  35. data/lib/protk/data/default_config.yml +20 -0
  36. data/lib/protk/data/predefined_db.crap.yaml +19 -0
  37. data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
  38. data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
  39. data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
  40. data/lib/protk/data/tandem_params.xml +56 -0
  41. data/lib/protk/data/taxonomy_template.xml +9 -0
  42. data/lib/protk/data/unimod.xml +16780 -0
  43. data/lib/protk/eupathdb_gene_information_table.rb +158 -0
  44. data/lib/protk/galaxy_stager.rb +24 -0
  45. data/lib/protk/galaxy_util.rb +9 -0
  46. data/lib/protk/manage_db_rakefile.rake +484 -0
  47. data/lib/protk/manage_db_tool.rb +181 -0
  48. data/lib/protk/mascot_util.rb +63 -0
  49. data/lib/protk/omssa_util.rb +57 -0
  50. data/lib/protk/plasmodb.rb +50 -0
  51. data/lib/protk/prophet_tool.rb +85 -0
  52. data/lib/protk/protein_annotator.rb +646 -0
  53. data/lib/protk/protxml.rb +137 -0
  54. data/lib/protk/randomize.rb +7 -0
  55. data/lib/protk/search_tool.rb +182 -0
  56. data/lib/protk/setup_rakefile.rake +245 -0
  57. data/lib/protk/setup_tool.rb +19 -0
  58. data/lib/protk/spreadsheet_extensions.rb +78 -0
  59. data/lib/protk/swissprot_database.rb +38 -0
  60. data/lib/protk/tool.rb +182 -0
  61. data/lib/protk/xtandem_defaults.rb +11 -0
  62. data/lib/protk.rb +18 -0
  63. metadata +256 -0
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 2/12/2011
5
+ #
6
+ # Repairs the msms_run_summary tag in a pepXML document to contain a specified file and datatype
7
+ # This tool should only be used on pepXML files that contain a single msms_run_summary (eg not interprophet results)
8
+ #
9
+
10
+
11
+ require 'protk/constants'
12
+ require 'protk/command_runner'
13
+ require 'protk/tool'
14
+ require 'libxml'
15
+
16
+ include LibXML
17
+
# Environment with global constants
#
genv = Constants.new

# Setup specific command-line options for this tool. Other options are inherited from Tool
#
tool = Tool.new
tool.option_parser.banner = "Repair msms_run_summary tag in a pepXML file.\n\nUsage: repair_run_summary.rb [options] file1.pepXML"

tool.options.new_base_name = nil
tool.option_parser.on('-N', '--base-name mzmlfile', 'Original MSMS spectrum file used for search') do |file|
  tool.options.new_base_name = file
end

tool.options.raw_data_type = nil
tool.option_parser.on('-R', '--raw-type type', 'Raw data type used for search') do |type|
  tool.options.raw_data_type = type
end

tool.options.omssa_ion_tolerance = nil
tool.option_parser.on('--omssa-itol fitol', 'Add a fragment ion tolerance parameter to the omssa search summary') do |fitol|
  tool.options.omssa_ion_tolerance = fitol
end

tool.option_parser.parse!

pepxml_file = ARGV[0]
raise "No pepXML file specified" if pepxml_file.nil?

# Namespace declaration passed to every namespaced XPath query below
pepxml_ns = 'xmlns:http://regis-web.systemsbiology.net/pepXML'

# Read the input file
#
doc = XML::Parser.file(pepxml_file).parse

# Read the values from the same options object they were stored on above
new_base_name = tool.options.new_base_name
raw_data_type = tool.options.raw_data_type

genv.log("Repairing #{pepxml_file} to #{new_base_name} format #{raw_data_type}", :info)

if new_base_name.nil?
  # Try X!Tandem first
  # It would be parameter spectrum,path
  #
  spectrum_path = doc.find('//xmlns:msms_run_summary/xmlns:search_summary/xmlns:parameter[@name="spectrum, path"]', pepxml_ns)[0]
  unless spectrum_path.nil?
    new_base_name = spectrum_path.attributes['value']
    raw_data_type = "mzML" # Always is for X!Tandem
  end
end

if new_base_name.nil?
  # Try Mascot
  # It would be parameter File path
  #
  #<parameter name="FILE" value="dataset_2.dat"/>
  file_path = doc.find('//xmlns:msms_run_summary/xmlns:search_summary/xmlns:parameter[@name="FILE"]', pepxml_ns)[0]
  unless file_path.nil?
    # Keep the directory of the existing base_name and swap in the file named by the FILE parameter
    mascot_run_summary = doc.find('//xmlns:msms_run_summary', pepxml_ns)[0]
    old_base_name = mascot_run_summary.attributes['base_name']
    base_dir_path = Pathname.new(old_base_name).dirname.to_s

    new_base_name = "#{base_dir_path}/#{file_path.attributes['value']}"
    raw_data_type = "mgf" # Always is for Mascot
  end
end

raise "Could not find original spectrum filename in pepXML and none provided" if new_base_name.nil?

run_summary = doc.find('//xmlns:msms_run_summary', pepxml_ns)
if run_summary[0].nil?
  # Try without namespace (OMSSA)
  run_summary = doc.find('//msms_run_summary')

  unless tool.options.omssa_ion_tolerance.nil?
    search_summary = doc.find('//search_summary')[0]
    raise "No search summary found" if search_summary.nil?

    # Record the fragment ion tolerance as a "to" parameter on the search summary
    pmnode = XML::Node.new('parameter')
    pmnode["name"] = "to"
    pmnode["value"] = tool.options.omssa_ion_tolerance.to_s
    search_summary << pmnode
  end

  # The un-namespaced (OMSSA) case always reports mgf, overriding any -R value
  raw_data_type = "mgf"
end

raise "No run summary found" if run_summary[0].nil?

run_summary[0].attributes['base_name'] = new_base_name
run_summary[0].attributes['raw_data'] = raw_data_type

# The repaired document overwrites the input file in place
doc.save(pepxml_file)
@@ -0,0 +1,292 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 17/12/2010
5
+ #
6
+ # Runs an MS/MS search using the X!Tandem search engine
7
+ #
8
+
9
+ require 'protk/constants'
10
+ require 'protk/command_runner'
11
+ require 'protk/search_tool'
12
+ require 'protk/xtandem_defaults'
13
+ require 'libxml'
14
+
15
+ include LibXML
16
+
# Environment with global constants
#
genv = Constants.new

# Setup specific command-line options for this tool. Other options are inherited from SearchTool
#
search_tool = SearchTool.new({:msms_search=>true,:background=>true,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
search_tool.jobid_prefix = "x"
search_tool.option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
search_tool.options.output_suffix = "_tandem"

# One defaults object supplies both the parameter file and the taxonomy template
xtandem_defaults = XTandemDefaults.new
tandem_defaults = xtandem_defaults.path
search_tool.options.tandem_params = tandem_defaults
search_tool.option_parser.on('-T', '--tandem-params tandem', 'XTandem parameters to use') do |parms|
  search_tool.options.tandem_params = parms
end

search_tool.options.no_pepxml = false
search_tool.option_parser.on('-P', '--no-pepxml', 'Dont convert to pepXML after running the search') do
  search_tool.options.no_pepxml = true
end

search_tool.options.keep_params_files = false
search_tool.option_parser.on('-K', '--keep-params-files', 'Keep X!Tandem parameter files') do
  search_tool.options.keep_params_files = true
end

search_tool.option_parser.parse!

# Set search engine specific parameters on the SearchTool object
#
tandem_bin = genv.xtandem.to_s

# File.exist? replaces FileTest.exists?, which newer Ruby versions no longer provide
raise "Could not find X!Tandem executable" unless File.exist?(tandem_bin)

tandem_params = search_tool.tandem_params

# A database argument that names an existing file is used directly;
# anything else is resolved through the search tool
database_path = Pathname.new(search_tool.database)
if database_path.exist?
  current_db = database_path.realpath.to_s
else
  current_db = search_tool.current_database :fasta
end

# Parse options from a parameter file (if provided), or from the default parameter file
#
params_parser = XML::Parser.file(tandem_params)
std_params = params_parser.parse

# Parse taxonomy template file
#
taxo_parser = XML::Parser.file(xtandem_defaults.taxonomy_path)
taxo_doc = taxo_parser.parse
74
+
# Galaxy rewrites characters such as @ into tokens like __at__; this turns
# those tokens back into the original characters.
#
# The string is modified in place and the same object is returned.
#
def decode_modification_string(mstring)
  {
    "__at__" => "@",
    "__oc__" => "{",
    "__cc__" => "}",
    "__ob__" => "[",
    "__cb__" => "]"
  }.each { |encoded, decoded| mstring.gsub!(encoded, decoded) }
  mstring
end
85
+
# Returns the single <note type="input"> element with the given label.
# Raises if the parameter document holds none, or more than one.
#
def find_single_input_note(std_params, label)
  notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{label}\"]")
  raise "Exactly one #{label} note is required in the parameter file. Got #{notes.length}" unless notes.length == 1
  notes[0]
end

# Removes every <note type="input"> element carrying the given id.
#
def remove_input_notes_with_id(std_params, id)
  std_params.find("/bioml/note[@type=\"input\" and @id=\"#{id}\"]").each { |node| node.remove! }
end

# Appends one input note per modification to the root bioml element.
# Modifications containing bracket, brace, paren or "!" characters are
# labelled as motifs, all others as masses.
#
def append_modification_notes(root_bioml_node, mods, id_prefix, label_prefix)
  mods.each_with_index do |mod, index|
    kind = (mod =~ /[\[\]\(\)\{\}\!]/) ? "motif" : "mass"
    # Parameters in this document are <note> elements (see the lookups above),
    # so custom modifications must be notes as well
    mnode = XML::Node.new('note')
    mnode["id"] = "#{id_prefix}-#{index + 1}"
    mnode["type"] = "input"
    mnode["label"] = "residue, #{label_prefix}modification #{kind}"
    mnode.content = mod
    root_bioml_node << mnode
  end
end

# Fills in the X!Tandem parameter document for one search and returns it.
#
# std_params  - parsed parameter document; it is modified in place
# output_path - value for the "output, path" note
# input_path  - value for the "spectrum, path" note
# taxo_path   - value for the "list path, taxonomy information" note
# current_db  - unused here; kept so existing callers keep working
# search_tool - supplies database, tolerances and modification settings
# genv        - supplies tpp_root, used to locate the scoring defaults file
#
# Raises if any required note is missing or duplicated.
#
def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_db,search_tool,genv)
  # Set the input and output paths
  #
  find_single_input_note(std_params, "spectrum, path").content = input_path
  find_single_input_note(std_params, "output, path").content = output_path

  # Set the path to the scoring algorithm default params. We use one from ISB
  #
  find_single_input_note(std_params, "list path, default parameters").content = "#{genv.tpp_root}/bin/isb_default_input_kscore.xml"

  # Taxonomy and Database
  #
  find_single_input_note(std_params, "protein, taxon").content = search_tool.database.downcase
  find_single_input_note(std_params, "list path, taxonomy information").content = taxo_path

  find_single_input_note(std_params, "spectrum, fragment monoisotopic mass error").content = search_tool.fragment_tol.to_s

  # Precursor mass matching: half of the tolerance goes on each side
  #
  precursor_tol = search_tool.precursor_tol
  ptol_plus = precursor_tol * 0.5
  ptol_minus = precursor_tol * 0.5

  find_single_input_note(std_params, "spectrum, parent monoisotopic mass error minus").content = ptol_minus.to_s
  find_single_input_note(std_params, "spectrum, parent monoisotopic mass error plus").content = ptol_plus.to_s
  find_single_input_note(std_params, "spectrum, parent monoisotopic mass error units").content = search_tool.precursor_tolu

  if search_tool.strict_monoisotopic_mass
    find_single_input_note(std_params, "spectrum, parent monoisotopic mass isotope error").content = "no"
  end

  # Fixed and Variable Modifications
  #
  remove_input_notes_with_id(std_params, "carbamidomethyl-fixed") unless search_tool.carbamidomethyl
  remove_input_notes_with_id(std_params, "glyco-variable") unless search_tool.glyco
  remove_input_notes_with_id(std_params, "methionine-oxidation-variable") unless search_tool.methionine_oxidation

  var_mods = search_tool.var_mods.split(",").map { |mod| mod.strip }.reject { |e| e.empty? }
  var_mods = var_mods.map { |mod| decode_modification_string(mod) }
  fix_mods = search_tool.fix_mods.split(",").map { |mod| mod.strip }.reject { |e| e.empty? }
  fix_mods = fix_mods.map { |mod| decode_modification_string(mod) }

  # The same document is passed in again for every input file, so drop the
  # custom notes added by an earlier call before adding them afresh
  std_params.find('/bioml/note[starts-with(@id,"custom-variable-mod-") or starts-with(@id,"custom-fixed-mod-")]').each { |node| node.remove! }

  root_bioml_node = std_params.find('/bioml')[0]

  append_modification_notes(root_bioml_node, var_mods, "custom-variable-mod", "potential ")
  append_modification_notes(root_bioml_node, fix_mods, "custom-fixed-mod", "")

  std_params
end
208
+
# Points the taxonomy template at the database for this search and returns it.
#
# taxo_doc    - parsed taxonomy template; it is modified in place
# current_db  - written to the URL attribute of the single /bioml/taxon/file element
# search_tool - its database name, lower-cased, becomes the taxon label
#
# Raises if the template does not hold exactly one taxon and one file element.
#
def generate_taxonomy_doc(taxo_doc,current_db,search_tool)
  taxon_label = taxo_doc.find('/bioml/taxon')
  raise "Exactly one taxon label is required in the taxonomy_template file" unless taxon_label.length == 1
  taxon_label[0].attributes['label'] = search_tool.database.downcase

  db_file = taxo_doc.find('/bioml/taxon/file')
  raise "Exactly one database file is required in the taxonomy_template file" unless db_file.length == 1
  db_file[0].attributes['URL'] = current_db

  taxo_doc
end
221
+
# Run the search engine on each input file
#
ARGV.each do |filename|
  spectrum_file = filename.chomp

  input_path = Pathname.new(spectrum_file).realpath.to_s
  output_base = search_tool.output_base_path(spectrum_file)
  output_path = "#{output_base}.tandem"

  # Without an explicit output the pepXML file sits next to the raw tandem output
  if search_tool.explicit_output.nil?
    pepxml_path = "#{output_base}.pep.xml"
  else
    pepxml_path = search_tool.explicit_output
  end

  # The file that counts as "the output" depends on whether pepXML conversion is requested
  final_output = search_tool.no_pepxml ? output_path : pepxml_path
  output_exists = Pathname.new(final_output).exist?

  # Only proceed if the output file is not present or we have opted to over-write it
  #
  if search_tool.over_write || !output_exists
    input_base = search_tool.input_base_path(spectrum_file)

    # Create the taxonomy file in the same directory as the params file
    #
    taxo_path = "#{input_base}.taxonomy.xml"
    mod_taxo_doc = generate_taxonomy_doc(taxo_doc, current_db, search_tool)
    mod_taxo_doc.save(taxo_path)

    # Modify the default XML document to contain search specific details and save it so it can be used in the search
    #
    mod_params = generate_parameter_doc(std_params, output_path, input_path, taxo_path, current_db, search_tool, genv)
    params_path = "#{input_base}.tandem.params"
    mod_params.save(params_path)

    # The basic command
    #
    cmd = "#{tandem_bin} #{params_path}"

    # pepXML conversion and repair
    #
    unless search_tool.no_pepxml
      repair_script = "#{File.dirname(__FILE__)}/repair_run_summary.rb"
      cmd << "; #{genv.tandem2xml} #{output_path} #{pepxml_path}; #{repair_script} #{pepxml_path}; rm #{output_path}"
    end

    # Add a cleanup command unless the user wants to keep params files
    #
    unless search_tool.keep_params_files
      cmd << "; rm #{params_path}; rm #{taxo_path}"
    end

    # In case the user specified background running we need to create a jobscript path
    #
    jobscript_path = "#{output_path}.pbs.sh"

    # Run the search
    #
    job_params = {:jobid => search_tool.jobid_from_filename(filename)}
    job_params[:queue] = "lowmem"
    job_params[:vmem] = "900mb"
    code = search_tool.run(cmd, genv, job_params, jobscript_path)
    # NOTE(review): the steps are chained with ";", so this code may be that of the
    # last step rather than of X!Tandem itself — confirm against SearchTool#run
    raise "Command failed with exit code #{code}" unless code == 0
  else
    genv.log("Skipping search on existing file #{output_path}", :warn)
  end
end
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 14/12/2010
5
+ #
6
+ # Runs an MS/MS search using the MSGFPlus search engine
7
+ #
8
+ require 'protk/search_tool'
9
+
10
+
require 'protk/constants'

# Environment with global constants
#
genv = Constants.new

# Setup specific command-line options for this tool. Other options are inherited from SearchTool
#
search_tool = SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
search_tool.option_parser.banner = "Run an msms search on a set of msms spectrum input files.\n\nUsage: template_search.rb [options] file1.mzML file2.mzML ..."
search_tool.options.output_suffix = "_template"

search_tool.options.custom_option = "default"
search_tool.option_parser.on('--custom-opt value', 'Custom option relevant to this tool only (Default default)') do |val|
  search_tool.options.custom_option = val
end

search_tool.option_parser.parse!

# Set search engine specific parameters on the SearchTool object
#
msgf_bin = "#{genv.msgf_bin}/MSGFPlus.jar"

# A database argument that names an existing file is used directly;
# anything else is resolved through the search tool
database_path = Pathname.new(search_tool.database)
if database_path.exist?
  current_db = database_path.realpath.to_s
else
  current_db = search_tool.current_database :fasta
end

raise "When --output is set only one file at a time can be run" if ARGV.length > 1 && !search_tool.explicit_output.nil?

# Run the search engine on each input file
#
ARGV.each do |filename|
  spectrum_file = filename.chomp

  if search_tool.explicit_output.nil?
    output_path = "#{search_tool.output_base_path(spectrum_file)}.mzid"
  else
    output_path = search_tool.explicit_output
  end

  # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
  # Get the input file extension
  ext = Pathname.new(filename).extname

  input_path = "#{search_tool.input_base_path(spectrum_file)}#{ext}"

  # Only proceed if the output file is not present or we have opted to over-write it
  #
  if search_tool.over_write || !Pathname.new(output_path).exist?

    # The basic command
    #
    cmd = "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{output_path} "

    # Missed cleavages
    #
    # NOTE(review): the missed cleavage count is passed as -ntt; confirm against the
    # search engine's documentation that this flag really means missed cleavages
    raise "Maximum value for missed cleavages is 2" if search_tool.missed_cleavages > 2
    cmd << " -ntt #{search_tool.missed_cleavages}"

    # Precursor tolerance
    #
    cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"

    # Instrument type
    #
    cmd << " -inst 2"

    # cmd << " -m 4"

    cmd << " -addFeatures 1"

    # Enzyme
    #
    # if ( search_tool.enzyme!="Trypsin")
    # cmd << " -e #{search_tool.enzyme}"
    # end

    # Variable Modifications
    #
    # "!~" is required here: "!value =~ /None/" negates the string first and
    # therefore never takes this branch
    var_mods_spec = search_tool.var_mods.to_s
    if var_mods_spec != "" && var_mods_spec !~ /None/ # Checking for none is to cope with galaxy input
      var_mods = var_mods_spec.split(",").map { |mod| mod.strip }.reject { |e| e.empty? }.join(",")
      cmd << " -mv #{var_mods}" if var_mods != ""
    else
      # Add options related to peptide modifications
      #
      cmd << " -mv 119 " if search_tool.glyco
    end

    # Fixed modifications
    #
    fix_mods_spec = search_tool.fix_mods.to_s
    if fix_mods_spec != "" && fix_mods_spec !~ /None/
      fix_mods = fix_mods_spec.split(",").map { |mod| mod.strip }.reject { |e| e.empty? }.join(",")
      cmd << " -mf #{fix_mods}" if fix_mods != ""
    elsif search_tool.has_modifications
      cmd << " -mf "
      cmd << "3 " if search_tool.carbamidomethyl
      cmd << "1 " if search_tool.methionine_oxidation
    end

    # Up to here we've formulated the search command. The rest is running it
    genv.log("Running:#{cmd}", :info)

    # Run the search
    #
    job_params = {:jobid => search_tool.jobid_from_filename(filename)}
    job_params[:queue] = "lowmem"
    job_params[:vmem] = "900mb"
    search_tool.run(cmd, genv, job_params)

  else
    genv.log("Skipping search on existing file #{output_path}", :warn)
  end

end
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of MSLIMS
4
+ # Created by Ira Cooke 12/4/2010
5
+ #
6
+ # Reads a unimod xml file (eg from a Mascot installation) and produces a loc file with names of allowable chemical modifications
7
+ #
8
+
9
+ require 'libxml'
10
+
11
+ include LibXML
12
+
unimod_path = ARGV[0]
raise "Usage: unimod_to_loc.rb unimod.xml" if unimod_path.nil?

unimod_doc = XML::Parser.file(unimod_path).parse

# Modifications whose entries are written out by hand instead of being
# derived from the specificity elements
special_cases = {
  "Oxidation" => ["Oxidation (HW)", "Oxidation (M)"],
  "Phospho" => ["Phospho (ST)", "Phospho (Y)"],
  "Sulfo" => ["Sulfo (S)", "Sulfo (T)", "Sulfo (Y)"]
}

all_mods = []

unimod_doc.find('//umod:unimod/umod:modifications/umod:mod').each do |mod|
  title = mod.attributes['title']

  # Special Cases
  #
  if special_cases.key?(title)
    all_mods.concat(special_cases[title])
    next
  end

  # Deal with the anywhere sites which can be concatenated
  #
  anywhere_sites = mod.find('./umod:specificity[@hidden="0" and @position="Anywhere"]').to_a.map { |s| s.attributes['site'].to_s }

  if title !~ /^iTRAQ/ && title !~ /^mTRAQ/
    # All sites are merged into one entry, e.g. "Title (KST)"
    all_mods.push("#{title} (#{anywhere_sites.sort.join})") unless anywhere_sites.empty?
  else
    # iTRAQ and mTRAQ get one entry per site
    anywhere_sites.each { |site| all_mods.push("#{title} (#{site})") }
  end

  # Position-specific sites each get their own entry
  mod.find('./umod:specificity[@hidden="0" and @position!="Anywhere"]').each do |specific_mod|
    site = specific_mod.attributes['site']
    position = specific_mod.attributes['position']

    specificity = site
    specificity = position if position =~ /^Protein/
    specificity = "N-term #{site}" if position =~ /Any N-term/ && site =~ /^[CQEM]$/
    specificity = "C-term #{site}" if position =~ /Any C-term/ && site =~ /^[M]$/

    all_mods.push("#{title} (#{specificity})")
  end
end

all_mods = all_mods.sort { |a, b| a.downcase <=> b.downcase }

# The block form closes the file even if writing fails part-way
File.open("mascot_mods.loc", 'w') do |loc_output|
  loc_output << "#This file lists the names of chemical modifications acceptable for proteomics search engines\n"
  loc_output << "#\n"
  loc_output << "#So, unimod_names.loc could look something like this:\n"
  loc_output << "#\n"

  all_mods.each do |am|
    # Key: lower-case name without spaces, with "(", ")", ":" and "->" each replaced by "_"
    key = am.downcase.delete(" ").gsub(/->|[():]/, "_")
    loc_output << "#{am}\t#{key}\t#{am}\t#{key}\n"
  end
end
117
+
118
+