protk 1.2.6.pre5 → 1.3.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +84 -45
  3. data/bin/add_retention_times.rb +9 -5
  4. data/bin/augustus_to_proteindb.rb +7 -11
  5. data/bin/interprophet.rb +28 -46
  6. data/bin/make_decoy.rb +16 -48
  7. data/bin/mascot_search.rb +57 -71
  8. data/bin/mascot_to_pepxml.rb +13 -26
  9. data/bin/msgfplus_search.rb +70 -107
  10. data/bin/omssa_search.rb +52 -109
  11. data/bin/peptide_prophet.rb +44 -119
  12. data/bin/pepxml_to_table.rb +24 -27
  13. data/bin/protein_prophet.rb +22 -82
  14. data/bin/protxml_to_gff.rb +22 -519
  15. data/bin/protxml_to_table.rb +2 -16
  16. data/bin/sixframe.rb +10 -32
  17. data/bin/tandem_search.rb +30 -403
  18. data/bin/tandem_to_pepxml.rb +43 -0
  19. data/bin/unimod_to_loc.rb +1 -1
  20. data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
  21. data/ext/decoymaker/extconf.rb +3 -0
  22. data/lib/protk/constants.rb +16 -2
  23. data/lib/protk/data/default_config.yml +2 -1
  24. data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
  25. data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
  26. data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
  27. data/lib/protk/data/tandem_params.xml +17 -54
  28. data/lib/protk/fastadb.rb +2 -2
  29. data/lib/protk/prophet_tool.rb +1 -1
  30. data/lib/protk/protxml_to_gff_tool.rb +474 -0
  31. data/lib/protk/search_tool.rb +58 -103
  32. data/lib/protk/setup_rakefile.rake +9 -5
  33. data/lib/protk/tandem_search_tool.rb +256 -0
  34. data/lib/protk/tool.rb +85 -104
  35. data/lib/protk.rb +1 -6
  36. metadata +24 -103
  37. data/bin/annotate_ids.rb +0 -59
  38. data/bin/asapratio.rb +0 -27
  39. data/bin/blastxml_to_table.rb +0 -119
  40. data/bin/correct_omssa_retention_times.rb +0 -27
  41. data/bin/feature_finder.rb +0 -95
  42. data/bin/file_convert.rb +0 -164
  43. data/bin/generate_omssa_loc.rb +0 -42
  44. data/bin/gffmerge.rb +0 -208
  45. data/bin/libra.rb +0 -70
  46. data/bin/toppas_pipeline.rb +0 -84
  47. data/bin/uniprot_annotation.rb +0 -141
  48. data/bin/xls_to_table.rb +0 -52
  49. data/bin/xpress.rb +0 -27
  50. data/ext/protk/decoymaker/extconf.rb +0 -3
  51. data/ext/protk/simplealign/extconf.rb +0 -3
  52. data/lib/protk/biotools_excel_converter.rb +0 -60
  53. data/lib/protk/eupathdb_gene_information_table.rb +0 -158
  54. data/lib/protk/gapped_aligner.rb +0 -264
  55. data/lib/protk/protein_annotator.rb +0 -646
  56. data/lib/protk/spreadsheet_extensions.rb +0 -79
  57. data/lib/protk/xtandem_defaults.rb +0 -11
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
- # This file is part of MSLIMS
3
+ # This file is part of Protk
4
4
  # Created by Ira Cooke 12/4/2010
5
5
  #
6
6
  # Convert mascot dat files to pepxml. A wrapper for Mascot2XML
@@ -15,34 +15,21 @@ require 'protk/mascot_util'
15
15
  #
16
16
  genv=Constants.new
17
17
 
18
- tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme])
18
+ tool=SearchTool.new([
19
+ :database,
20
+ :explicit_output,
21
+ :over_write,
22
+ :enzyme])
23
+
19
24
  tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot_to_pepxml.rb [options] file1.dat file2.dat ... "
20
25
 
21
26
  tool.options.enzyme="trypsin"
22
27
 
23
- tool.options.shortid=false
24
- tool.option_parser.on( '--shortid', 'Use short protein id as per Mascot result (default uses full protein ids in fasta file)' ) do
25
- tool.options.shortid=true
26
- end
27
-
28
- tool.option_parser.parse!
29
-
30
- exit unless tool.check_options
31
-
32
- if ( ARGV[0].nil? )
33
- puts "You must supply an input file"
34
- puts tool.option_parser
35
- exit
36
- end
28
+ tool.add_boolean_option(:shortid,false,['--shortid', 'Use short protein id as per Mascot result (default uses full protein ids in fasta file)' ])
37
29
 
38
- current_db=""
30
+ exit unless tool.check_options(true,[:database])
39
31
 
40
- case
41
- when Pathname.new(tool.database).exist? # It's an explicitly named db
42
- current_db=Pathname.new(tool.database).realpath.to_s
43
- else
44
- current_db=tool.current_database :fasta
45
- end
32
+ database_path=tool.database_info.path
46
33
 
47
34
 
48
35
 
@@ -54,20 +41,20 @@ ARGV.each do |file_name|
54
41
  if ( tool.explicit_output==nil )
55
42
  new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
56
43
  cmd="cp #{name} #{new_basename}.dat"
57
- cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{current_db} -E#{tool.enzyme}"
44
+ cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{database_path} -E#{tool.enzyme}"
58
45
 
59
46
  cmd << " -shortid" if tool.shortid
60
47
 
61
48
  else #Mascot2XML doesn't support explicitly named output files so we move the file to an appropriate output filename after finishing
62
49
  new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
63
50
  cmd="cp #{name} #{new_basename}.dat"
64
- cmd << "; Mascot2XML #{new_basename}.dat -D#{current_db} -E#{tool.enzyme}"
51
+ cmd << "; Mascot2XML #{new_basename}.dat -D#{database_path} -E#{tool.enzyme}"
65
52
  cmd << " -shortid" if tool.shortid
66
53
  cmd << "; mv #{new_basename}.pep.xml #{tool.explicit_output}; rm #{new_basename}.dat"
67
54
  repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"
68
55
  cmd << "; #{repair_script} #{tool.explicit_output}"
69
56
  end
70
57
 
71
- code = tool.run(cmd,genv,nil,nil)
58
+ code = tool.run(cmd,genv)
72
59
  throw "Command #{cmd} failed with exit code #{code}" unless code==0
73
60
  end
@@ -18,8 +18,19 @@ input_stager = nil
18
18
 
19
19
  # Setup specific command-line options for this tool. Other options are inherited from SearchTool
20
20
  #
21
- search_tool=SearchTool.new([:background,:database,:explicit_output,:over_write,:enzyme,
22
- :modifications,:instrument,:mass_tolerance_units,:mass_tolerance,:cleavage_semi])
21
+ search_tool=SearchTool.new([
22
+ :database,
23
+ :explicit_output,
24
+ :over_write,
25
+ :enzyme,
26
+ :modifications,
27
+ :methionine_oxidation,
28
+ :carbamidomethyl,
29
+ :glyco,
30
+ :acetyl_nterm,
31
+ :instrument,
32
+ :cleavage_semi,
33
+ :threads])
23
34
 
24
35
  search_tool.jobid_prefix="p"
25
36
  search_tool.option_parser.banner = "Run an MSGFPlus msms search on a set of msms spectrum input files.\n\nUsage: msgfplus_search.rb [options] file1.mzML file2.mzML ..."
@@ -28,73 +39,24 @@ search_tool.options.output_suffix="_msgfplus"
28
39
  search_tool.options.enzyme=1
29
40
  search_tool.options.instrument=0
30
41
 
31
- search_tool.options.no_pepxml=false
32
- search_tool.option_parser.on( '--no-pepxml', 'Dont convert results to pepxml. Keep native mzidentml format' ) do
33
- search_tool.options.no_pepxml=true
34
- end
35
-
36
- search_tool.options.isotope_error_range="0,1"
37
- search_tool.option_parser.on( '--isotope-error-range range', 'Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation.(Default 0,1)' ) do |range|
38
- search_tool.options.isotope_error_range=range
39
- end
40
-
41
- search_tool.options.fragment_method=0
42
- search_tool.option_parser.on( '--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor' ) do |method|
43
- search_tool.options.fragment_method=method
44
- end
45
-
46
- search_tool.options.protocol=0
47
- search_tool.option_parser.on( '--protocol p', '0: NoProtocol (Default), 1: Phosphorylation, 2: iTRAQ, 3: iTRAQPhospho' ) do |p|
48
- search_tool.options.protocol=p
49
- end
50
-
51
- search_tool.options.min_pep_length=6
52
- search_tool.option_parser.on( '--min-pep-length p', 'Minimum peptide length to consider, Default: 6' ) do |p|
53
- search_tool.options.min_pep_length=p
54
- end
55
-
56
- search_tool.options.max_pep_length=40
57
- search_tool.option_parser.on( '--max-pep-length p', 'Maximum peptide length to consider, Default: 40' ) do |p|
58
- search_tool.options.max_pep_length=p
59
- end
60
-
61
- search_tool.options.min_pep_charge=2
62
- search_tool.option_parser.on( '--min-pep-charge c', 'Minimum precursor charge to consider if charges are not specified in the spectrum file, Default: 2' ) do |c|
63
- search_tool.options.min_pep_charge=c
64
- end
65
-
66
- search_tool.options.max_pep_charge=3
67
- search_tool.option_parser.on( '--max-pep-charge c', 'Maximum precursor charge to consider if charges are not specified in the spectrum file, Default: 3' ) do |c|
68
- search_tool.options.max_pep_charge=c
69
- end
70
-
71
- search_tool.options.num_reported_matches=1
72
- search_tool.option_parser.on( '--num-reported-matches n', 'Number of matches per spectrum to be reported, Default: 1' ) do |n|
73
- search_tool.options.num_reported_matches=n
74
- end
75
-
76
- search_tool.options.add_features=false
77
- search_tool.option_parser.on( '--add-features', 'output additional features' ) do
78
- search_tool.options.add_features=true
79
- end
80
-
81
- search_tool.options.num_threads=nil
82
- search_tool.option_parser.on('--threads NumThreads','Number of processing threads to use') do |nt|
83
- search_tool.options.num_threads=nt
84
- end
85
-
86
- search_tool.options.java_mem="3500M"
87
- search_tool.option_parser.on('--java-mem mem','Java memory limit when running the search (Default 3.5Gb)') do |mem|
88
- search_tool.options.java_mem=mem
89
- end
42
+ # MS-GF+ doesn't support a fragment tolerance, so add the precursor tolerance options manually rather than via the SearchTool defaults
43
+ search_tool.add_value_option(:precursor_tol,"20",['-p','--precursor-ion-tol tol', 'Precursor ion mass tolerance.'])
44
+ search_tool.add_value_option(:precursor_tolu,"ppm",['--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm'])
45
+
46
+ search_tool.add_boolean_option(:pepxml,false,['--pepxml', 'Convert results to pepxml.'])
47
+ search_tool.add_value_option(:isotope_error_range,"0,1",['--isotope-error-range range', 'Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation.(Default 0,1)'])
48
+ search_tool.add_value_option(:fragment_method,0,['--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor'])
49
+ search_tool.add_boolean_option(:decoy_search,false,['--decoy-search', 'Build and search a decoy database on the fly. Input db should not contain decoys if this option is used'])
50
+ search_tool.add_value_option(:protocol,0,['--protocol p', '0: NoProtocol (Default), 1: Phosphorylation, 2: iTRAQ, 3: iTRAQPhospho'])
51
+ search_tool.add_value_option(:min_pep_length,6,['--min-pep-length p', 'Minimum peptide length to consider, Default: 6'])
52
+ search_tool.add_value_option(:max_pep_length,40,['--max-pep-length p', 'Maximum peptide length to consider, Default: 40'])
53
+ search_tool.add_value_option(:min_pep_charge,2,['--min-pep-charge c', 'Minimum precursor charge to consider if charges are not specified in the spectrum file, Default: 2'])
54
+ search_tool.add_value_option(:max_pep_charge,3,['--max-pep-charge c', 'Maximum precursor charge to consider if charges are not specified in the spectrum file, Default: 3'])
55
+ search_tool.add_value_option(:num_reported_matches,1,['--num-reported-matches n', 'Number of matches per spectrum to be reported, Default: 1'])
56
+ search_tool.add_boolean_option(:add_features,false,['--add-features', 'output additional features'])
57
+ search_tool.add_value_option(:java_mem,"3500M",['--java-mem mem','Java memory limit when running the search (Default 3.5Gb)'])
90
58
 
91
- exit unless search_tool.check_options
92
-
93
- if ( ARGV[0].nil? )
94
- puts "You must supply an input file"
95
- puts search_tool.option_parser
96
- exit
97
- end
59
+ exit unless search_tool.check_options(true)
98
60
 
99
61
  # Environment with global constants
100
62
  #
@@ -104,30 +66,34 @@ genv=Constants.new
104
66
  #
105
67
  msgf_bin="#{genv.msgfplusjar}"
106
68
 
107
- throw "Could not find MSGFPlus.jar" if (msgf_bin==nil) || (msgf_bin.length==0)
69
+ # We need to cope with the fact that MSGFPlus.jar might not be executable so fall back to the protk predefined path
70
+
71
+ msgf_bin = "#{genv.msgfplus_root}/MSGFPlus.jar " if !msgf_bin
72
+
73
+ throw "Could not find MSGFPlus.jar" if !msgf_bin || (msgf_bin.length==0) || !File.exist?(msgf_bin)
108
74
 
109
75
  make_msgfdb_cmd=""
110
76
 
111
- case
112
- when Pathname.new(search_tool.database).exist? # It's an explicitly named db
113
- current_db=Pathname.new(search_tool.database).realpath.to_s
77
+ @output_suffix="_msgfplus"
78
+ @output_extension= search_tool.pepxml ? ".pep.xml" : ".mzid"
114
79
 
115
- # Must have fasta extension
116
- if ( Pathname.new(current_db).extname.to_s.downcase != ".fasta" )
117
- make_msgfdb_cmd << "ln -s #{current_db} #{current_db}.fasta;"
118
- current_db="#{current_db}.fasta"
119
- end
80
+ db_info=search_tool.database_info
120
81
 
121
- if(not FileTest.exists?("#{current_db}.canno"))
122
- dbdir = Pathname.new(current_db).dirname.realpath.to_s
123
- make_msgfdb_cmd << "cd #{dbdir}; java -Xmx3500M -cp #{genv.msgfplusjar} edu.ucsd.msjava.msdbsearch.BuildSA -d #{current_db} -tda 0; "
124
- end
125
- else
126
- current_db=search_tool.current_database :fasta
82
+ database_path=db_info.path
83
+
84
+ # Database must have fasta extension
85
+ if Pathname.new(database_path).extname.to_s.downcase != ".fasta"
86
+ make_msgfdb_cmd << "ln -s #{database_path} #{database_path}.fasta;"
87
+ database_path="#{database_path}.fasta"
88
+ db_info.path=database_path
127
89
  end
128
90
 
129
- fragment_tol = search_tool.fragment_tol
130
- precursor_tol = search_tool.precursor_tol
91
+ # Database must be indexed
92
+ unless FileTest.exists?("#{database_path}.canno")
93
+ dbdir = Pathname.new(database_path).dirname.realpath.to_s
94
+ tdavalue=search_tool.decoy_search ? 1 : 0;
95
+ make_msgfdb_cmd << "cd #{dbdir}; java -Xmx3500M -cp #{genv.msgfplusjar} edu.ucsd.msjava.msdbsearch.BuildSA -d #{database_path} -tda #{tdavalue}; "
96
+ end
131
97
 
132
98
 
133
99
  throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
@@ -139,17 +105,12 @@ ARGV.each do |filename|
139
105
  if ( search_tool.explicit_output!=nil)
140
106
  output_path=search_tool.explicit_output
141
107
  else
142
- output_path="#{search_tool.output_base_path(filename.chomp)}.pep.xml"
108
+ output_path=Tool.default_output_path(filename,@output_extension,search_tool.output_prefix,@output_suffix)
143
109
  end
144
110
 
145
111
 
146
- # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
147
- # Get the input file extension
148
- ext = Pathname.new(filename).extname
149
- input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
150
-
151
- mzid_output_path="#{search_tool.input_base_path(filename.chomp)}.mzid"
152
-
112
+ input_path=filename.chomp
113
+ mzid_output_path="#{output_path}.mzid"
153
114
 
154
115
  if for_galaxy
155
116
  original_input_file = input_path
@@ -166,12 +127,17 @@ ARGV.each do |filename|
166
127
 
167
128
  # The basic command
168
129
  #
169
- cmd= "#{make_msgfdb_cmd} java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{mzid_output_path} "
130
+ cmd= "#{make_msgfdb_cmd} java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{database_path} -s #{input_path} -o #{mzid_output_path} "
170
131
 
171
132
  #Semi tryptic peptides
172
133
  #
173
134
  cmd << " -ntt 1" if ( search_tool.cleavage_semi )
174
135
 
136
+ #Decoy searches
137
+ #
138
+ tdavalue=search_tool.decoy_search ? 1 : 0;
139
+ cmd << " -tda #{tdavalue}"
140
+
175
141
  # Precursor tolerance
176
142
  #
177
143
  cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
@@ -203,7 +169,7 @@ ARGV.each do |filename|
203
169
 
204
170
  # Num Threads
205
171
  #
206
- cmd << " -thread #{search_tool.num_threads}" if search_tool.num_threads
172
+ cmd << " -thread #{search_tool.threads}" if search_tool.threads > 0
207
173
 
208
174
  mods_file_content = ""
209
175
 
@@ -211,6 +177,9 @@ ARGV.each do |filename|
211
177
  #
212
178
  if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
213
179
  var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join("\n")
180
+ var_mods << "O1,M,opt,any,Oxidation\n" if search_tool.methionine_oxidation
181
+ var_mods << "C2H2O,*,opt,Prot-N-term,Acetyl\n" if search_tool.acetyl_nterm
182
+ var_mods << "H-1N-1O1,N,opt,any,Deamidated\n" if search_tool.glyco
214
183
  if ( var_mods !="" )
215
184
  mods_file_content << "#{var_mods}\n"
216
185
  end
@@ -220,13 +189,14 @@ ARGV.each do |filename|
220
189
  #
221
190
  if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
222
191
  fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join("\n")
192
+ fix_mods << "C2H3N1O1,C,opt,any,Carbamidomethyl\n" if search_tool.carbamidomethyl
223
193
  if ( fix_mods !="")
224
194
  mods_file_content << "#{fix_mods}"
225
195
  end
226
196
  end
227
197
 
228
198
  if ( mods_file_content != "")
229
- mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
199
+ mods_path="#{output_path}.msgfplus_mods.txt"
230
200
  mods_file=File.open(mods_path,'w+')
231
201
  mods_file.write "NumMods=2\n#{mods_file_content}"
232
202
  mods_file.close
@@ -234,15 +204,15 @@ ARGV.each do |filename|
234
204
  end
235
205
 
236
206
  # As a final part of the command we convert to pepxml
237
- if search_tool.no_pepxml
238
- cmd << "; cp #{mzid_output_path} #{output_path}"
239
- else
207
+ if search_tool.pepxml
240
208
  #if search_tool.explicit_output
241
209
  cmd << ";ruby -pi.bak -e \"gsub('post=\\\"?','post=\\\"X')\" #{mzid_output_path}"
242
210
  cmd << ";ruby -pi.bak -e \"gsub('pre=\\\"?','pre=\\\"X')\" #{mzid_output_path}"
243
211
  cmd << ";idconvert #{mzid_output_path} --pepXML -o #{Pathname.new(mzid_output_path).dirname}"
244
212
  #Then copy the pepxml to the final output path
245
213
  cmd << "; mv #{mzid_output_path.chomp('.mzid')}.pepXML #{output_path}"
214
+ else
215
+ cmd << "; mv #{mzid_output_path} #{output_path}"
246
216
  end
247
217
 
248
218
 
@@ -251,14 +221,7 @@ ARGV.each do |filename|
251
221
 
252
222
  # In case the user specified background running we need to create a jobscript path
253
223
  #
254
- jobscript_path="#{output_path}.pbs.sh"
255
-
256
- # Run the search
257
- #
258
- job_params= {:jobid => search_tool.jobid_from_filename(filename) }
259
- job_params[:queue]="seventytwo"
260
- job_params[:vmem]="70gb"
261
- code = search_tool.run(cmd,genv,job_params,jobscript_path)
224
+ code = search_tool.run(cmd,genv)
262
225
  throw "Command failed with exit code #{code}" unless code==0
263
226
 
264
227
  if for_galaxy
data/bin/omssa_search.rb CHANGED
@@ -16,56 +16,37 @@ for_galaxy = GalaxyUtil.for_galaxy?
16
16
 
17
17
  # Setup specific command-line options for this tool. Other options are inherited from SearchTool
18
18
  #
19
- search_tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme,
20
- :modifications,:instrument,:mass_tolerance_units,:mass_tolerance,:missed_cleavages,
21
- :precursor_search_type,:respect_precursor_charges,:num_peaks_for_multi_isotope_search,:searched_ions
19
+ search_tool=SearchTool.new([
20
+ :database,
21
+ :explicit_output,
22
+ :over_write,
23
+ :enzyme,
24
+ :modifications,
25
+ :methionine_oxidation,
26
+ :carbamidomethyl,
27
+ :glyco,
28
+ :instrument,
29
+ :mass_tolerance_units,
30
+ :mass_tolerance,
31
+ :missed_cleavages,
32
+ :precursor_search_type,
33
+ :respect_precursor_charges,
34
+ :num_peaks_for_multi_isotope_search,
35
+ :searched_ions,
36
+ :threads
22
37
  ])
23
38
 
24
39
 
25
40
  search_tool.option_parser.banner = "Run an OMSSA msms search on a set of mgf input files.\n\nUsage: omssa_search.rb [options] file1.mgf file2.mgf ..."
26
- search_tool.options.output_suffix="_omssa"
27
41
 
28
- search_tool.options.add_retention_times=true
29
- search_tool.option_parser.on( '-R', '--no-add-retention-times', 'Don\'t post process the output to add retention times' ) do
30
- search_tool.options.add_retention_times=false
31
- end
42
+ search_tool.add_boolean_option(:add_retention_times,true,['-R', '--no-add-retention-times', 'Don\'t post process the output to add retention times'])
43
+ search_tool.add_value_option(:max_hit_expect,1,['--max-hit-expect exp', 'Expect values less than this are considered to be hits'])
44
+ search_tool.add_value_option(:intensity_cut_off,0.0005,['--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity'])
45
+ search_tool.add_value_option(:galaxy_index_dir,nil,['--galaxy-index-dir dir', 'Specify galaxy index directory, will search for mods file there.'])
46
+ search_tool.add_value_option(:omx_output,nil,['--omx-output path', 'Specify path for additional OMX output (optional).'])
47
+ search_tool.add_value_option(:logfile,nil,['--logfile path','Send OMSSA stdout to a logfile'])
32
48
 
33
- search_tool.options.max_hit_expect=1
34
- search_tool.option_parser.on( '--max-hit-expect exp', 'Expect values less than this are considered to be hits' ) do |exp|
35
- search_tool.options.max_hit_expect=exp
36
- end
37
-
38
- search_tool.options.intensity_cut_off=0.0005
39
- search_tool.option_parser.on( '--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity' ) do |co|
40
- search_tool.options.intensity_cut_off=co
41
- end
42
-
43
- search_tool.options.galaxy_index_dir=nil
44
- search_tool.option_parser.on( '--galaxy-index-dir dir', 'Specify galaxy index directory, will search for mods file there.' ) do |dir|
45
- search_tool.options.galaxy_index_dir=dir
46
- end
47
-
48
- search_tool.options.omx_output=nil
49
- search_tool.option_parser.on( '--omx-output path', 'Specify path for additional OMX output (optional).' ) do |path|
50
- search_tool.options.omx_output=path
51
- end
52
-
53
- if ( ENV['PROTK_OMSSA_NTHREADS'] )
54
- search_tool.options.nthreads=ENV['PROTK_OMSSA_NTHREADS']
55
- else
56
- search_tool.options.nthreads=0
57
- end
58
- search_tool.option_parser.on( '--nthreads num', 'Number of search threads to use. Default is to use the value in environment variable PROTK_OMSSA_NTHREADS or else to autodetect' ) do |num|
59
- search_tool.options.nthreads=num
60
- end
61
-
62
- exit unless search_tool.check_options
63
-
64
- if ( ARGV[0].nil? )
65
- puts "You must supply an input file"
66
- puts search_tool.option_parser
67
- exit
68
- end
49
+ exit unless search_tool.check_options(true)
69
50
 
70
51
  # Environment with global constants
71
52
  #
@@ -73,24 +54,18 @@ genv=Constants.new
73
54
 
74
55
  # Set search engine specific parameters on the SearchTool object
75
56
  #
76
- rt_correct_bin="#{File.dirname(__FILE__)}/correct_omssa_retention_times.rb"
77
57
  repair_script_bin="#{File.dirname(__FILE__)}/repair_run_summary.rb"
78
58
 
79
59
  make_blastdb_cmd=""
60
+ @output_suffix="_omssa"
80
61
 
81
- case
82
- when Pathname.new(search_tool.database).exist? # It's an explicitly named db
83
- current_db=Pathname.new(search_tool.database).realpath.to_s
84
- if(not FileTest.exists?("#{current_db}.phr"))
85
- make_blastdb_cmd << "makeblastdb -dbtype prot -parse_seqids -in #{current_db}; "
86
- end
87
- else
88
- current_db=search_tool.current_database :fasta
89
- end
90
-
91
- fragment_tol = search_tool.fragment_tol
92
- precursor_tol = search_tool.precursor_tol
62
+ db_info = search_tool.database_info
93
63
 
64
+ # Index the DB if needed
65
+ #
66
+ unless File.exists?("#{db_info.path}.phr")
67
+ make_blastdb_cmd << "makeblastdb -dbtype prot -parse_seqids -in #{db_info.path}; "
68
+ end
94
69
 
95
70
  throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
96
71
 
@@ -101,18 +76,10 @@ ARGV.each do |filename|
101
76
  if ( search_tool.explicit_output!=nil)
102
77
  output_path=search_tool.explicit_output
103
78
  else
104
- output_path="#{search_tool.output_base_path(filename.chomp)}.pep.xml"
79
+ output_path=Tool.default_output_path(filename,".pep.xml",search_tool.output_prefix,@output_suffix)
105
80
  end
106
81
 
107
- # We always perform searches on mgf files so
108
- #
109
- input_path="#{search_tool.input_base_path(filename.chomp)}.mgf"
110
- input_ext=Pathname.new(filename).extname
111
-
112
- if ( input_ext==".dat" )
113
- # This is a file provided by galaxy so we need to leave the .dat extension
114
- input_path="#{search_tool.input_base_path(filename.chomp)}.dat"
115
- end
82
+ input_path=filename.chomp
116
83
 
117
84
 
118
85
  # Only proceed if the output file is not present or we have opted to over-write it
@@ -121,7 +88,7 @@ ARGV.each do |filename|
121
88
 
122
89
  # The basic command
123
90
  #
124
- cmd = "#{make_blastdb_cmd} omssacl -nt #{search_tool.nthreads} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
91
+ cmd = "#{make_blastdb_cmd} omssacl -nt #{search_tool.threads} -d #{db_info.path} -fm #{input_path} -op #{output_path} -w"
125
92
 
126
93
  #Missed cleavages
127
94
  #
@@ -157,7 +124,7 @@ ARGV.each do |filename|
157
124
 
158
125
  # Fragment ion tolerance
159
126
  #
160
- cmd << " -to #{fragment_tol}" #Always in Da
127
+ cmd << " -to #{search_tool.fragment_tol}" #Always in Da
161
128
 
162
129
  # Set the search type (monoisotopic vs average masses) and whether to use strict monoisotopic masses
163
130
  #
@@ -179,38 +146,24 @@ ARGV.each do |filename|
179
146
 
180
147
  # Variable Modifications
181
148
  #
182
- if ( search_tool.var_mods !="" && !(search_tool.var_mods =~/None/)) # Checking for none is to cope with galaxy input
183
- var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
184
-
185
- if ( var_mods !="" )
186
- cmd << " -mv #{var_mods}"
187
- end
188
- else
189
- # Add options related to peptide modifications
190
- #
191
- if ( search_tool.glyco )
192
- cmd << " -mv 119 "
193
- end
149
+ if ( search_tool.var_mods && !(search_tool.var_mods =~/None/)) # Checking for none is to cope with galaxy input
150
+ var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
194
151
  end
195
152
 
196
- if ( search_tool.fix_mods !="" && !(search_tool.fix_mods=~/None/))
197
- fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
198
- if ( fix_mods !="")
199
- cmd << " -mf #{fix_mods}"
200
- end
201
- else
202
- if ( search_tool.has_modifications )
203
- cmd << " -mf "
204
- if ( search_tool.carbamidomethyl )
205
- cmd<<"3 "
206
- end
153
+ var_mods=[] unless var_mods
154
+ var_mods << "119" if search_tool.glyco
155
+ var_mods << "1" if search_tool.methionine_oxidation
207
156
 
208
- if ( search_tool.methionine_oxidation )
209
- cmd<<"1 "
210
- end
157
+ cmd << " -mv #{var_mods.join(",")}" if var_mods.length > 0
211
158
 
212
- end
159
+
160
+ if ( search_tool.fix_mods && !(search_tool.fix_mods=~/None/))
161
+ fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
213
162
  end
163
+ fix_mods=[] unless fix_mods
164
+ fix_mods << ["3"] if search_tool.carbamidomethyl
165
+
166
+ cmd << " -mf #{fix_mods.join(",")}" if fix_mods.length > 0
214
167
 
215
168
  if ( search_tool.searched_ions !="" && !(search_tool.searched_ions=~/None/))
216
169
  searched_ions=search_tool.searched_ions.split(",").collect{ |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
@@ -235,29 +188,19 @@ ARGV.each do |filename|
235
188
  cmd << " -ci #{search_tool.intensity_cut_off}"
236
189
 
237
190
  # Send output to logfile. OMSSA Logging does not play well with Ruby Open4
238
- cmd << " -logfile omssa.log"
191
+ cmd << " -logfile #{search_tool.logfile}" if search_tool.logfile
239
192
 
240
193
  # Up to here we've formulated the omssa command. The rest is cleanup
241
194
  p "Running:#{cmd}"
242
195
 
243
- # Add retention time corrections
244
- #
245
- if (search_tool.options.add_retention_times)
246
- # TODO: Really correct rts
247
- # cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
248
- end
249
196
 
250
197
  # Correct the pepXML file
251
198
  #
252
- # cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
253
- # genv.log("Running repair script command #{cmd}",:info)
199
+ cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
254
200
 
255
201
  # Run the search
256
202
  #
257
- job_params= {:jobid => search_tool.jobid_from_filename(filename) }
258
- job_params[:queue]="lowmem"
259
- job_params[:vmem]="900mb"
260
- search_tool.run(cmd,genv,job_params)
203
+ search_tool.run(cmd,genv)
261
204
 
262
205
 
263
206
  else