protk 1.2.6.pre5 → 1.3.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/README.md +84 -45
  3. data/bin/add_retention_times.rb +9 -5
  4. data/bin/augustus_to_proteindb.rb +7 -11
  5. data/bin/interprophet.rb +28 -46
  6. data/bin/make_decoy.rb +16 -48
  7. data/bin/mascot_search.rb +57 -71
  8. data/bin/mascot_to_pepxml.rb +13 -26
  9. data/bin/msgfplus_search.rb +70 -107
  10. data/bin/omssa_search.rb +52 -109
  11. data/bin/peptide_prophet.rb +44 -119
  12. data/bin/pepxml_to_table.rb +24 -27
  13. data/bin/protein_prophet.rb +22 -82
  14. data/bin/protxml_to_gff.rb +22 -519
  15. data/bin/protxml_to_table.rb +2 -16
  16. data/bin/sixframe.rb +10 -32
  17. data/bin/tandem_search.rb +30 -403
  18. data/bin/tandem_to_pepxml.rb +43 -0
  19. data/bin/unimod_to_loc.rb +1 -1
  20. data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
  21. data/ext/decoymaker/extconf.rb +3 -0
  22. data/lib/protk/constants.rb +16 -2
  23. data/lib/protk/data/default_config.yml +2 -1
  24. data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
  25. data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
  26. data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
  27. data/lib/protk/data/tandem_params.xml +17 -54
  28. data/lib/protk/fastadb.rb +2 -2
  29. data/lib/protk/prophet_tool.rb +1 -1
  30. data/lib/protk/protxml_to_gff_tool.rb +474 -0
  31. data/lib/protk/search_tool.rb +58 -103
  32. data/lib/protk/setup_rakefile.rake +9 -5
  33. data/lib/protk/tandem_search_tool.rb +256 -0
  34. data/lib/protk/tool.rb +85 -104
  35. data/lib/protk.rb +1 -6
  36. metadata +24 -103
  37. data/bin/annotate_ids.rb +0 -59
  38. data/bin/asapratio.rb +0 -27
  39. data/bin/blastxml_to_table.rb +0 -119
  40. data/bin/correct_omssa_retention_times.rb +0 -27
  41. data/bin/feature_finder.rb +0 -95
  42. data/bin/file_convert.rb +0 -164
  43. data/bin/generate_omssa_loc.rb +0 -42
  44. data/bin/gffmerge.rb +0 -208
  45. data/bin/libra.rb +0 -70
  46. data/bin/toppas_pipeline.rb +0 -84
  47. data/bin/uniprot_annotation.rb +0 -141
  48. data/bin/xls_to_table.rb +0 -52
  49. data/bin/xpress.rb +0 -27
  50. data/ext/protk/decoymaker/extconf.rb +0 -3
  51. data/ext/protk/simplealign/extconf.rb +0 -3
  52. data/lib/protk/biotools_excel_converter.rb +0 -60
  53. data/lib/protk/eupathdb_gene_information_table.rb +0 -158
  54. data/lib/protk/gapped_aligner.rb +0 -264
  55. data/lib/protk/protein_annotator.rb +0 -646
  56. data/lib/protk/spreadsheet_extensions.rb +0 -79
  57. data/lib/protk/xtandem_defaults.rb +0 -11
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
- # This file is part of MSLIMS
3
+ # This file is part of Protk
4
4
  # Created by Ira Cooke 12/4/2010
5
5
  #
6
6
  # Convert mascot dat files to pepxml. A wrapper for Mascot2XML
@@ -15,34 +15,21 @@ require 'protk/mascot_util'
15
15
  #
16
16
  genv=Constants.new
17
17
 
18
- tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme])
18
+ tool=SearchTool.new([
19
+ :database,
20
+ :explicit_output,
21
+ :over_write,
22
+ :enzyme])
23
+
19
24
  tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot_to_pepxml.rb [options] file1.dat file2.dat ... "
20
25
 
21
26
  tool.options.enzyme="trypsin"
22
27
 
23
- tool.options.shortid=false
24
- tool.option_parser.on( '--shortid', 'Use short protein id as per Mascot result (default uses full protein ids in fasta file)' ) do
25
- tool.options.shortid=true
26
- end
27
-
28
- tool.option_parser.parse!
29
-
30
- exit unless tool.check_options
31
-
32
- if ( ARGV[0].nil? )
33
- puts "You must supply an input file"
34
- puts tool.option_parser
35
- exit
36
- end
28
+ tool.add_boolean_option(:shortid,false,['--shortid', 'Use short protein id as per Mascot result (default uses full protein ids in fasta file)' ])
37
29
 
38
- current_db=""
30
+ exit unless tool.check_options(true,[:database])
39
31
 
40
- case
41
- when Pathname.new(tool.database).exist? # It's an explicitly named db
42
- current_db=Pathname.new(tool.database).realpath.to_s
43
- else
44
- current_db=tool.current_database :fasta
45
- end
32
+ database_path=tool.database_info.path
46
33
 
47
34
 
48
35
 
@@ -54,20 +41,20 @@ ARGV.each do |file_name|
54
41
  if ( tool.explicit_output==nil )
55
42
  new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
56
43
  cmd="cp #{name} #{new_basename}.dat"
57
- cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{current_db} -E#{tool.enzyme}"
44
+ cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{database_path} -E#{tool.enzyme}"
58
45
 
59
46
  cmd << " -shortid" if tool.shortid
60
47
 
61
48
  else #Mascot2XML doesn't support explicitly named output files so we move the file to an appropriate output filename after finishing
62
49
  new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
63
50
  cmd="cp #{name} #{new_basename}.dat"
64
- cmd << "; Mascot2XML #{new_basename}.dat -D#{current_db} -E#{tool.enzyme}"
51
+ cmd << "; Mascot2XML #{new_basename}.dat -D#{database_path} -E#{tool.enzyme}"
65
52
  cmd << " -shortid" if tool.shortid
66
53
  cmd << "; mv #{new_basename}.pep.xml #{tool.explicit_output}; rm #{new_basename}.dat"
67
54
  repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"
68
55
  cmd << "; #{repair_script} #{tool.explicit_output}"
69
56
  end
70
57
 
71
- code = tool.run(cmd,genv,nil,nil)
58
+ code = tool.run(cmd,genv)
72
59
  throw "Command #{cmd} failed with exit code #{code}" unless code==0
73
60
  end
@@ -18,8 +18,19 @@ input_stager = nil
18
18
 
19
19
  # Setup specific command-line options for this tool. Other options are inherited from SearchTool
20
20
  #
21
- search_tool=SearchTool.new([:background,:database,:explicit_output,:over_write,:enzyme,
22
- :modifications,:instrument,:mass_tolerance_units,:mass_tolerance,:cleavage_semi])
21
+ search_tool=SearchTool.new([
22
+ :database,
23
+ :explicit_output,
24
+ :over_write,
25
+ :enzyme,
26
+ :modifications,
27
+ :methionine_oxidation,
28
+ :carbamidomethyl,
29
+ :glyco,
30
+ :acetyl_nterm,
31
+ :instrument,
32
+ :cleavage_semi,
33
+ :threads])
23
34
 
24
35
  search_tool.jobid_prefix="p"
25
36
  search_tool.option_parser.banner = "Run an MSGFPlus msms search on a set of msms spectrum input files.\n\nUsage: msgfplus_search.rb [options] file1.mzML file2.mzML ..."
@@ -28,73 +39,24 @@ search_tool.options.output_suffix="_msgfplus"
28
39
  search_tool.options.enzyme=1
29
40
  search_tool.options.instrument=0
30
41
 
31
- search_tool.options.no_pepxml=false
32
- search_tool.option_parser.on( '--no-pepxml', 'Dont convert results to pepxml. Keep native mzidentml format' ) do
33
- search_tool.options.no_pepxml=true
34
- end
35
-
36
- search_tool.options.isotope_error_range="0,1"
37
- search_tool.option_parser.on( '--isotope-error-range range', 'Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation.(Default 0,1)' ) do |range|
38
- search_tool.options.isotope_error_range=range
39
- end
40
-
41
- search_tool.options.fragment_method=0
42
- search_tool.option_parser.on( '--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor' ) do |method|
43
- search_tool.options.fragment_method=method
44
- end
45
-
46
- search_tool.options.protocol=0
47
- search_tool.option_parser.on( '--protocol p', '0: NoProtocol (Default), 1: Phosphorylation, 2: iTRAQ, 3: iTRAQPhospho' ) do |p|
48
- search_tool.options.protocol=p
49
- end
50
-
51
- search_tool.options.min_pep_length=6
52
- search_tool.option_parser.on( '--min-pep-length p', 'Minimum peptide length to consider, Default: 6' ) do |p|
53
- search_tool.options.min_pep_length=p
54
- end
55
-
56
- search_tool.options.max_pep_length=40
57
- search_tool.option_parser.on( '--max-pep-length p', 'Maximum peptide length to consider, Default: 40' ) do |p|
58
- search_tool.options.max_pep_length=p
59
- end
60
-
61
- search_tool.options.min_pep_charge=2
62
- search_tool.option_parser.on( '--min-pep-charge c', 'Minimum precursor charge to consider if charges are not specified in the spectrum file, Default: 2' ) do |c|
63
- search_tool.options.min_pep_charge=c
64
- end
65
-
66
- search_tool.options.max_pep_charge=3
67
- search_tool.option_parser.on( '--max-pep-charge c', 'Maximum precursor charge to consider if charges are not specified in the spectrum file, Default: 3' ) do |c|
68
- search_tool.options.max_pep_charge=c
69
- end
70
-
71
- search_tool.options.num_reported_matches=1
72
- search_tool.option_parser.on( '--num-reported-matches n', 'Number of matches per spectrum to be reported, Default: 1' ) do |n|
73
- search_tool.options.num_reported_matches=n
74
- end
75
-
76
- search_tool.options.add_features=false
77
- search_tool.option_parser.on( '--add-features', 'output additional features' ) do
78
- search_tool.options.add_features=true
79
- end
80
-
81
- search_tool.options.num_threads=nil
82
- search_tool.option_parser.on('--threads NumThreads','Number of processing threads to use') do |nt|
83
- search_tool.options.num_threads=nt
84
- end
85
-
86
- search_tool.options.java_mem="3500M"
87
- search_tool.option_parser.on('--java-mem mem','Java memory limit when running the search (Default 3.5Gb)') do |mem|
88
- search_tool.options.java_mem=mem
89
- end
42
+ # MS-GF+ doesnt support fragment tol so add this manually rather than via the SearchTool defaults
43
+ search_tool.add_value_option(:precursor_tol,"20",['-p','--precursor-ion-tol tol', 'Precursor ion mass tolerance.'])
44
+ search_tool.add_value_option(:precursor_tolu,"ppm",['--precursor-ion-tol-units tolu', 'Precursor ion mass tolerance units (ppm or Da). Default=ppm'])
45
+
46
+ search_tool.add_boolean_option(:pepxml,false,['--pepxml', 'Convert results to pepxml.'])
47
+ search_tool.add_value_option(:isotope_error_range,"0,1",['--isotope-error-range range', 'Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation.(Default 0,1)'])
48
+ search_tool.add_value_option(:fragment_method,0,['--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor'])
49
+ search_tool.add_boolean_option(:decoy_search,false,['--decoy-search', 'Build and search a decoy database on the fly. Input db should not contain decoys if this option is used'])
50
+ search_tool.add_value_option(:protocol,0,['--protocol p', '0: NoProtocol (Default), 1: Phosphorylation, 2: iTRAQ, 3: iTRAQPhospho'])
51
+ search_tool.add_value_option(:min_pep_length,6,['--min-pep-length p', 'Minimum peptide length to consider, Default: 6'])
52
+ search_tool.add_value_option(:max_pep_length,40,['--max-pep-length p', 'Maximum peptide length to consider, Default: 40'])
53
+ search_tool.add_value_option(:min_pep_charge,2,['--min-pep-charge c', 'Minimum precursor charge to consider if charges are not specified in the spectrum file, Default: 2'])
54
+ search_tool.add_value_option(:max_pep_charge,3,['--max-pep-charge c', 'Maximum precursor charge to consider if charges are not specified in the spectrum file, Default: 3'])
55
+ search_tool.add_value_option(:num_reported_matches,1,['--num-reported-matches n', 'Number of matches per spectrum to be reported, Default: 1'])
56
+ search_tool.add_boolean_option(:add_features,false,['--add-features', 'output additional features'])
57
+ search_tool.add_value_option(:java_mem,"3500M",['--java-mem mem','Java memory limit when running the search (Default 3.5Gb)'])
90
58
 
91
- exit unless search_tool.check_options
92
-
93
- if ( ARGV[0].nil? )
94
- puts "You must supply an input file"
95
- puts search_tool.option_parser
96
- exit
97
- end
59
+ exit unless search_tool.check_options(true)
98
60
 
99
61
  # Environment with global constants
100
62
  #
@@ -104,30 +66,34 @@ genv=Constants.new
104
66
  #
105
67
  msgf_bin="#{genv.msgfplusjar}"
106
68
 
107
- throw "Could not find MSGFPlus.jar" if (msgf_bin==nil) || (msgf_bin.length==0)
69
+ # We need to cope with the fact that MSGFPlus.jar might not be executable so fall back to the protk predefined path
70
+
71
+ msgf_bin = "#{genv.msgfplus_root}/MSGFPlus.jar " if !msgf_bin
72
+
73
+ throw "Could not find MSGFPlus.jar" if !msgf_bin || (msgf_bin.length==0) || !File.exist?(msgf_bin)
108
74
 
109
75
  make_msgfdb_cmd=""
110
76
 
111
- case
112
- when Pathname.new(search_tool.database).exist? # It's an explicitly named db
113
- current_db=Pathname.new(search_tool.database).realpath.to_s
77
+ @output_suffix="_msgfplus"
78
+ @output_extension= search_tool.pepxml ? ".pep.xml" : ".mzid"
114
79
 
115
- # Must have fasta extension
116
- if ( Pathname.new(current_db).extname.to_s.downcase != ".fasta" )
117
- make_msgfdb_cmd << "ln -s #{current_db} #{current_db}.fasta;"
118
- current_db="#{current_db}.fasta"
119
- end
80
+ db_info=search_tool.database_info
120
81
 
121
- if(not FileTest.exists?("#{current_db}.canno"))
122
- dbdir = Pathname.new(current_db).dirname.realpath.to_s
123
- make_msgfdb_cmd << "cd #{dbdir}; java -Xmx3500M -cp #{genv.msgfplusjar} edu.ucsd.msjava.msdbsearch.BuildSA -d #{current_db} -tda 0; "
124
- end
125
- else
126
- current_db=search_tool.current_database :fasta
82
+ database_path=db_info.path
83
+
84
+ # Database must have fasta extension
85
+ if Pathname.new(database_path).extname.to_s.downcase != ".fasta"
86
+ make_msgfdb_cmd << "ln -s #{database_path} #{database_path}.fasta;"
87
+ database_path="#{database_path}.fasta"
88
+ db_info.path=database_path
127
89
  end
128
90
 
129
- fragment_tol = search_tool.fragment_tol
130
- precursor_tol = search_tool.precursor_tol
91
+ # Database must be indexed
92
+ unless FileTest.exists?("#{database_path}.canno")
93
+ dbdir = Pathname.new(database_path).dirname.realpath.to_s
94
+ tdavalue=search_tool.decoy_search ? 1 : 0;
95
+ make_msgfdb_cmd << "cd #{dbdir}; java -Xmx3500M -cp #{genv.msgfplusjar} edu.ucsd.msjava.msdbsearch.BuildSA -d #{database_path} -tda #{tdavalue}; "
96
+ end
131
97
 
132
98
 
133
99
  throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
@@ -139,17 +105,12 @@ ARGV.each do |filename|
139
105
  if ( search_tool.explicit_output!=nil)
140
106
  output_path=search_tool.explicit_output
141
107
  else
142
- output_path="#{search_tool.output_base_path(filename.chomp)}.pep.xml"
108
+ output_path=Tool.default_output_path(filename,@output_extension,search_tool.output_prefix,@output_suffix)
143
109
  end
144
110
 
145
111
 
146
- # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
147
- # Get the input file extension
148
- ext = Pathname.new(filename).extname
149
- input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
150
-
151
- mzid_output_path="#{search_tool.input_base_path(filename.chomp)}.mzid"
152
-
112
+ input_path=filename.chomp
113
+ mzid_output_path="#{output_path}.mzid"
153
114
 
154
115
  if for_galaxy
155
116
  original_input_file = input_path
@@ -166,12 +127,17 @@ ARGV.each do |filename|
166
127
 
167
128
  # The basic command
168
129
  #
169
- cmd= "#{make_msgfdb_cmd} java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{mzid_output_path} "
130
+ cmd= "#{make_msgfdb_cmd} java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{database_path} -s #{input_path} -o #{mzid_output_path} "
170
131
 
171
132
  #Semi tryptic peptides
172
133
  #
173
134
  cmd << " -ntt 1" if ( search_tool.cleavage_semi )
174
135
 
136
+ #Decoy searches
137
+ #
138
+ tdavalue=search_tool.decoy_search ? 1 : 0;
139
+ cmd << " -tda #{tdavalue}"
140
+
175
141
  # Precursor tolerance
176
142
  #
177
143
  cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
@@ -203,7 +169,7 @@ ARGV.each do |filename|
203
169
 
204
170
  # Num Threads
205
171
  #
206
- cmd << " -thread #{search_tool.num_threads}" if search_tool.num_threads
172
+ cmd << " -thread #{search_tool.threads}" if search_tool.threads > 0
207
173
 
208
174
  mods_file_content = ""
209
175
 
@@ -211,6 +177,9 @@ ARGV.each do |filename|
211
177
  #
212
178
  if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
213
179
  var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join("\n")
180
+ var_mods << "O1,M,opt,any,Oxidation\n" if search_tool.methionine_oxidation
181
+ var_mods << "C2H2O,*,opt,Prot-N-term,Acetyl\n" if search_tool.acetyl_nterm
182
+ var_mods << "H-1N-1O1,N,opt,any,Deamidated\n" if search_tool.glyco
214
183
  if ( var_mods !="" )
215
184
  mods_file_content << "#{var_mods}\n"
216
185
  end
@@ -220,13 +189,14 @@ ARGV.each do |filename|
220
189
  #
221
190
  if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
222
191
  fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join("\n")
192
+ fix_mods << "C2H3N1O1,C,opt,any,Carbamidomethyl\n" if search_tool.carbamidomethyl
223
193
  if ( fix_mods !="")
224
194
  mods_file_content << "#{fix_mods}"
225
195
  end
226
196
  end
227
197
 
228
198
  if ( mods_file_content != "")
229
- mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
199
+ mods_path="#{output_path}.msgfplus_mods.txt"
230
200
  mods_file=File.open(mods_path,'w+')
231
201
  mods_file.write "NumMods=2\n#{mods_file_content}"
232
202
  mods_file.close
@@ -234,15 +204,15 @@ ARGV.each do |filename|
234
204
  end
235
205
 
236
206
  # As a final part of the command we convert to pepxml
237
- if search_tool.no_pepxml
238
- cmd << "; cp #{mzid_output_path} #{output_path}"
239
- else
207
+ if search_tool.pepxml
240
208
  #if search_tool.explicit_output
241
209
  cmd << ";ruby -pi.bak -e \"gsub('post=\\\"?','post=\\\"X')\" #{mzid_output_path}"
242
210
  cmd << ";ruby -pi.bak -e \"gsub('pre=\\\"?','pre=\\\"X')\" #{mzid_output_path}"
243
211
  cmd << ";idconvert #{mzid_output_path} --pepXML -o #{Pathname.new(mzid_output_path).dirname}"
244
212
  #Then copy the pepxml to the final output path
245
213
  cmd << "; mv #{mzid_output_path.chomp('.mzid')}.pepXML #{output_path}"
214
+ else
215
+ cmd << "; mv #{mzid_output_path} #{output_path}"
246
216
  end
247
217
 
248
218
 
@@ -251,14 +221,7 @@ ARGV.each do |filename|
251
221
 
252
222
  # In case the user specified background running we need to create a jobscript path
253
223
  #
254
- jobscript_path="#{output_path}.pbs.sh"
255
-
256
- # Run the search
257
- #
258
- job_params= {:jobid => search_tool.jobid_from_filename(filename) }
259
- job_params[:queue]="seventytwo"
260
- job_params[:vmem]="70gb"
261
- code = search_tool.run(cmd,genv,job_params,jobscript_path)
224
+ code = search_tool.run(cmd,genv)
262
225
  throw "Command failed with exit code #{code}" unless code==0
263
226
 
264
227
  if for_galaxy
data/bin/omssa_search.rb CHANGED
@@ -16,56 +16,37 @@ for_galaxy = GalaxyUtil.for_galaxy?
16
16
 
17
17
  # Setup specific command-line options for this tool. Other options are inherited from SearchTool
18
18
  #
19
- search_tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme,
20
- :modifications,:instrument,:mass_tolerance_units,:mass_tolerance,:missed_cleavages,
21
- :precursor_search_type,:respect_precursor_charges,:num_peaks_for_multi_isotope_search,:searched_ions
19
+ search_tool=SearchTool.new([
20
+ :database,
21
+ :explicit_output,
22
+ :over_write,
23
+ :enzyme,
24
+ :modifications,
25
+ :methionine_oxidation,
26
+ :carbamidomethyl,
27
+ :glyco,
28
+ :instrument,
29
+ :mass_tolerance_units,
30
+ :mass_tolerance,
31
+ :missed_cleavages,
32
+ :precursor_search_type,
33
+ :respect_precursor_charges,
34
+ :num_peaks_for_multi_isotope_search,
35
+ :searched_ions,
36
+ :threads
22
37
  ])
23
38
 
24
39
 
25
40
  search_tool.option_parser.banner = "Run an OMSSA msms search on a set of mgf input files.\n\nUsage: omssa_search.rb [options] file1.mgf file2.mgf ..."
26
- search_tool.options.output_suffix="_omssa"
27
41
 
28
- search_tool.options.add_retention_times=true
29
- search_tool.option_parser.on( '-R', '--no-add-retention-times', 'Don\'t post process the output to add retention times' ) do
30
- search_tool.options.add_retention_times=false
31
- end
42
+ search_tool.add_boolean_option(:add_retention_times,true,['-R', '--no-add-retention-times', 'Don\'t post process the output to add retention times'])
43
+ search_tool.add_value_option(:max_hit_expect,1,['--max-hit-expect exp', 'Expect values less than this are considered to be hits'])
44
+ search_tool.add_value_option(:intensity_cut_off,0.0005,['--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity'])
45
+ search_tool.add_value_option(:galaxy_index_dir,nil,['--galaxy-index-dir dir', 'Specify galaxy index directory, will search for mods file there.'])
46
+ search_tool.add_value_option(:omx_output,nil,['--omx-output path', 'Specify path for additional OMX output (optional).'])
47
+ search_tool.add_value_option(:logfile,nil,['--logfile path','Send OMSSA stdout to a logfile'])
32
48
 
33
- search_tool.options.max_hit_expect=1
34
- search_tool.option_parser.on( '--max-hit-expect exp', 'Expect values less than this are considered to be hits' ) do |exp|
35
- search_tool.options.max_hit_expect=exp
36
- end
37
-
38
- search_tool.options.intensity_cut_off=0.0005
39
- search_tool.option_parser.on( '--intensity-cut-off co', 'Peak intensity cut-off as a fraction of maximum peak intensity' ) do |co|
40
- search_tool.options.intensity_cut_off=co
41
- end
42
-
43
- search_tool.options.galaxy_index_dir=nil
44
- search_tool.option_parser.on( '--galaxy-index-dir dir', 'Specify galaxy index directory, will search for mods file there.' ) do |dir|
45
- search_tool.options.galaxy_index_dir=dir
46
- end
47
-
48
- search_tool.options.omx_output=nil
49
- search_tool.option_parser.on( '--omx-output path', 'Specify path for additional OMX output (optional).' ) do |path|
50
- search_tool.options.omx_output=path
51
- end
52
-
53
- if ( ENV['PROTK_OMSSA_NTHREADS'] )
54
- search_tool.options.nthreads=ENV['PROTK_OMSSA_NTHREADS']
55
- else
56
- search_tool.options.nthreads=0
57
- end
58
- search_tool.option_parser.on( '--nthreads num', 'Number of search threads to use. Default is to use the value in environment variable PROTK_OMSSA_NTHREADS or else to autodetect' ) do |num|
59
- search_tool.options.nthreads=num
60
- end
61
-
62
- exit unless search_tool.check_options
63
-
64
- if ( ARGV[0].nil? )
65
- puts "You must supply an input file"
66
- puts search_tool.option_parser
67
- exit
68
- end
49
+ exit unless search_tool.check_options(true)
69
50
 
70
51
  # Environment with global constants
71
52
  #
@@ -73,24 +54,18 @@ genv=Constants.new
73
54
 
74
55
  # Set search engine specific parameters on the SearchTool object
75
56
  #
76
- rt_correct_bin="#{File.dirname(__FILE__)}/correct_omssa_retention_times.rb"
77
57
  repair_script_bin="#{File.dirname(__FILE__)}/repair_run_summary.rb"
78
58
 
79
59
  make_blastdb_cmd=""
60
+ @output_suffix="_omssa"
80
61
 
81
- case
82
- when Pathname.new(search_tool.database).exist? # It's an explicitly named db
83
- current_db=Pathname.new(search_tool.database).realpath.to_s
84
- if(not FileTest.exists?("#{current_db}.phr"))
85
- make_blastdb_cmd << "makeblastdb -dbtype prot -parse_seqids -in #{current_db}; "
86
- end
87
- else
88
- current_db=search_tool.current_database :fasta
89
- end
90
-
91
- fragment_tol = search_tool.fragment_tol
92
- precursor_tol = search_tool.precursor_tol
62
+ db_info = search_tool.database_info
93
63
 
64
+ # Index the DB if needed
65
+ #
66
+ unless File.exists?("#{db_info.path}.phr")
67
+ make_blastdb_cmd << "makeblastdb -dbtype prot -parse_seqids -in #{db_info.path}; "
68
+ end
94
69
 
95
70
  throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
96
71
 
@@ -101,18 +76,10 @@ ARGV.each do |filename|
101
76
  if ( search_tool.explicit_output!=nil)
102
77
  output_path=search_tool.explicit_output
103
78
  else
104
- output_path="#{search_tool.output_base_path(filename.chomp)}.pep.xml"
79
+ output_path=Tool.default_output_path(filename,".pep.xml",search_tool.output_prefix,@output_suffix)
105
80
  end
106
81
 
107
- # We always perform searches on mgf files so
108
- #
109
- input_path="#{search_tool.input_base_path(filename.chomp)}.mgf"
110
- input_ext=Pathname.new(filename).extname
111
-
112
- if ( input_ext==".dat" )
113
- # This is a file provided by galaxy so we need to leave the .dat extension
114
- input_path="#{search_tool.input_base_path(filename.chomp)}.dat"
115
- end
82
+ input_path=filename.chomp
116
83
 
117
84
 
118
85
  # Only proceed if the output file is not present or we have opted to over-write it
@@ -121,7 +88,7 @@ ARGV.each do |filename|
121
88
 
122
89
  # The basic command
123
90
  #
124
- cmd = "#{make_blastdb_cmd} omssacl -nt #{search_tool.nthreads} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
91
+ cmd = "#{make_blastdb_cmd} omssacl -nt #{search_tool.threads} -d #{db_info.path} -fm #{input_path} -op #{output_path} -w"
125
92
 
126
93
  #Missed cleavages
127
94
  #
@@ -157,7 +124,7 @@ ARGV.each do |filename|
157
124
 
158
125
  # Fragment ion tolerance
159
126
  #
160
- cmd << " -to #{fragment_tol}" #Always in Da
127
+ cmd << " -to #{search_tool.fragment_tol}" #Always in Da
161
128
 
162
129
  # Set the search type (monoisotopic vs average masses) and whether to use strict monoisotopic masses
163
130
  #
@@ -179,38 +146,24 @@ ARGV.each do |filename|
179
146
 
180
147
  # Variable Modifications
181
148
  #
182
- if ( search_tool.var_mods !="" && !(search_tool.var_mods =~/None/)) # Checking for none is to cope with galaxy input
183
- var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
184
-
185
- if ( var_mods !="" )
186
- cmd << " -mv #{var_mods}"
187
- end
188
- else
189
- # Add options related to peptide modifications
190
- #
191
- if ( search_tool.glyco )
192
- cmd << " -mv 119 "
193
- end
149
+ if ( search_tool.var_mods && !(search_tool.var_mods =~/None/)) # Checking for none is to cope with galaxy input
150
+ var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
194
151
  end
195
152
 
196
- if ( search_tool.fix_mods !="" && !(search_tool.fix_mods=~/None/))
197
- fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
198
- if ( fix_mods !="")
199
- cmd << " -mf #{fix_mods}"
200
- end
201
- else
202
- if ( search_tool.has_modifications )
203
- cmd << " -mf "
204
- if ( search_tool.carbamidomethyl )
205
- cmd<<"3 "
206
- end
153
+ var_mods=[] unless var_mods
154
+ var_mods << "119" if search_tool.glyco
155
+ var_mods << "1" if search_tool.methionine_oxidation
207
156
 
208
- if ( search_tool.methionine_oxidation )
209
- cmd<<"1 "
210
- end
157
+ cmd << " -mv #{var_mods.join(",")}" if var_mods.length > 0
211
158
 
212
- end
159
+
160
+ if ( search_tool.fix_mods && !(search_tool.fix_mods=~/None/))
161
+ fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
213
162
  end
163
+ fix_mods=[] unless fix_mods
164
+ fix_mods << ["3"] if search_tool.carbamidomethyl
165
+
166
+ cmd << " -mf #{fix_mods.join(",")}" if fix_mods.length > 0
214
167
 
215
168
  if ( search_tool.searched_ions !="" && !(search_tool.searched_ions=~/None/))
216
169
  searched_ions=search_tool.searched_ions.split(",").collect{ |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
@@ -235,29 +188,19 @@ ARGV.each do |filename|
235
188
  cmd << " -ci #{search_tool.intensity_cut_off}"
236
189
 
237
190
  # Send output to logfile. OMSSA Logging does not play well with Ruby Open4
238
- cmd << " -logfile omssa.log"
191
+ cmd << " -logfile #{search_tool.logfile}" if search_tool.logfile
239
192
 
240
193
  # Up to here we've formulated the omssa command. The rest is cleanup
241
194
  p "Running:#{cmd}"
242
195
 
243
- # Add retention time corrections
244
- #
245
- if (search_tool.options.add_retention_times)
246
- # TODO: Really correct rts
247
- # cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
248
- end
249
196
 
250
197
  # Correct the pepXML file
251
198
  #
252
- # cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
253
- # genv.log("Running repair script command #{cmd}",:info)
199
+ cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
254
200
 
255
201
  # Run the search
256
202
  #
257
- job_params= {:jobid => search_tool.jobid_from_filename(filename) }
258
- job_params[:queue]="lowmem"
259
- job_params[:vmem]="900mb"
260
- search_tool.run(cmd,genv,job_params)
203
+ search_tool.run(cmd,genv)
261
204
 
262
205
 
263
206
  else