protk 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,6 +40,20 @@ search_tool.option_parser.on( '--galaxy-index-dir dir', 'Specify galaxy index di
40
40
  search_tool.options.galaxy_index_dir=dir
41
41
  end
42
42
 
43
+ search_tool.options.omx_output=nil
44
+ search_tool.option_parser.on( '--omx-output path', 'Specify path for additional OMX output (optional).' ) do |path|
45
+ search_tool.options.omx_output=path
46
+ end
47
+
48
+ if ( ENV['PROTK_OMSSA_NTHREADS'] )
49
+ search_tool.options.nthreads=ENV['PROTK_OMSSA_NTHREADS']
50
+ else
51
+ search_tool.options.nthreads=0
52
+ end
53
+ search_tool.option_parser.on( '--nthreads num', 'Number of search threads to use. Default is to use the value in environment variable PROTK_OMSSA_NTHREADS or else to autodetect' ) do |num|
54
+ search_tool.options.nthreads=num
55
+ end
56
+
43
57
  search_tool.option_parser.parse!
44
58
 
45
59
  # Environment with global constants
@@ -96,7 +110,7 @@ ARGV.each do |filename|
96
110
 
97
111
  # The basic command
98
112
  #
99
- cmd = "#{make_blastdb_cmd} #{genv.omssacl} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
113
+ cmd = "#{make_blastdb_cmd} #{genv.omssacl} -nt #{search_tool.nthreads} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
100
114
 
101
115
  #Missed cleavages
102
116
  #
@@ -118,6 +132,10 @@ ARGV.each do |filename|
118
132
  end
119
133
  end
120
134
 
135
+ if ( search_tool.omx_output )
136
+ cmd << " -ox #{search_tool.omx_output} "
137
+ end
138
+
121
139
 
122
140
  # Precursor tolerance
123
141
  #
@@ -150,8 +168,9 @@ ARGV.each do |filename|
150
168
 
151
169
  # Variable Modifications
152
170
  #
153
- if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
171
+ if ( search_tool.var_mods !="" && !(search_tool.var_mods =~/None/)) # Checking for none is to cope with galaxy input
154
172
  var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
173
+
155
174
  if ( var_mods !="" )
156
175
  cmd << " -mv #{var_mods}"
157
176
  end
@@ -163,7 +182,7 @@ ARGV.each do |filename|
163
182
  end
164
183
  end
165
184
 
166
- if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
185
+ if ( search_tool.fix_mods !="" && !(search_tool.fix_mods=~/None/))
167
186
  fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
168
187
  if ( fix_mods !="")
169
188
  cmd << " -mf #{fix_mods}"
@@ -182,7 +201,7 @@ ARGV.each do |filename|
182
201
  end
183
202
  end
184
203
 
185
- if ( search_tool.searched_ions !="" && !search_tool.searched_ions=~/None/)
204
+ if ( search_tool.searched_ions !="" && !(search_tool.searched_ions=~/None/))
186
205
  searched_ions=search_tool.searched_ions.split(",").collect{ |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
187
206
  if ( searched_ions!="")
188
207
  cmd << " -i #{searched_ions}"
@@ -204,19 +223,23 @@ ARGV.each do |filename|
204
223
  # Intensity cut-off
205
224
  cmd << " -ci #{search_tool.intensity_cut_off}"
206
225
 
226
+ # Send output to logfile. OMSSA Logging does not play well with Ruby Open4
227
+ cmd << " -logfile omssa.log"
228
+
207
229
  # Up to here we've formulated the omssa command. The rest is cleanup
208
230
  p "Running:#{cmd}"
209
231
 
210
232
  # Add retention time corrections
211
233
  #
212
234
  if (search_tool.options.add_retention_times)
213
- cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
235
+ # TODO: Actually correct retention times (the correction command is commented out for now)
236
+ # cmd << "; #{rt_correct_bin} #{output_path} #{input_path} "
214
237
  end
215
238
 
216
239
  # Correct the pepXML file
217
240
  #
218
- cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
219
- genv.log("Running repair script command #{cmd}",:info)
241
+ # cmd << "; #{repair_script_bin} -N #{input_path} -R mgf #{output_path} --omssa-itol #{search_tool.fragment_tol}"
242
+ # genv.log("Running repair script command #{cmd}",:info)
220
243
 
221
244
  # Run the search
222
245
  #
@@ -234,4 +257,4 @@ ARGV.each do |filename|
234
257
  #
235
258
  make_blastdb_cmd=""
236
259
 
237
- end
260
+ end
@@ -41,8 +41,5 @@ env=Constants.new
41
41
 
42
42
  toolname=ARGV.shift
43
43
 
44
- p ARGV
45
- p toolname
46
-
47
44
  p "Installing #{toolname}"
48
45
  tool.install toolname
@@ -40,12 +40,6 @@ file.each do |entry|
40
40
  position_start = position
41
41
  position_end = position_start + orf.length*3 -1
42
42
 
43
- if ( frame > 3)
44
- position_start = length - (position - 1)
45
- position_end = position_start - orf.length * 3 + 1
46
- end
47
-
48
-
49
43
  # Create accession compliant with NCBI naming standard
50
44
  # See http://www.ncbi.nlm.nih.gov/books/NBK7183/?rendertype=table&id=ch_demo.T5
51
45
  ncbi_scaffold_id = entry.entry_id.gsub('|','_').gsub(' ','_')
@@ -48,7 +48,18 @@ search_tool.option_parser.on( '--tandem-output tandem_output', 'Keep X! Tandem O
48
48
  end
49
49
 
50
50
  search_tool.options.thresholds_type = 'isb_kscore'
51
- search_tool.option_parser.on( '--thresholds-type thresholds_type', 'Threshold Type (tandem_default, isb_native, isb_kscore, scaffold)' ) do |thresholds_type|
51
+ search_tool.option_parser.on( '--thresholds-type thresholds_type', 'Threshold Type (tandem_default, isb_native, isb_kscore, scaffold, system_default)' ) do |thresholds_type|
52
+ # This option sets up various X! Tandem thresholds.
53
+ # - system_default: Don't change any defaults just use
54
+ # the defaults for this TPP install as is.
55
+ # - tandem_default: These thresholds are found on the
56
+ # tandem api page. http://www.thegpm.org/tandem/api/index.html
57
+ # - isb_native: These are the defaults found in
58
+ # isb_default_input_native.xml distributed with TPP 4.6.
59
+ # - isb_kscore: These are the defaults found in
60
+ # isb_default_input_kscore.xml distributed with TPP 4.6.
61
+ # - scaffold: These are the defaults recommended by Proteome Software
62
+ # for use with Scaffold.
52
63
  search_tool.options.thresholds_type = thresholds_type
53
64
  end
54
65
 
@@ -57,11 +68,12 @@ search_tool.option_parser.on( '--algorithm algorithm', "Scoring algorithm (kscor
57
68
  search_tool.options.algorithm = algorithm
58
69
  end
59
70
 
60
- search_tool.options.cleavage_semi = false
61
- search_tool.option_parser.on( '--cleavage-semi' ) do
62
- search_tool.options.cleavage_semi = true
71
+ search_tool.options.cleavage_semi = true
72
+ search_tool.option_parser.on( '--no-cleavage-semi' ) do
73
+ search_tool.options.cleavage_semi = false
63
74
  end
64
75
 
76
+
65
77
  search_tool.options.n_terminal_mod_mass=nil
66
78
  search_tool.option_parser.on('--n-terminal-mod-mass mass') do |mass|
67
79
  search_tool.options.n_terminal_mod_mass = mass
@@ -122,9 +134,16 @@ def decode_modification_string(mstring)
122
134
  mstring
123
135
  end
124
136
 
137
+ def set_option(std_params, tandem_key, value)
138
+ notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
139
+ throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
140
+ notes[0].content=value
141
+ end
142
+
125
143
  def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_db,search_tool,genv)
126
-
127
-
144
+ set_option(std_params, "protein, cleavage semi", search_tool.cleavage_semi ? "yes" : "no")
145
+ set_option(std_params, "scoring, maximum missed cleavage sites", search_tool.missed_cleavages)
146
+
128
147
  # Set the input and output paths
129
148
  #
130
149
  input_notes=std_params.find('/bioml/note[@type="input" and @label="spectrum, path"]')
@@ -184,6 +203,78 @@ def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_d
184
203
  isotopic_error[0].content="no"
185
204
  end
186
205
 
206
+ if search_tool.tandem_output
207
+ # If the tandem output is being kept (e.g. for consumption by Scaffold),
208
+ # we also want to store the spectra in the output.
209
+ set_option(std_params, "output, spectra", "yes")
210
+ end
211
+
212
+ thresholds_type = search_tool.thresholds_type
213
+
214
+ if thresholds_type != "system_default"
215
+
216
+ maximum_valid_expectation_value = "0.1"
217
+ if thresholds_type == "scaffold"
218
+ maximum_valid_expectation_value = "1000"
219
+ end
220
+
221
+ minimum_ion_count = "4"
222
+ case thresholds_type
223
+ when "isb_kscore", "isb_native"
224
+ minimum_ion_count = "1"
225
+ when "scaffold"
226
+ minimum_ion_count = "0"
227
+ end
228
+
229
+ minimum_peaks = "15"
230
+ case thresholds_type
231
+ when "isb_native"
232
+ minimum_peaks = "6"
233
+ when "isb_kscore"
234
+ minimum_peaks = "10"
235
+ when "scaffold"
236
+ minimum_peaks = "0"
237
+ end
238
+
239
+ minimum_fragement_mz = "150"
240
+ case thresholds_type
241
+ when "isb_native"
242
+ minimum_fragement_mz = "50"
243
+ when "isb_kscore"
244
+ minimum_fragement_mz = "125"
245
+ when "scaffold"
246
+ minimum_fragement_mz = "0"
247
+ end
248
+
249
+ minimum_parent_mh = "500" # tandem and isb_native defaults
250
+ case thresholds_type
251
+ when "isb_kscore"
252
+ minimum_parent_mh = "600"
253
+ when "scaffold"
254
+ minimum_parent_mh = "0"
255
+ end
256
+
257
+ use_noise_suppression = "yes"
258
+ if thresholds_type == "isb_kscore" or thresholds_type == "scaffold"
259
+ use_noise_suppression = "no"
260
+ end
261
+
262
+ dynamic_range = "100.0"
263
+ case thresholds_type
264
+ when "isb_kscore"
265
+ dynamic_range = "10000.0"
266
+ when "scaffold"
267
+ dynamic_range = "1000.0"
268
+ end
269
+
270
+ set_option(std_params, "spectrum, dynamic range", dynamic_range)
271
+ set_option(std_params, "spectrum, use noise suppression", use_noise_suppression)
272
+ set_option(std_params, "spectrum, minimum parent m+h", minimum_parent_mh)
273
+ set_option(std_params, "spectrum, minimum fragment mz", minimum_fragement_mz)
274
+ set_option(std_params, "spectrum, minimum peaks", minimum_peaks)
275
+ set_option(std_params, "scoring, minimum ion count", minimum_ion_count)
276
+ set_option(std_params, "output, maximum valid expectation value", maximum_valid_expectation_value)
277
+ end
187
278
 
188
279
  # Fixed and Variable Modifications
189
280
  #
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 14/12/2010
5
+ #
6
+ # Runs an MS/MS search using the MSGFPlus search engine
7
+ #
8
+ require 'protk/search_tool'
9
+
10
+
11
+ # Setup specific command-line options for this tool. Other options are inherited from SearchTool
12
+ #
13
+ search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
14
+ search_tool.option_parser.banner = "Run an msms search on a set of msms spectrum input files.\n\nUsage: template_search.rb [options] file1.mzML file2.mzML ..."
15
+ search_tool.options.output_suffix="_template"
16
+
17
+ search_tool.options.custom_option="default"
18
+ search_tool.option_parser.on('--custom-opt value','Custom option relevant to this tool only (Default default)') do |val|
19
+ search_tool.options.custom_option=val
20
+ end
21
+
22
+ search_tool.option_parser.parse!
23
+
24
+ # Set search engine specific parameters on the SearchTool object
25
+ #
26
+ msgf_bin="#{genv.msgf_bin}/MSGFPlus.jar"
27
+
28
+ case
29
+ when Pathname.new(search_tool.database).exist? # It's an explicitly named db
30
+ current_db=Pathname.new(search_tool.database).realpath.to_s
31
+ else
32
+ current_db=search_tool.current_database :fasta
33
+ end
34
+
35
+ fragment_tol = search_tool.fragment_tol
36
+ precursor_tol = search_tool.precursor_tol
37
+
38
+
39
+ throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
40
+
41
+ # Run the search engine on each input file
42
+ #
43
+ ARGV.each do |filename|
44
+
45
+ if ( search_tool.explicit_output!=nil)
46
+ output_path=search_tool.explicit_output
47
+ else
48
+ output_path="#{search_tool.output_base_path(filename.chomp)}.mzid"
49
+ end
50
+
51
+ # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
52
+ # Get the input file extension
53
+ ext = Pathname.new(filename).extname
54
+
55
+
56
+
57
+ input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
58
+
59
+ # Only proceed if the output file is not present or we have opted to over-write it
60
+ #
61
+ if ( search_tool.over_write || !Pathname.new(output_path).exist? )
62
+
63
+ # The basic command
64
+ #
65
+ cmd= "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{output_path} "
66
+
67
+ #Missed cleavages
68
+ #
69
+ throw "Maximum value for missed cleavages is 2" if ( search_tool.missed_cleavages > 2)
70
+ cmd << " -ntt #{search_tool.missed_cleavages}"
71
+
72
+ # Precursor tolerance
73
+ #
74
+ cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
75
+
76
+ # Instrument type
77
+ #
78
+ cmd << " -inst 2"
79
+
80
+ # cmd << " -m 4"
81
+
82
+ cmd << " -addFeatures 1"
83
+
84
+ # Enzyme
85
+ #
86
+ # if ( search_tool.enzyme!="Trypsin")
87
+ # cmd << " -e #{search_tool.enzyme}"
88
+ # end
89
+
90
+ mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
91
+ mods_file=File.open(mods_path,'w+')
92
+
93
+ # Variable Modifications
94
+ #
95
+ if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
96
+ var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
97
+ if ( var_mods !="" )
98
+ cmd << " -mv #{var_mods}"
99
+ end
100
+ else
101
+ # Add options related to peptide modifications
102
+ #
103
+ if ( search_tool.glyco )
104
+ cmd << " -mv 119 "
105
+ end
106
+ end
107
+
108
+ # Fixed modifications
109
+ #
110
+ if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
111
+ fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
112
+ if ( fix_mods !="")
113
+ cmd << " -mf #{fix_mods}"
114
+ end
115
+ else
116
+ if ( search_tool.has_modifications )
117
+ cmd << " -mf "
118
+ if ( search_tool.carbamidomethyl )
119
+ cmd<<"3 "
120
+ end
121
+
122
+ if ( search_tool.methionine_oxidation )
123
+ cmd<<"1 "
124
+ end
125
+
126
+ end
127
+ end
128
+
129
+ # Up to here we've formulated the MSGFPlus command. The rest is cleanup
130
+ p "Running:#{cmd}"
131
+
132
+ # Run the search
133
+ #
134
+ job_params= {:jobid => search_tool.jobid_from_filename(filename) }
135
+ job_params[:queue]="lowmem"
136
+ job_params[:vmem]="900mb"
137
+ search_tool.run(cmd,genv,job_params)
138
+
139
+
140
+ else
141
+ genv.log("Skipping search on existing file #{output_path}",:warn)
142
+ end
143
+
144
+ end
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of Protk
4
+ # Created by Ira Cooke 24/3/2013
5
+ #
6
+ # Retrieve annotation information for proteins from the Uniprot Swissprot database
7
+ #
8
+ #
9
+ require 'protk/constants'
10
+ require 'protk/command_runner'
11
+ require 'protk/tool'
12
+ require 'protk/swissprot_database'
13
+ require 'protk/bio_sptr_extensions'
14
+
15
+
16
+ # Setup specific command-line options for this tool. Other options are inherited from Tool
17
+ #
18
+ tool=Tool.new({:explicit_output=>true})
19
+ tool.option_parser.banner = "Retrieve information from the Uniprot database given a list of ID's.\n\n\
20
+ Usage: uniprot_annotation.rb [options] input.tsv"
21
+
22
+ tool.options.id_column=1
23
+ tool.option_parser.on( '--id-column num', 'Specify a column for ids (default is column 1)' ) do |col|
24
+ tool.options.id_column=col.to_i
25
+ end
26
+
27
+ tool.options.fields=nil
28
+ tool.option_parser.on( '--fields flds', 'A comma separated list of fields to extract' ) do |flds|
29
+ tool.options.fields=flds
30
+ end
31
+
32
+ tool.option_parser.parse!
33
+
34
+ # Obtain a global environment object
35
+ genv=Constants.new
36
+
37
+ input_file=ARGV[0]
38
+
39
+ swissprotdb=SwissprotDatabase.new(genv)
40
+
41
+ output_file=nil
42
+
43
+ if ( tool.explicit_output==nil)
44
+ output_file=$stdout
45
+ else
46
+ output_file=File.open(tool.explicit_output,'w+')
47
+ end
48
+
49
+ ac_column = tool.id_column-1
50
+
51
+ db_fields = {
52
+ 'recname'=>"Primary Name",
53
+ 'cd'=>"CD Antigen Name",
54
+ 'altnames'=>"Alternate Names",
55
+ 'location' => "Subcellular Location",
56
+ 'function' => "Known Function",
57
+ 'similarity' => "Similarity",
58
+ 'tissues' => "Tissue Specificity",
59
+ 'disease' => "Disease Association",
60
+ 'domain' => "Domain",
61
+ 'subunit' => "Sub Unit",
62
+ 'nextbio' => "NextBio",
63
+ 'ipi' => "IPI",
64
+ 'intact' => "Interactions",
65
+ 'pride' => 'Pride',
66
+ 'ensembl'=> 'Ensembl',
67
+ 'num_transmem'=>"Transmembrane Regions",
68
+ 'signalp'=>'Signal Peptide',
69
+ 'ref_dump'=>'References',
70
+ 'tax_dump'=>'Taxonomy Cross Ref',
71
+ 'species_dump'=>'Species',
72
+ 'feature_dump'=>'Feature Table',
73
+ 'seq_dump' => 'AA Sequence'
74
+ }
75
+
76
+ hyperlink_fields = {
77
+ 'uniprot_link'=>"Uniprot Link",
78
+ 'nextbio_link'=>'NextBio Link',
79
+ 'intact_link'=>"Interactions Link",
80
+ 'pride_link'=>"Pride Link",
81
+ 'ensembl_link'=>"Ensembl Link"
82
+ }
83
+
84
+ if tool.fields !=nil
85
+ fields = tool.fields.split(",").collect { |f| f.lstrip.rstrip }.reject {|e| e.empty? }
86
+ db_fields = db_fields.select { |k| fields.include? k }
87
+ hyperlink_fields = hyperlink_fields.select { |k| fields.include? k}
88
+ end
89
+
90
+ output_file.write db_fields.values.join("\t")
91
+ if ( hyperlink_fields.count > 0 )
92
+ output_file.write("\t")
93
+ output_file.write hyperlink_fields.values.join("\t")
94
+ end
95
+ output_file.write("\n")
96
+
97
+ line_num=0
98
+ File.foreach(input_file) { |line|
99
+ input_cols=line.split("\t")
100
+ throw "Not enough columns in line #{line_num}" unless input_cols.count > ac_column
101
+ accession=input_cols[ac_column].chomp
102
+
103
+ sptr_entry=swissprotdb.get_entry_for_name(accession)
104
+
105
+ if ( sptr_entry==nil)
106
+ genv.log("No entry for #{accession} in uniprot database",:warn)
107
+ else
108
+
109
+ db_values = db_fields.collect { |key,value|
110
+ sptr_entry.send(key)
111
+ }
112
+
113
+ hyperlink_values = hyperlink_fields.collect { |key,value|
114
+ sptr_entry.send(key)
115
+ }
116
+
117
+ output_file.write db_values.join("\t")
118
+ if ( hyperlink_fields.count > 0 )
119
+ output_file.write("\t")
120
+ output_file.write hyperlink_values.join("\t")
121
+ end
122
+ output_file.write "\n"
123
+ end
124
+
125
+ line_num+=1
126
+
127
+ }
128
+
129
+
130
+