protk 1.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +85 -0
- data/bin/annotate_ids.rb +59 -0
- data/bin/big_search.rb +41 -0
- data/bin/correct_omssa_retention_times.rb +27 -0
- data/bin/feature_finder.rb +76 -0
- data/bin/file_convert.rb +157 -0
- data/bin/generate_omssa_loc.rb +42 -0
- data/bin/interprophet.rb +91 -0
- data/bin/make_decoy.rb +64 -0
- data/bin/manage_db.rb +123 -0
- data/bin/mascot_search.rb +187 -0
- data/bin/mascot_to_pepxml.rb +44 -0
- data/bin/msgfplus_search.rb +191 -0
- data/bin/omssa_search.rb +205 -0
- data/bin/peptide_prophet.rb +245 -0
- data/bin/pepxml_to_table.rb +78 -0
- data/bin/protein_prophet.rb +140 -0
- data/bin/protk_setup.rb +31 -0
- data/bin/repair_run_summary.rb +113 -0
- data/bin/tandem_search.rb +292 -0
- data/bin/template_search.rb +144 -0
- data/bin/unimod_to_loc.rb +118 -0
- data/bin/xls_to_table.rb +46 -0
- data/ext/protk/extconf.rb +3 -0
- data/ext/protk/protk.c +235 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +210 -0
- data/lib/protk/biotools_excel_converter.rb +60 -0
- data/lib/protk/command_runner.rb +84 -0
- data/lib/protk/constants.rb +296 -0
- data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
- data/lib/protk/data/apt-get_packages.yaml +47 -0
- data/lib/protk/data/brew_packages.yaml +10 -0
- data/lib/protk/data/default_config.yml +20 -0
- data/lib/protk/data/predefined_db.crap.yaml +19 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
- data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
- data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
- data/lib/protk/data/tandem_params.xml +56 -0
- data/lib/protk/data/taxonomy_template.xml +9 -0
- data/lib/protk/data/unimod.xml +16780 -0
- data/lib/protk/eupathdb_gene_information_table.rb +158 -0
- data/lib/protk/galaxy_stager.rb +24 -0
- data/lib/protk/galaxy_util.rb +9 -0
- data/lib/protk/manage_db_rakefile.rake +484 -0
- data/lib/protk/manage_db_tool.rb +181 -0
- data/lib/protk/mascot_util.rb +63 -0
- data/lib/protk/omssa_util.rb +57 -0
- data/lib/protk/plasmodb.rb +50 -0
- data/lib/protk/prophet_tool.rb +85 -0
- data/lib/protk/protein_annotator.rb +646 -0
- data/lib/protk/protxml.rb +137 -0
- data/lib/protk/randomize.rb +7 -0
- data/lib/protk/search_tool.rb +182 -0
- data/lib/protk/setup_rakefile.rake +245 -0
- data/lib/protk/setup_tool.rb +19 -0
- data/lib/protk/spreadsheet_extensions.rb +78 -0
- data/lib/protk/swissprot_database.rb +38 -0
- data/lib/protk/tool.rb +182 -0
- data/lib/protk/xtandem_defaults.rb +11 -0
- data/lib/protk.rb +18 -0
- metadata +256 -0
data/bin/manage_db.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 13/3/2012
|
5
|
+
#
|
6
|
+
# Manage named protein databases
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/manage_db_tool'
|
12
|
+
require 'yaml'
|
13
|
+
require 'pp'
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
# Command-line entry point for managing named protein databases.
#
# The first argument selects the sub-command (add, update or list). With no
# command, or with -h / help, the first argument is rewritten to --help so the
# option parser sees a help request. ManageDBTool is still constructed with
# the command value as originally typed.
#
# NOTE(review): the banner advertises a "remove" command, but no branch below
# handles it; unrecognised commands fall through the case silently.
command = ARGV[0]
ARGV[0] = "--help" if command.nil? || command == "-h" || command == "help"
tool = ManageDBTool.new(command)
if tool.option_parser.banner == ""
  tool.option_parser.banner = "Manage named protein databases.\nUsage: manage_db.rb <command> [options] dbname\nCommands are: add remove update list help\nType manage_db <command> -h to get help on a specific command"
end

tool.option_parser.parse!
exit if ARGV[0] == "--help"

# After option parsing the remaining positional arguments are the command
# followed by the (optional) database name.
command = ARGV.shift
dbname = ARGV.shift

if dbname
  # Database names are handled in lower case throughout
  dbname = dbname.downcase
  raise "Database name should contain no spaces" if dbname =~ /\s/
end

genv = Constants.new

case command
when "add"
  raise "Must specify a database name" if dbname.nil?
  raise "all is a reserved word and cannot be used as a database name" if dbname == "all"
  raise "Database #{dbname} exists. Use --update-spec to overwrite." if genv.dbexist?(dbname) && !tool.update_spec
  raise "Database #{dbname} cannot be updated because it doesn't exist" if !genv.dbexist?(dbname) && tool.update_spec

  dbspec = tool.get_predefined_definition dbname
  raise "#{dbname} is not a predefined database" if tool.predefined && dbspec == {}

  genv.log("Adding new database #{dbname}", :info) unless tool.update_spec
  genv.log("Updating spec for #{dbname}", :info) if tool.update_spec

  if dbspec == {}
    # No predefined definition was returned, so build the database
    # specification dictionary from the command-line options. Annotation
    # databases get empty filters/regexes and never have decoys or archiving.
    annotation_db = tool.is_annotation_db
    dbspec[:is_annotation_db] = annotation_db
    dbspec[:sources] = tool.sources
    dbspec[:make_blast_index] = tool.make_blast_index
    dbspec[:make_msgf_index] = tool.make_msgf_index
    dbspec[:include_filters] = annotation_db ? [] : tool.include_filters
    dbspec[:id_regexes] = annotation_db ? [] : tool.id_regexes
    dbspec[:decoys] = annotation_db ? false : tool.decoys
    dbspec[:archive_old] = annotation_db ? false : tool.archive_old
    dbspec[:decoy_prefix] = tool.decoy_prefix
    dbspec[:format] = tool.db_format
  end
  tool.add dbspec, dbname

when "update"
  raise "Must specify a database name" if dbname.nil?

  # "all" expands to every known database; otherwise update just the one named
  dbnames = dbname == "all" ? tool.all_database_names(genv) : [dbname]
  p dbnames

  dbnames.each do |db|
    raise "Database #{db} does not exist" unless genv.dbexist?(db)

    dbdir = "#{genv.protein_database_root}/#{db}"
    raise "Could not find required spec file #{dbdir}/.protkdb.yaml" unless Pathname.new("#{dbdir}/.protkdb.yaml").exist?

    # Each database is rebuilt by invoking its rake task
    runner = CommandRunner.new(genv)
    runner.run_local("rake -f #{tool.rakefile_path} #{db} ")
  end

when "list"
  # When galaxy_write is set the galaxy loc file is rewritten, but only if it
  # already exists. Otherwise a warning is printed and nothing is written.
  gw_file_handle = nil
  if tool.galaxy_write
    pepxml_loc = "#{genv.galaxy_root}/tool-data/pepxml_databases.loc"
    if Pathname.new(pepxml_loc).exist?
      gw_file_handle = File.open(pepxml_loc, 'w')
    else
      p "Warning: Could not find database loc file #{pepxml_loc}"
    end
  end

  begin
    Dir.foreach(genv.protein_database_root) do |db_subdir|
      # Only subdirectories containing a spec file count as databases
      db_specfile = "#{genv.protein_database_root}/#{db_subdir}/.protkdb.yaml"
      next unless Pathname.new(db_specfile).exist?

      spec = YAML.load_file(db_specfile)
      if tool.galaxy || tool.galaxy_write
        # Annotation databases are not listed in galaxy format
        next if spec[:is_annotation_db]

        db_prettyname = db_subdir.gsub(/_/, ' ').capitalize
        loc_line = "#{db_prettyname}\t#{db_subdir}_\t#{db_subdir}\t#{db_subdir}_\n"
        puts loc_line
        gw_file_handle.write loc_line if gw_file_handle
      elsif tool.verbose
        puts "-- #{db_subdir} --\n"
        PP.pp(spec)
        puts "\n"
      else
        puts "#{db_subdir}\n"
      end
    end
  ensure
    # Close the loc file even if listing fails part way through
    gw_file_handle.close if gw_file_handle
  end

end
|
123
|
+
|
@@ -0,0 +1,187 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 14/12/2010
|
5
|
+
#
|
6
|
+
# Runs an MS/MS search using the Mascot search engine
|
7
|
+
#
|
8
|
+
|
9
|
+
$VERBOSE=nil
|
10
|
+
|
11
|
+
require 'protk/constants'
|
12
|
+
require 'protk/command_runner'
|
13
|
+
require 'protk/search_tool'
|
14
|
+
require 'rest_client'
|
15
|
+
|
16
|
+
# Environment with global constants
#
genv = Constants.new

# Setup specific command-line options for this tool. Other options are inherited from SearchTool
#
search_tool = SearchTool.new({:msms_search=>true,:background=>false,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
search_tool.jobid_prefix = "o"

search_tool.option_parser.banner = "Run a Mascot msms search on a set of mgf input files.\n\nUsage: mascot_search.rb [options] file1.mgf file2.mgf ..."
search_tool.options.output_suffix = "_mascot"

search_tool.options.mascot_server = "#{genv.default_mascot_server}/mascot/cgi/"
#search_tool.option_parser.on( '-P', '--http-proxy url', 'The url to a proxy server' ) do |url|
#  search_tool.options.mascot_server=url
#end

#search_tool.options.http_proxy="http://squid.latrobe.edu.au:8080"
#search_tool.option_parser.on( '-P', '--http-proxy url', 'The url to a proxy server' ) do |url|
#  search_tool.options.http_proxy=url
#end

search_tool.option_parser.parse!

# Base url of the Mascot cgi directory. A scheme is prepended only when the
# configured server has none, so both http:// and https:// urls pass through
# untouched.
mascot_cgi = search_tool.mascot_server
mascot_cgi = "http://#{mascot_cgi}" unless mascot_cgi =~ %r{\Ahttps?://}

RestClient.proxy = search_tool.http_proxy

genv.log("Var mods #{search_tool.var_mods} and fixed #{search_tool.fix_mods}",:info)

# Normalise the comma separated modification lists: trim each entry and drop empty ones
var_mods = search_tool.var_mods.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.join(",")
fix_mods = search_tool.fix_mods.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.join(",")

# None is given by an empty galaxy multi-select list and we need to turn it into an empty string
#
var_mods = "" if var_mods == "None"
fix_mods = "" if fix_mods == "None"

# NOTE(review): the banner accepts several mgf files but only the first
# argument is ever submitted.
raise "Must specify an mgf input file" if ARGV.empty?

# Form fields posted to Mascot. Insertion order is kept the same as before.
postdict = {}

# CHARGE
postdict[:CHARGE] = search_tool.allowed_charges

# CLE (Enzyme)
postdict[:CLE] = search_tool.enzyme

# PFA (Missed cleavages)
postdict[:PFA] = search_tool.missed_cleavages

# COM (Search title)
postdict[:COM] = "Protk"

# DB (Database)
postdict[:DB] = search_tool.database

# INSTRUMENT
postdict[:INSTRUMENT] = search_tool.instrument

# IT_MODS (Variable Modifications)
postdict[:IT_MODS] = var_mods

# ITOL (Fragment ion tolerance)
postdict[:ITOL] = search_tool.fragment_tol

# ITOLU (Fragment ion tolerance units)
postdict[:ITOLU] = search_tool.fragment_tolu

# MASS (Monoisotopic and Average)
postdict[:MASS] = search_tool.precursor_search_type

# MODS (Fixed modifications)
postdict[:MODS] = fix_mods

# REPORT (What to include in the search report. For command-line searches this is pretty much irrelevant because we retrieve the entire results file anyway)
postdict[:REPORT] = "AUTO"

# TAXONOMY (Always "All entries" because we don't allow taxonomy restrictions)
postdict[:TAXONOMY] = "All entries"

# TOL (Precursor ion tolerance (Unit dependent))
postdict[:TOL] = search_tool.precursor_tol

# TOLU (Tolerance Units)
postdict[:TOLU] = search_tool.precursor_tolu

# Email
postdict[:USEREMAIL] = search_tool.email

# Username
postdict[:USERNAME] = search_tool.username

# FILE (the spectrum file to upload)
postdict[:FILE] = File.new(ARGV[0])

postdict[:FORMVER] = '1.01'
postdict[:INTERMEDIATE] = ''

begin
  genv.log("Sending #{postdict}",:info)

  postdict.each do |kv|
    p "#{kv}|\n"
  end

  search_response = RestClient.post "#{mascot_cgi}/nph-mascot.exe?1", postdict
ensure
  # The upload handle is no longer needed once the request has finished (or failed)
  postdict[:FILE].close
end

genv.log("Mascot search response was #{search_response}",:info)

# Look for an error if there is one
error_result = /Sorry, your search could not be performed(.*)/.match(search_response)
if error_result
  p error_result[0]
  genv.log("Mascot search failed with response #{search_response}",:warn)
  raise "Mascot search failed with response #{search_response}"
end

# Search for the location of the mascot data file in the response
results = /master_results_?2?\.pl\?file=\.*\/data\/(.*)\/(.+\.dat)/.match(search_response)
raise "Could not find the results file location in Mascot response #{search_response}" if results.nil?

results_date = results[1]
results_file = results[2]

get_url = "#{mascot_cgi}/../x-cgi/ms-status.exe?Autorefresh=false&Show=RESULTFILE&DateDir=#{results_date}&ResJob=#{results_file}"

# Without an explicit output path the results keep the file name Mascot gave them
output_path = search_tool.explicit_output.nil? ? "#{results_file}" : search_tool.explicit_output

# Download the results
#
# URI#read (provided by open-uri) is used instead of Kernel#open because
# opening urls through Kernel#open is no longer supported from Ruby 3.0.
require 'open-uri'
File.open("#{output_path}", 'wb') do |file|
  file << URI.parse("#{get_url}").read
end
|
187
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of MSLIMS
|
4
|
+
# Created by Ira Cooke 12/4/2010
|
5
|
+
#
|
6
|
+
# Convert mascot dat files to pepxml. A wrapper for Mascot2XML
|
7
|
+
#
|
8
|
+
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
require 'protk/mascot_util'
|
13
|
+
|
14
|
+
# Environment with global constants
#
genv = Constants.new

tool = SearchTool.new({:database=>true,:explicit_output=>true,:over_write=>true})
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot_to_pepxml.rb [options] file1.dat file2.dat ... "
tool.option_parser.parse!

# Convert each dat file named on the command line.
#
# NOTE(review): file names and paths are interpolated unquoted into a single
# ";"-separated command string, so paths containing spaces or shell
# metacharacters will presumably break the command - confirm how tool.run
# executes it.
ARGV.each do |file_name|
  name = file_name.chomp

  this_dir = Pathname.new(name).dirname.realpath

  # Mascot2XML is always run on a copy of the input named <basename>_mascot2xml.dat
  # placed next to the original file.
  new_basename = "#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
  cmd = "cp #{name} #{new_basename}.dat"
  cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{tool.current_database :fasta}"

  unless tool.explicit_output.nil?
    # Mascot2XML doesn't support explicitly named output files so we move the
    # file to an appropriate output filename after finishing, remove the
    # copied dat file and then run the run-summary repair script on the result.
    cmd << "; mv #{new_basename}.pep.xml #{tool.explicit_output}; rm #{new_basename}.dat"
    repair_script = "#{File.dirname(__FILE__)}/repair_run_summary.rb"
    cmd << "; #{repair_script} #{tool.explicit_output}"
  end

  code = tool.run(cmd,genv,nil,nil)
  raise "Command #{cmd} failed with exit code #{code}" unless code == 0
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 14/12/2010
|
5
|
+
#
|
6
|
+
# Runs an MS/MS search using the MSGFPlus search engine
|
7
|
+
#
|
8
|
+
$VERBOSE=nil
|
9
|
+
require 'protk/constants'
|
10
|
+
require 'protk/command_runner'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
|
13
|
+
|
14
|
+
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
#
# NOTE(review): of the options defined below only --java-mem is used when the
# command is built further down. fragment_method, protocol, the peptide
# length/charge limits, num_reported_matches and add_features are parsed but
# never passed to MSGFPlus, and "-addFeatures 1" is always appended regardless
# of --add-features.
search_tool = SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
search_tool.option_parser.banner = "Run an MSGFPlus msms search on a set of msms spectrum input files.\n\nUsage: msgfplus_search.rb [options] file1.mzML file2.mzML ..."
search_tool.options.output_suffix = "_msgfplus"

search_tool.options.fragment_method = 0
search_tool.option_parser.on( '--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor' ) do |method|
  search_tool.options.fragment_method = method
end

search_tool.options.protocol = 0
search_tool.option_parser.on( '--protocol p', '0: NoProtocol (Default), 1: Phosphorylation' ) do |p|
  search_tool.options.protocol = p
end

search_tool.options.min_pep_length = 6
search_tool.option_parser.on( '--min-pep-length p', 'Minimum peptide length to consider, Default: 6' ) do |p|
  search_tool.options.min_pep_length = p
end

search_tool.options.max_pep_length = 40
search_tool.option_parser.on( '--max-pep-length p', 'Maximum peptide length to consider, Default: 40' ) do |p|
  search_tool.options.max_pep_length = p
end

search_tool.options.min_pep_charge = 2
search_tool.option_parser.on( '--min-pep-charge c', 'Minimum precursor charge to consider if charges are not specified in the spectrum file, Default: 2' ) do |c|
  search_tool.options.min_pep_charge = c
end

search_tool.options.max_pep_charge = 3
search_tool.option_parser.on( '--max-pep-charge c', 'Maximum precursor charge to consider if charges are not specified in the spectrum file, Default: 3' ) do |c|
  search_tool.options.max_pep_charge = c
end

search_tool.options.num_reported_matches = 1
search_tool.option_parser.on( '--num-reported-matches n', 'Number of matches per spectrum to be reported, Default: 1' ) do |n|
  search_tool.options.num_reported_matches = n
end

search_tool.options.add_features = false
search_tool.option_parser.on( '--add-features', 'output additional features' ) do
  search_tool.options.add_features = true
end

search_tool.options.java_mem = "3500M"
search_tool.option_parser.on('--java-mem mem','Java memory limit when running the search (Default 3.5Gb)') do |mem|
  search_tool.options.java_mem = mem
end

search_tool.option_parser.parse!

# Environment with global constants
#
genv = Constants.new

# Set search engine specific parameters on the SearchTool object
#
msgf_bin = "#{genv.msgfplusjar}"

# A database argument that names an existing file is used directly;
# anything else is resolved as a named database.
current_db = if Pathname.new(search_tool.database).exist?
  Pathname.new(search_tool.database).realpath.to_s
else
  search_tool.current_database :fasta
end

raise "When --output is set only one file at a time can be run" if ( ARGV.length > 1 ) && !search_tool.explicit_output.nil?

# Run the search engine on each input file
#
ARGV.each do |filename|

  output_path = if search_tool.explicit_output.nil?
    "#{search_tool.output_base_path(filename.chomp)}.mzid"
  else
    search_tool.explicit_output
  end

  # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
  # Get the input file extension
  ext = Pathname.new(filename).extname

  input_path = "#{search_tool.input_base_path(filename.chomp)}#{ext}"

  # Only proceed if the output file is not present or we have opted to over-write it
  #
  if !search_tool.over_write && Pathname.new(output_path).exist?
    genv.log("Skipping search on existing file #{output_path}",:warn)
    next
  end

  # The basic command
  #
  cmd = "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{output_path} "

  # Missed cleavages
  #
  raise "Maximum value for missed cleavages is 2" if search_tool.missed_cleavages > 2
  cmd << " -ntt #{search_tool.missed_cleavages}"

  # Precursor tolerance
  #
  cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"

  # Instrument type
  #
  cmd << " -inst 2"

  # cmd << " -m 4"

  cmd << " -addFeatures 1"

  # Enzyme
  #
  # if ( search_tool.enzyme!="Trypsin")
  #   cmd << " -e #{search_tool.enzyme}"
  # end

  # An empty mods file is created (or truncated) next to the input. Nothing in
  # this script writes to it or refers to it again, so the handle is closed
  # straight away rather than being left open.
  mods_path = "#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
  File.open(mods_path,'w+').close

  # Variable Modifications
  #
  # Checking for None is to cope with galaxy input. The test uses !~ because
  # "!value =~ /None/" negates the value before matching and so never matches.
  if search_tool.var_mods != "" && search_tool.var_mods !~ /None/
    var_mods = search_tool.var_mods.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.join(",")
    cmd << " -mv #{var_mods}" if var_mods != ""
  else
    # Add options related to peptide modifications
    #
    cmd << " -mv 119 " if search_tool.glyco
  end

  # Fixed modifications
  #
  if search_tool.fix_mods != "" && search_tool.fix_mods !~ /None/
    fix_mods = search_tool.fix_mods.split(",").map { |mod| mod.strip }.reject { |mod| mod.empty? }.join(",")
    cmd << " -mf #{fix_mods}" if fix_mods != ""
  elsif search_tool.has_modifications
    cmd << " -mf "
    cmd << "3 " if search_tool.carbamidomethyl
    cmd << "1 " if search_tool.methionine_oxidation
  end

  # Up to here we've formulated the MSGFPlus command. The rest is running it
  p "Running:#{cmd}"

  # Run the search
  #
  job_params = {:jobid => search_tool.jobid_from_filename(filename) }
  job_params[:queue] = "lowmem"
  job_params[:vmem] = "900mb"
  search_tool.run(cmd,genv,job_params)

end
|