protk 1.1.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +85 -0
- data/bin/annotate_ids.rb +59 -0
- data/bin/big_search.rb +41 -0
- data/bin/correct_omssa_retention_times.rb +27 -0
- data/bin/feature_finder.rb +76 -0
- data/bin/file_convert.rb +157 -0
- data/bin/generate_omssa_loc.rb +42 -0
- data/bin/interprophet.rb +91 -0
- data/bin/make_decoy.rb +64 -0
- data/bin/manage_db.rb +123 -0
- data/bin/mascot_search.rb +187 -0
- data/bin/mascot_to_pepxml.rb +44 -0
- data/bin/msgfplus_search.rb +191 -0
- data/bin/omssa_search.rb +205 -0
- data/bin/peptide_prophet.rb +245 -0
- data/bin/pepxml_to_table.rb +78 -0
- data/bin/protein_prophet.rb +140 -0
- data/bin/protk_setup.rb +31 -0
- data/bin/repair_run_summary.rb +113 -0
- data/bin/tandem_search.rb +292 -0
- data/bin/template_search.rb +144 -0
- data/bin/unimod_to_loc.rb +118 -0
- data/bin/xls_to_table.rb +46 -0
- data/ext/protk/extconf.rb +3 -0
- data/ext/protk/protk.c +235 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +210 -0
- data/lib/protk/biotools_excel_converter.rb +60 -0
- data/lib/protk/command_runner.rb +84 -0
- data/lib/protk/constants.rb +296 -0
- data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
- data/lib/protk/data/apt-get_packages.yaml +47 -0
- data/lib/protk/data/brew_packages.yaml +10 -0
- data/lib/protk/data/default_config.yml +20 -0
- data/lib/protk/data/predefined_db.crap.yaml +19 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
- data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
- data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
- data/lib/protk/data/tandem_params.xml +56 -0
- data/lib/protk/data/taxonomy_template.xml +9 -0
- data/lib/protk/data/unimod.xml +16780 -0
- data/lib/protk/eupathdb_gene_information_table.rb +158 -0
- data/lib/protk/galaxy_stager.rb +24 -0
- data/lib/protk/galaxy_util.rb +9 -0
- data/lib/protk/manage_db_rakefile.rake +484 -0
- data/lib/protk/manage_db_tool.rb +181 -0
- data/lib/protk/mascot_util.rb +63 -0
- data/lib/protk/omssa_util.rb +57 -0
- data/lib/protk/plasmodb.rb +50 -0
- data/lib/protk/prophet_tool.rb +85 -0
- data/lib/protk/protein_annotator.rb +646 -0
- data/lib/protk/protxml.rb +137 -0
- data/lib/protk/randomize.rb +7 -0
- data/lib/protk/search_tool.rb +182 -0
- data/lib/protk/setup_rakefile.rake +245 -0
- data/lib/protk/setup_tool.rb +19 -0
- data/lib/protk/spreadsheet_extensions.rb +78 -0
- data/lib/protk/swissprot_database.rb +38 -0
- data/lib/protk/tool.rb +182 -0
- data/lib/protk/xtandem_defaults.rb +11 -0
- data/lib/protk.rb +18 -0
- metadata +256 -0
data/bin/manage_db.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 13/3/2012
|
5
|
+
#
|
6
|
+
# Manage named protein databases
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/manage_db_tool'
|
12
|
+
require 'yaml'
|
13
|
+
require 'pp'
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
command=ARGV[0]
|
19
|
+
ARGV[0] = "--help" if ( command==nil || command=="-h" || command=="help")
|
20
|
+
tool=ManageDBTool.new(command)
|
21
|
+
if ( tool.option_parser.banner=="")
|
22
|
+
tool.option_parser.banner = "Manage named protein databases.\nUsage: manage_db.rb <command> [options] dbname\nCommands are: add remove update list help\nType manage_db <command> -h to get help on a specific command"
|
23
|
+
end
|
24
|
+
|
25
|
+
tool.option_parser.parse!
|
26
|
+
if ( ARGV[0]=="--help")
|
27
|
+
exit
|
28
|
+
end
|
29
|
+
|
30
|
+
command=ARGV.shift
|
31
|
+
dbname=ARGV.shift
|
32
|
+
|
33
|
+
if ( dbname!=nil)
|
34
|
+
dbname=dbname.downcase
|
35
|
+
throw "Database name should contain no spaces" if ( dbname=~/\s/)
|
36
|
+
end
|
37
|
+
|
38
|
+
genv=Constants.new()
|
39
|
+
|
40
|
+
|
41
|
+
case command
|
42
|
+
when "add"
|
43
|
+
throw "Must specify a database name" if dbname==nil
|
44
|
+
throw "all is a reserved word and cannot be used as a database name" if ( dbname=="all")
|
45
|
+
throw "Database #{dbname} exists. Use --update-spec to overwrite." if genv.dbexist?(dbname) && !tool.update_spec
|
46
|
+
throw "Database #{dbname} cannot be updated because it doesn't exist" if !genv.dbexist?(dbname) && tool.update_spec
|
47
|
+
|
48
|
+
dbspec = tool.get_predefined_definition dbname
|
49
|
+
throw "#{dbname} is not a predefined database" if tool.predefined && dbspec=={}
|
50
|
+
|
51
|
+
genv.log("Adding new database #{dbname}",:info) if !tool.update_spec
|
52
|
+
genv.log("Updating spec for #{dbname}",:info) if tool.update_spec
|
53
|
+
|
54
|
+
if dbspec=={}
|
55
|
+
# Create the database specification dictionary (or make amendments if a predefinition was used)
|
56
|
+
dbspec[:is_annotation_db]=tool.is_annotation_db
|
57
|
+
dbspec[:sources]=tool.sources
|
58
|
+
dbspec[:make_blast_index]= tool.make_blast_index
|
59
|
+
dbspec[:make_msgf_index]= tool.make_msgf_index
|
60
|
+
dbspec[:include_filters]= tool.is_annotation_db ? [] : tool.include_filters
|
61
|
+
dbspec[:id_regexes]= tool.is_annotation_db ? [] : tool.id_regexes
|
62
|
+
dbspec[:decoys]= tool.is_annotation_db ? false : tool.decoys
|
63
|
+
dbspec[:archive_old]= tool.is_annotation_db ? false : tool.archive_old
|
64
|
+
dbspec[:decoy_prefix]= tool.decoy_prefix
|
65
|
+
dbspec[:format] = tool.db_format
|
66
|
+
end
|
67
|
+
tool.add dbspec, dbname
|
68
|
+
|
69
|
+
when "update"
|
70
|
+
throw "Must specify a database name" if dbname==nil
|
71
|
+
if ( dbname=="all" )
|
72
|
+
dbnames=tool.all_database_names(genv)
|
73
|
+
else
|
74
|
+
dbnames=[dbname]
|
75
|
+
end
|
76
|
+
p dbnames
|
77
|
+
dbnames.each { |db|
|
78
|
+
throw "Database #{db} does not exist" if !genv.dbexist?(db)
|
79
|
+
dbdir="#{genv.protein_database_root}/#{db}"
|
80
|
+
throw "Could not find required spec file #{dbdir}/.protkdb.yaml" unless Pathname.new("#{dbdir}/.protkdb.yaml").exist?
|
81
|
+
runner=CommandRunner.new(genv)
|
82
|
+
runner.run_local("rake -f #{tool.rakefile_path} #{db} ")
|
83
|
+
}
|
84
|
+
when "list"
|
85
|
+
|
86
|
+
gw_file_handle=nil
|
87
|
+
if tool.galaxy_write
|
88
|
+
pepxml_loc = "#{genv.galaxy_root}/tool-data/pepxml_databases.loc"
|
89
|
+
if ( Pathname.new(pepxml_loc).exist? )
|
90
|
+
gw_file_handle=File.open(pepxml_loc,'w')
|
91
|
+
end
|
92
|
+
p "Warning: Could not find database loc file #{pepxml_loc}" unless Pathname.new(pepxml_loc).exist?
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
Dir.foreach(genv.protein_database_root) do |db_subdir|
|
97
|
+
db_specfile="#{genv.protein_database_root}/#{db_subdir}/.protkdb.yaml"
|
98
|
+
if ( Pathname.new(db_specfile).exist?)
|
99
|
+
spec=YAML.load_file(db_specfile)
|
100
|
+
case
|
101
|
+
when tool.galaxy || tool.galaxy_write
|
102
|
+
unless ( spec[:is_annotation_db] )
|
103
|
+
db_prettyname=db_subdir.gsub(/_/,' ').capitalize
|
104
|
+
loc_line= "#{db_prettyname}\t#{db_subdir}_\t#{db_subdir}\t#{db_subdir}_\n"
|
105
|
+
puts loc_line
|
106
|
+
if ( gw_file_handle )
|
107
|
+
gw_file_handle.write loc_line
|
108
|
+
end
|
109
|
+
end
|
110
|
+
when tool.verbose
|
111
|
+
puts "-- #{db_subdir} --\n"
|
112
|
+
PP.pp(spec)
|
113
|
+
puts "\n"
|
114
|
+
else
|
115
|
+
puts "#{db_subdir}\n"
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
gw_file_handle.close if ( gw_file_handle)
|
121
|
+
|
122
|
+
end
|
123
|
+
|
@@ -0,0 +1,187 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 14/12/2010
|
5
|
+
#
|
6
|
+
# Runs an MS/MS search using the Mascot search engine
|
7
|
+
#
|
8
|
+
|
9
|
+
$VERBOSE=nil
|
10
|
+
|
11
|
+
require 'protk/constants'
|
12
|
+
require 'protk/command_runner'
|
13
|
+
require 'protk/search_tool'
|
14
|
+
require 'rest_client'
|
15
|
+
|
16
|
+
# Environment with global constants
|
17
|
+
#
|
18
|
+
genv=Constants.new
|
19
|
+
|
20
|
+
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
21
|
+
#
|
22
|
+
search_tool=SearchTool.new({:msms_search=>true,:background=>false,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
|
23
|
+
search_tool.jobid_prefix="o"
|
24
|
+
|
25
|
+
search_tool.option_parser.banner = "Run a Mascot msms search on a set of mgf input files.\n\nUsage: mascot_search.rb [options] file1.mgf file2.mgf ..."
|
26
|
+
search_tool.options.output_suffix="_mascot"
|
27
|
+
|
28
|
+
search_tool.options.mascot_server="#{genv.default_mascot_server}/mascot/cgi/"
|
29
|
+
#search_tool.option_parser.on( '-P', '--http-proxy url', 'The url to a proxy server' ) do |url|
|
30
|
+
# search_tool.options.mascot_server=url
|
31
|
+
#end
|
32
|
+
|
33
|
+
#search_tool.options.http_proxy="http://squid.latrobe.edu.au:8080"
|
34
|
+
#search_tool.option_parser.on( '-P', '--http-proxy url', 'The url to a proxy server' ) do |url|
|
35
|
+
# search_tool.options.http_proxy=url
|
36
|
+
#end
|
37
|
+
|
38
|
+
search_tool.option_parser.parse!
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
# Set search engine specific parameters on the SearchTool object
|
45
|
+
#
|
46
|
+
fragment_tol = search_tool.fragment_tol
|
47
|
+
precursor_tol = search_tool.precursor_tol
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
mascot_cgi=search_tool.mascot_server
|
52
|
+
|
53
|
+
unless ( mascot_cgi =~ /^http:\/\//)
|
54
|
+
mascot_cgi = "http://#{mascot_cgi}"
|
55
|
+
end
|
56
|
+
|
57
|
+
#
|
58
|
+
RestClient.proxy=search_tool.http_proxy
|
59
|
+
|
60
|
+
genv.log("Var mods #{search_tool.var_mods} and fixed #{search_tool.fix_mods}",:info)
|
61
|
+
|
62
|
+
var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
|
63
|
+
fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
|
64
|
+
|
65
|
+
# None is given by an empty galaxy multi-select list and we need to turn it into an empty string
|
66
|
+
#
|
67
|
+
var_mods="" if var_mods=="None"
|
68
|
+
fix_mods="" if fix_mods=="None"
|
69
|
+
|
70
|
+
postdict={}
|
71
|
+
|
72
|
+
# CHARGE
|
73
|
+
#
|
74
|
+
postdict[:CHARGE]=search_tool.allowed_charges
|
75
|
+
|
76
|
+
# CLE
|
77
|
+
#
|
78
|
+
postdict[:CLE]=search_tool.enzyme
|
79
|
+
|
80
|
+
# PFA
|
81
|
+
#
|
82
|
+
postdict[:PFA]=search_tool.missed_cleavages
|
83
|
+
|
84
|
+
# COM (Search title)
|
85
|
+
#
|
86
|
+
postdict[:COM]="Protk"
|
87
|
+
|
88
|
+
# DB (Database)
|
89
|
+
#
|
90
|
+
postdict[:DB]=search_tool.database
|
91
|
+
|
92
|
+
# INSTRUMENT
|
93
|
+
#
|
94
|
+
postdict[:INSTRUMENT]=search_tool.instrument
|
95
|
+
|
96
|
+
# IT_MODS (Variable Modifications)
|
97
|
+
#
|
98
|
+
postdict[:IT_MODS]=var_mods
|
99
|
+
|
100
|
+
# ITOL (Fragment ion tolerance)
|
101
|
+
#
|
102
|
+
postdict[:ITOL]=search_tool.fragment_tol
|
103
|
+
|
104
|
+
# ITOLU (Fragment ion tolerance units)
|
105
|
+
#
|
106
|
+
postdict[:ITOLU]=search_tool.fragment_tolu
|
107
|
+
|
108
|
+
# MASS (Monoisotopic and Average)
|
109
|
+
#
|
110
|
+
postdict[:MASS]=search_tool.precursor_search_type
|
111
|
+
|
112
|
+
# MODS (Fixed modifications)
|
113
|
+
#
|
114
|
+
postdict[:MODS]=fix_mods
|
115
|
+
|
116
|
+
# REPORT (What to include in the search report. For command-line searches this is pretty much irrelevant because we retrieve the entire results file anyway)
|
117
|
+
#
|
118
|
+
postdict[:REPORT]="AUTO"
|
119
|
+
|
120
|
+
# TAXONOMY (Always "All entries" because we don't allow taxonomy)
|
121
|
+
#
|
122
|
+
postdict[:TAXONOMY]="All entries"
|
123
|
+
|
124
|
+
# TOL (Precursor ion tolerance (Unit dependent))
|
125
|
+
#
|
126
|
+
postdict[:TOL]=search_tool.precursor_tol
|
127
|
+
|
128
|
+
# TOLU (Tolerance Units)
|
129
|
+
#
|
130
|
+
postdict[:TOLU]=search_tool.precursor_tolu
|
131
|
+
|
132
|
+
# Email
|
133
|
+
#
|
134
|
+
postdict[:USEREMAIL]=search_tool.email
|
135
|
+
|
136
|
+
# Username
|
137
|
+
#
|
138
|
+
postdict[:USERNAME]=search_tool.username
|
139
|
+
|
140
|
+
|
141
|
+
# FILE
|
142
|
+
#
|
143
|
+
postdict[:FILE]=File.new(ARGV[0])
|
144
|
+
|
145
|
+
postdict[:FORMVER]='1.01'
|
146
|
+
postdict[:INTERMEDIATE]=''
|
147
|
+
|
148
|
+
genv.log("Sending #{postdict}",:info)
|
149
|
+
|
150
|
+
postdict.each do |kv|
|
151
|
+
p "#{kv}|\n"
|
152
|
+
end
|
153
|
+
|
154
|
+
search_response=RestClient.post "#{mascot_cgi}/nph-mascot.exe?1", postdict
|
155
|
+
|
156
|
+
genv.log("Mascot search response was #{search_response}",:info)
|
157
|
+
|
158
|
+
# Look for an error if there is one
|
159
|
+
error_result= /Sorry, your search could not be performed(.*)/.match(search_response)
|
160
|
+
if ( error_result != nil )
|
161
|
+
p error_result[0]
|
162
|
+
genv.log("Mascot search failed with response #{search_response}",:warn)
|
163
|
+
throw "Mascot search failed with response #{search_response}"
|
164
|
+
else
|
165
|
+
|
166
|
+
# Search for the location of the mascot data file in the response
|
167
|
+
results=/master_results_?2?\.pl\?file=\.*\/data\/(.*)\/(.+\.dat)/.match(search_response)
# Fail with a clear message (rather than a NoMethodError on nil) when the response contains no results link
throw "Could not find the location of the results file in the Mascot response" if results.nil?
|
168
|
+
results_date=results[1]
|
169
|
+
results_file=results[2]
|
170
|
+
|
171
|
+
|
172
|
+
get_url= "#{mascot_cgi}/../x-cgi/ms-status.exe?Autorefresh=false&Show=RESULTFILE&DateDir=#{results_date}&ResJob=#{results_file}"
|
173
|
+
|
174
|
+
if ( search_tool.explicit_output!=nil)
|
175
|
+
output_path=search_tool.explicit_output
|
176
|
+
else
|
177
|
+
output_path="#{results_file}"
|
178
|
+
end
|
179
|
+
|
180
|
+
# Download the results
|
181
|
+
#
|
182
|
+
require 'open-uri'
|
183
|
+
open("#{output_path}", 'wb') do |file|
|
184
|
+
file << open("#{get_url}").read
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of MSLIMS
|
4
|
+
# Created by Ira Cooke 12/4/2010
|
5
|
+
#
|
6
|
+
# Convert mascot dat files to pepxml. A wrapper for Mascot2XML
|
7
|
+
#
|
8
|
+
|
9
|
+
|
10
|
+
require 'protk/constants'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
require 'protk/mascot_util'
|
13
|
+
|
14
|
+
# Environment with global constants
|
15
|
+
#
|
16
|
+
genv=Constants.new
|
17
|
+
|
18
|
+
tool=SearchTool.new({:database=>true,:explicit_output=>true,:over_write=>true})
|
19
|
+
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot_to_pepxml.rb [options] file1.dat file2.dat ... "
|
20
|
+
tool.option_parser.parse!
|
21
|
+
|
22
|
+
|
23
|
+
ARGV.each do |file_name|
|
24
|
+
name=file_name.chomp
|
25
|
+
|
26
|
+
this_dir=Pathname.new(name).dirname.realpath
|
27
|
+
|
28
|
+
if ( tool.explicit_output==nil )
|
29
|
+
new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
|
30
|
+
cmd="cp #{name} #{new_basename}.dat"
|
31
|
+
cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{tool.current_database :fasta}"
|
32
|
+
|
33
|
+
else #Mascot2XML doesn't support explicitly named output files so we move the file to an appropriate output filename after finishing
|
34
|
+
new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
|
35
|
+
cmd="cp #{name} #{new_basename}.dat"
|
36
|
+
cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{tool.current_database :fasta}"
|
37
|
+
cmd << "; mv #{new_basename}.pep.xml #{tool.explicit_output}; rm #{new_basename}.dat"
|
38
|
+
repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"
|
39
|
+
cmd << "; #{repair_script} #{tool.explicit_output}"
|
40
|
+
end
|
41
|
+
|
42
|
+
code = tool.run(cmd,genv,nil,nil)
|
43
|
+
throw "Command #{cmd} failed with exit code #{code}" unless code==0
|
44
|
+
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 14/12/2010
|
5
|
+
#
|
6
|
+
# Runs an MS/MS search using the MSGFPlus search engine
|
7
|
+
#
|
8
|
+
$VERBOSE=nil
|
9
|
+
require 'protk/constants'
|
10
|
+
require 'protk/command_runner'
|
11
|
+
require 'protk/search_tool'
|
12
|
+
|
13
|
+
|
14
|
+
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
15
|
+
#
|
16
|
+
search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
|
17
|
+
search_tool.option_parser.banner = "Run an MSGFPlus msms search on a set of msms spectrum input files.\n\nUsage: msgfplus_search.rb [options] file1.mzML file2.mzML ..."
|
18
|
+
search_tool.options.output_suffix="_msgfplus"
|
19
|
+
|
20
|
+
search_tool.options.fragment_method=0
|
21
|
+
search_tool.option_parser.on( '--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor' ) do |method|
|
22
|
+
search_tool.options.fragment_method=method
|
23
|
+
end
|
24
|
+
|
25
|
+
search_tool.options.protocol=0
|
26
|
+
search_tool.option_parser.on( '--protocol p', '0: NoProtocol (Default), 1: Phosphorylation' ) do |p|
|
27
|
+
search_tool.options.protocol=p
|
28
|
+
end
|
29
|
+
|
30
|
+
search_tool.options.min_pep_length=6
|
31
|
+
search_tool.option_parser.on( '--min-pep-length p', 'Minimum peptide length to consider, Default: 6' ) do |p|
|
32
|
+
search_tool.options.min_pep_length=p
|
33
|
+
end
|
34
|
+
|
35
|
+
search_tool.options.max_pep_length=40
|
36
|
+
search_tool.option_parser.on( '--max-pep-length p', 'Maximum peptide length to consider, Default: 40' ) do |p|
|
37
|
+
search_tool.options.max_pep_length=p
|
38
|
+
end
|
39
|
+
|
40
|
+
search_tool.options.min_pep_charge=2
|
41
|
+
search_tool.option_parser.on( '--min-pep-charge c', 'Minimum precursor charge to consider if charges are not specified in the spectrum file, Default: 2' ) do |c|
|
42
|
+
search_tool.options.min_pep_charge=c
|
43
|
+
end
|
44
|
+
|
45
|
+
search_tool.options.max_pep_charge=3
|
46
|
+
search_tool.option_parser.on( '--max-pep-charge c', 'Maximum precursor charge to consider if charges are not specified in the spectrum file, Default: 3' ) do |c|
|
47
|
+
search_tool.options.max_pep_charge=c
|
48
|
+
end
|
49
|
+
|
50
|
+
search_tool.options.num_reported_matches=1
|
51
|
+
search_tool.option_parser.on( '--num-reported-matches n', 'Number of matches per spectrum to be reported, Default: 1' ) do |n|
|
52
|
+
search_tool.options.num_reported_matches=n
|
53
|
+
end
|
54
|
+
|
55
|
+
search_tool.options.add_features=false
|
56
|
+
search_tool.option_parser.on( '--add-features', 'output additional features' ) do
|
57
|
+
search_tool.options.add_features=true
|
58
|
+
end
|
59
|
+
|
60
|
+
search_tool.options.java_mem="3500M"
|
61
|
+
search_tool.option_parser.on('--java-mem mem','Java memory limit when running the search (Default 3.5Gb)') do |mem|
|
62
|
+
search_tool.options.java_mem=mem
|
63
|
+
end
|
64
|
+
|
65
|
+
search_tool.option_parser.parse!
|
66
|
+
|
67
|
+
# Environment with global constants
|
68
|
+
#
|
69
|
+
genv=Constants.new
|
70
|
+
|
71
|
+
# Set search engine specific parameters on the SearchTool object
|
72
|
+
#
|
73
|
+
msgf_bin="#{genv.msgfplusjar}"
|
74
|
+
|
75
|
+
case
|
76
|
+
when Pathname.new(search_tool.database).exist? # It's an explicitly named db
|
77
|
+
current_db=Pathname.new(search_tool.database).realpath.to_s
|
78
|
+
else
|
79
|
+
current_db=search_tool.current_database :fasta
|
80
|
+
end
|
81
|
+
|
82
|
+
fragment_tol = search_tool.fragment_tol
|
83
|
+
precursor_tol = search_tool.precursor_tol
|
84
|
+
|
85
|
+
|
86
|
+
throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
|
87
|
+
|
88
|
+
# Run the search engine on each input file
|
89
|
+
#
|
90
|
+
ARGV.each do |filename|
|
91
|
+
|
92
|
+
if ( search_tool.explicit_output!=nil)
|
93
|
+
output_path=search_tool.explicit_output
|
94
|
+
else
|
95
|
+
output_path="#{search_tool.output_base_path(filename.chomp)}.mzid"
|
96
|
+
end
|
97
|
+
|
98
|
+
# (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
|
99
|
+
# Get the input file extension
|
100
|
+
ext = Pathname.new(filename).extname
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
|
105
|
+
|
106
|
+
# Only proceed if the output file is not present or we have opted to over-write it
|
107
|
+
#
|
108
|
+
if ( search_tool.over_write || !Pathname.new(output_path).exist? )
|
109
|
+
|
110
|
+
# The basic command
|
111
|
+
#
|
112
|
+
cmd= "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{output_path} "
|
113
|
+
|
114
|
+
#Missed cleavages
|
115
|
+
#
|
116
|
+
throw "Maximum value for missed cleavages is 2" if ( search_tool.missed_cleavages > 2)
|
117
|
+
cmd << " -ntt #{search_tool.missed_cleavages}"
|
118
|
+
|
119
|
+
# Precursor tolerance
|
120
|
+
#
|
121
|
+
cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
|
122
|
+
|
123
|
+
# Instrument type
|
124
|
+
#
|
125
|
+
cmd << " -inst 2"
|
126
|
+
|
127
|
+
# cmd << " -m 4"
|
128
|
+
|
129
|
+
cmd << " -addFeatures 1" # NOTE(review): always passed; the --add-features option parsed earlier in this script is never consulted here - confirm intent
|
130
|
+
|
131
|
+
# Enzyme
|
132
|
+
#
|
133
|
+
# if ( search_tool.enzyme!="Trypsin")
|
134
|
+
# cmd << " -e #{search_tool.enzyme}"
|
135
|
+
# end
|
136
|
+
|
137
|
+
mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
|
138
|
+
mods_file=File.open(mods_path,'w+')
|
139
|
+
|
140
|
+
# Variable Modifications
|
141
|
+
#
|
142
|
+
if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
|
143
|
+
var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
|
144
|
+
if ( var_mods !="" )
|
145
|
+
cmd << " -mv #{var_mods}"
|
146
|
+
end
|
147
|
+
else
|
148
|
+
# Add options related to peptide modifications
|
149
|
+
#
|
150
|
+
if ( search_tool.glyco )
|
151
|
+
cmd << " -mv 119 "
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
# Fixed modifications
|
156
|
+
#
|
157
|
+
if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
|
158
|
+
fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
|
159
|
+
if ( fix_mods !="")
|
160
|
+
cmd << " -mf #{fix_mods}"
|
161
|
+
end
|
162
|
+
else
|
163
|
+
if ( search_tool.has_modifications )
|
164
|
+
cmd << " -mf "
|
165
|
+
if ( search_tool.carbamidomethyl )
|
166
|
+
cmd<<"3 "
|
167
|
+
end
|
168
|
+
|
169
|
+
if ( search_tool.methionine_oxidation )
|
170
|
+
cmd<<"1 "
|
171
|
+
end
|
172
|
+
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
# Up to here we've formulated the msgfplus command. The rest is cleanup
|
177
|
+
p "Running:#{cmd}"
|
178
|
+
|
179
|
+
# Run the search
|
180
|
+
#
|
181
|
+
job_params= {:jobid => search_tool.jobid_from_filename(filename) }
|
182
|
+
job_params[:queue]="lowmem"
|
183
|
+
job_params[:vmem]="900mb"
|
184
|
+
search_tool.run(cmd,genv,job_params)
|
185
|
+
|
186
|
+
|
187
|
+
else
|
188
|
+
genv.log("Skipping search on existing file #{output_path}",:warn)
|
189
|
+
end
|
190
|
+
|
191
|
+
end
|