protk 1.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +85 -0
- data/bin/annotate_ids.rb +59 -0
- data/bin/big_search.rb +41 -0
- data/bin/correct_omssa_retention_times.rb +27 -0
- data/bin/feature_finder.rb +76 -0
- data/bin/file_convert.rb +157 -0
- data/bin/generate_omssa_loc.rb +42 -0
- data/bin/interprophet.rb +91 -0
- data/bin/make_decoy.rb +64 -0
- data/bin/manage_db.rb +123 -0
- data/bin/mascot_search.rb +187 -0
- data/bin/mascot_to_pepxml.rb +44 -0
- data/bin/msgfplus_search.rb +191 -0
- data/bin/omssa_search.rb +205 -0
- data/bin/peptide_prophet.rb +245 -0
- data/bin/pepxml_to_table.rb +78 -0
- data/bin/protein_prophet.rb +140 -0
- data/bin/protk_setup.rb +31 -0
- data/bin/repair_run_summary.rb +113 -0
- data/bin/tandem_search.rb +292 -0
- data/bin/template_search.rb +144 -0
- data/bin/unimod_to_loc.rb +118 -0
- data/bin/xls_to_table.rb +46 -0
- data/ext/protk/extconf.rb +3 -0
- data/ext/protk/protk.c +235 -0
- data/lib/protk/big_search_rakefile.rake +16 -0
- data/lib/protk/big_search_tool.rb +23 -0
- data/lib/protk/bio_sptr_extensions.rb +210 -0
- data/lib/protk/biotools_excel_converter.rb +60 -0
- data/lib/protk/command_runner.rb +84 -0
- data/lib/protk/constants.rb +296 -0
- data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
- data/lib/protk/data/apt-get_packages.yaml +47 -0
- data/lib/protk/data/brew_packages.yaml +10 -0
- data/lib/protk/data/default_config.yml +20 -0
- data/lib/protk/data/predefined_db.crap.yaml +19 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
- data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
- data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
- data/lib/protk/data/tandem_params.xml +56 -0
- data/lib/protk/data/taxonomy_template.xml +9 -0
- data/lib/protk/data/unimod.xml +16780 -0
- data/lib/protk/eupathdb_gene_information_table.rb +158 -0
- data/lib/protk/galaxy_stager.rb +24 -0
- data/lib/protk/galaxy_util.rb +9 -0
- data/lib/protk/manage_db_rakefile.rake +484 -0
- data/lib/protk/manage_db_tool.rb +181 -0
- data/lib/protk/mascot_util.rb +63 -0
- data/lib/protk/omssa_util.rb +57 -0
- data/lib/protk/plasmodb.rb +50 -0
- data/lib/protk/prophet_tool.rb +85 -0
- data/lib/protk/protein_annotator.rb +646 -0
- data/lib/protk/protxml.rb +137 -0
- data/lib/protk/randomize.rb +7 -0
- data/lib/protk/search_tool.rb +182 -0
- data/lib/protk/setup_rakefile.rake +245 -0
- data/lib/protk/setup_tool.rb +19 -0
- data/lib/protk/spreadsheet_extensions.rb +78 -0
- data/lib/protk/swissprot_database.rb +38 -0
- data/lib/protk/tool.rb +182 -0
- data/lib/protk/xtandem_defaults.rb +11 -0
- data/lib/protk.rb +18 -0
- metadata +256 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
# This file is part of protk
|
2
|
+
# Created by Ira Cooke 14/12/2010
|
3
|
+
#
|
4
|
+
# Initialises global constants.
|
5
|
+
# All tools should source this file.
|
6
|
+
#
|
7
|
+
require 'yaml'
|
8
|
+
require 'logger'
|
9
|
+
require 'pathname'
|
10
|
+
require 'ftools'
|
11
|
+
|
12
|
+
# Holds protk's global configuration and the loggers shared by all tools.
#
# Configuration values are loaded from the bundled data/default_config.yml
# into the @env Hash. Values that are paths get a dedicated accessor that
# resolves them against the protk directory; every other value is reachable
# through a method named after its key (see #method_missing).
#
class Constants

  # Message threshold for the stdout logger ("debug", "info" or "warn"),
  # taken from the 'message_level' config entry.
  attr_reader :info_level

  # Directory that relative config paths are resolved against.
  attr_reader :protk_dir

  # Provides direct access to constants through methods of the same name.
  # This is used for all constants other than paths.
  #
  # Returns the value stored under the method's name in @env, or nil when
  # there is no such key. Calls that pass arguments cannot be config lookups
  # and are handed to the default implementation (NoMethodError).
  #
  def method_missing(method, *args)
    return super unless args.empty?
    @env[method.to_s]
  end

  # Keeps respond_to? in step with #method_missing for configured keys.
  #
  def respond_to_missing?(method, include_private = false)
    (@env.respond_to?(:key?) && @env.key?(method.to_s)) || super
  end

  # Some constants are paths. They need to be translated into real paths
  # before being returned.
  #

  def bin
    "#{@protk_dir}/bin"
  end

  def tpp_root
    resolve_path('tpp_root')
  end

  def xinteract
    "#{tpp_root}/bin/xinteract"
  end

  def xtandem
    "#{tpp_root}/bin/tandem"
  end

  def tandem2xml
    "#{tpp_root}/bin/Tandem2XML"
  end

  def interprophetparser
    "#{tpp_root}/bin/InterProphetParser"
  end

  def proteinprophet
    "#{tpp_root}/bin/ProteinProphet"
  end

  def mascot2xml
    "#{tpp_root}/bin/Mascot2XML"
  end

  def omssa_root
    resolve_path('omssa_root')
  end

  def omssacl
    "#{omssa_root}/omssacl"
  end

  def omssa2pepxml
    "#{omssa_root}/omssa2pepXML"
  end

  def openms_root
    resolve_path('openms_root')
  end

  def msgfplus_root
    resolve_path('msgfplus_root')
  end

  def msgfplusjar
    "#{msgfplus_root}/MSGFPlus.jar"
  end

  def protein_database_root
    resolve_path('protein_database_root')
  end

  def database_downloads
    "#{protein_database_root}/downloads"
  end

  def blast_root
    resolve_path('blast_root')
  end

  def makeblastdb
    "#{blast_root}/bin/makeblastdb"
  end

  def log_file
    resolve_path('log_file')
  end

  # Read the global constants file and initialize our @env variable.
  # Loggers are created lazily on the first call to #log.
  #
  # Raises RuntimeError when the bundled config file yields no data.
  #
  def initialize
    @protk_dir = "#{Dir.home}/.protk"

    config_path = "#{File.dirname(__FILE__)}/data/default_config.yml"
    default_config_yml = YAML.load_file config_path
    raise "Unable to read the config file at #{config_path}" unless default_config_yml

    @env = default_config_yml
    @info_level = default_config_yml['message_level']
  end

  # Creates the stdout logger and the daily-rotated file logger, creating the
  # log directory first if needed. Only the stdout logger's threshold is
  # driven by info_level; unrecognised levels leave the Logger default.
  #
  def initialize_loggers
    log_dir = Pathname.new(log_file).dirname
    log_dir.mkpath unless log_dir.exist?

    @stdout_logger = Logger.new(STDOUT)
    @file_logger = Logger.new(log_file, 'daily')

    case @info_level
    when "info"
      @stdout_logger.level = Logger::INFO
    when "debug"
      @stdout_logger.level = Logger::DEBUG
    when "warn"
      @stdout_logger.level = Logger::WARN
    end
  end

  # Write a message to all logger objects.
  #
  # message - text to log
  # level   - name of the Logger method to call (:debug, :info, :warn, ...)
  #
  def log(message, level)
    initialize_loggers if @stdout_logger.nil? || @file_logger.nil?
    @stdout_logger.send(level, message)
    @file_logger.send(level, message)
  end

  def path_for_builtin_database(dbname)
    "#{protein_database_root}/#{dbname}/current.fasta"
  end

  def dbexist?(dbname)
    Pathname.new("#{protein_database_root}/#{dbname}").exist?
  end

  # Based on the database shortname and global database path, return the path
  # to the current fasta file of the named database.
  #
  # Trailing slashes and whitespace are stripped from the database root before
  # it is used. The configured value itself is never modified.
  #
  # Raises RuntimeError when the database root is unset or does not exist.
  #
  def current_database_for_name(dbname)
    dbroot = protein_database_root
    raise "Protein database directory not specified" unless dbroot

    # Work on a copy: for absolute paths protein_database_root hands back the
    # very String stored in @env.
    dbroot = dbroot.sub(/[\/\s]*\z/, "")
    raise "Protein database directory #{dbroot} does not exist" unless Pathname(dbroot).exist?

    "#{dbroot}/#{dbname}/current.fasta"
  end

  # Runs the given command in a local shell, logging its stdout at :info and
  # its stderr at :warn. Returns the status object produced by Open4.
  #
  # NOTE(review): stdout is drained completely before stderr is read, so a
  # command that writes a lot to stderr could stall — confirm whether this
  # matters for the tools being run.
  #
  def run_local(command_string)
    log("Command: #{command_string} started", :info)
    status = Open4::popen4("#{command_string} ") do |pid, stdin, stdout, stderr|
      puts "PID #{pid}"
      stdout.each { |line| log(line.chomp, :info) }
      stderr.each { |line| log(line.chomp, :warn) }
    end

    # A non-zero status means the command failed, so log it as an error.
    exit_level = status != 0 ? :error : :info
    log("Command: #{command_string} exited with status #{status.to_s}", exit_level)
    status
  end

  # Links a fasta file into <dbroot>/tmp/ and makes sure a BLAST index exists
  # for it, building one with makeblastdb when blastdbcmd reports nothing.
  #
  # dbroot             - database root directory (must already exist)
  # path_to_fasta_file - fasta file to import
  #
  # Returns the path of the linked fasta file as a String.
  # Raises RuntimeError when the temporary directory or link cannot be made.
  #
  def import_fasta_database(dbroot, path_to_fasta_file)
    tmp_dbroot = Pathname.new("#{dbroot}/tmp/")
    dest_fasta_file_name = Pathname.new(path_to_fasta_file).basename
    dest_fasta_file_path = Pathname.new("#{tmp_dbroot}#{dest_fasta_file_name}")

    unless dest_fasta_file_path.exist?
      Dir.mkdir(tmp_dbroot.to_s) unless tmp_dbroot.directory?
      raise "Unable to make temporary database directory #{tmp_dbroot}" unless tmp_dbroot.exist?

      # Link without going through a shell so unusual file names are safe.
      File.symlink(path_to_fasta_file.to_s, dest_fasta_file_path.to_s)
    end

    check_cmd = "#{blastdbcmd} -info -db #{dest_fasta_file_path}"
    result = %x[#{check_cmd}]

    if result.empty?
      raise "Unable to create temporary database #{dest_fasta_file_path}" unless dest_fasta_file_path.exist?
      run_local("#{makeblastdb} -in #{dest_fasta_file_path} -parse_seqids")
    end

    dest_fasta_file_path.to_s
  end

  private

  # Returns the config value for key unchanged when it is an absolute path,
  # otherwise resolved relative to the protk directory.
  def resolve_path(key)
    path = @env[key]
    path.to_s.start_with?('/') ? path : "#{@protk_dir}/#{path}"
  end

  # Path of the blastdbcmd executable: the 'ncbi_tools_bin' config entry when
  # set, otherwise the bin directory under blast_root (next to makeblastdb).
  def blastdbcmd
    tools_bin = @env['ncbi_tools_bin'] || "#{blast_root}/bin"
    "#{tools_bin}/blastdbcmd"
  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
<?xml version="1.0" encoding="ISO-8859-1"?>
|
2
|
+
<PARAMETERS version="1.3" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
3
|
+
<NODE name="FeatureFinderCentroided" description="Detects two-dimensional features in LC-MS data.">
|
4
|
+
<ITEM name="version" value="1.9.0" type="string" description="Version of the tool that generated this parameters file." tags="advanced" />
|
5
|
+
<NODE name="1" description="Instance '1' section for 'FeatureFinderCentroided'">
|
6
|
+
<ITEM name="in" value="" type="string" description="input file" tags="input file,required" restrictions="*.mzML" />
|
7
|
+
<ITEM name="out" value="" type="string" description="output file" tags="output file,required" restrictions="*.featureXML" />
|
8
|
+
<ITEM name="seeds" value="" type="string" description="User specified seed list" tags="input file" restrictions="*.featureXML" />
|
9
|
+
<ITEM name="log" value="" type="string" description="Name of log file (created only when specified)" tags="advanced" />
|
10
|
+
<ITEM name="debug" value="0" type="int" description="Sets the debug level" tags="advanced" />
|
11
|
+
<ITEM name="threads" value="1" type="int" description="Sets the number of threads allowed to be used by the TOPP tool" />
|
12
|
+
<ITEM name="no_progress" value="false" type="string" description="Disables progress logging to command line" tags="advanced" restrictions="true,false" />
|
13
|
+
<ITEM name="test" value="false" type="string" description="Enables the test mode (needed for internal use only)" tags="advanced" restrictions="true,false" />
|
14
|
+
<NODE name="algorithm" description="Algorithm section">
|
15
|
+
<ITEM name="debug" value="false" type="string" description="When debug mode is activated, several files with intermediate results are written to the folder 'debug' (do not use in parallel mode)." restrictions="true,false" />
|
16
|
+
<NODE name="intensity" description="Settings for the calculation of a score indicating if a peak's intensity is significant in the local environment (between 0 and 1)">
|
17
|
+
<ITEM name="bins" value="10" type="int" description="Number of bins per dimension (RT and m/z). The higher this value, the more local the intensity significance score is.#br#This parameter should be decreased, if the algorithm is used on small regions of a map." restrictions="1:" />
|
18
|
+
</NODE>
|
19
|
+
<NODE name="mass_trace" description="Settings for the calculation of a score indicating if a peak is part of a mass trace (between 0 and 1).">
|
20
|
+
<ITEM name="mz_tolerance" value="0.02" type="float" description="Tolerated m/z deviation of peaks belonging to the same mass trace.#br#It should be larger than the m/z resolution of the instrument.#br#This value must be smaller than 1/charge_high!" restrictions="0:" />
|
21
|
+
<ITEM name="min_spectra" value="10" type="int" description="Number of spectra that have to show a similar peak mass in a mass trace." restrictions="1:" />
|
22
|
+
<ITEM name="max_missing" value="1" type="int" description="Number of consecutive spectra where a high mass deviation or missing peak is acceptable.#br#This parameter should be well below 'min_spectra'!" restrictions="0:" />
|
23
|
+
<ITEM name="slope_bound" value="0.1" type="float" description="The maximum slope of mass trace intensities when extending from the highest peak.#br#This parameter is important to separate overlapping elution peaks.#br#It should be increased if feature elution profiles fluctuate a lot." restrictions="0:" />
|
24
|
+
</NODE>
|
25
|
+
<NODE name="isotopic_pattern" description="Settings for the calculation of a score indicating if a peak is part of an isotopic pattern (between 0 and 1).">
|
26
|
+
<ITEM name="charge_low" value="1" type="int" description="Lowest charge to search for." restrictions="1:" />
|
27
|
+
<ITEM name="charge_high" value="4" type="int" description="Highest charge to search for." restrictions="1:" />
|
28
|
+
<ITEM name="mz_tolerance" value="0.04" type="float" description="Tolerated m/z deviation from the theoretical isotopic pattern.#br#It should be larger than the m/z resolution of the instrument.#br#This value must be smaller than 1/charge_high!" restrictions="0:" />
|
29
|
+
<ITEM name="intensity_percentage" value="10" type="float" description="Isotopic peaks that contribute more than this percentage to the overall isotope pattern intensity must be present." tags="advanced" restrictions="0:100" />
|
30
|
+
<ITEM name="intensity_percentage_optional" value="0.1" type="float" description="Isotopic peaks that contribute more than this percentage to the overall isotope pattern intensity can be missing." tags="advanced" restrictions="0:100" />
|
31
|
+
<ITEM name="optional_fit_improvement" value="2" type="float" description="Minimal percental improvement of isotope fit to allow leaving out an optional peak." tags="advanced" restrictions="0:100" />
|
32
|
+
<ITEM name="mass_window_width" value="25" type="float" description="Window width in Dalton for precalculation of estimated isotope distributions." tags="advanced" restrictions="1:200" />
|
33
|
+
</NODE>
|
34
|
+
<NODE name="seed" description="Settings that determine which peaks are considered a seed">
|
35
|
+
<ITEM name="min_score" value="0.8" type="float" description="Minimum seed score a peak has to reach to be used as seed.#br#The seed score is the geometric mean of intensity score, mass trace score and isotope pattern score.#br#If your features show a large deviation from the averagene isotope distribution or from an gaussian elution profile, lower this score." restrictions="0:1" />
|
36
|
+
</NODE>
|
37
|
+
<NODE name="fit" description="Settings for the model fitting">
|
38
|
+
<ITEM name="epsilon_abs" value="0.0001" type="float" description="Absolute epsilon used for convergence of the fit." tags="advanced" restrictions="0:" />
|
39
|
+
<ITEM name="epsilon_rel" value="0.0001" type="float" description="Relative epsilon used for convergence of the fit." tags="advanced" restrictions="0:" />
|
40
|
+
<ITEM name="max_iterations" value="500" type="int" description="Maximum number of iterations of the fit." tags="advanced" restrictions="1:" />
|
41
|
+
</NODE>
|
42
|
+
<NODE name="feature" description="Settings for the features (intensity, quality assessment, ...)">
|
43
|
+
<ITEM name="min_score" value="0.7" type="float" description="Feature score threshold for a feature to be reported.#br#The feature score is the geometric mean of the average relative deviation and the correlation between the model and the observed peaks." restrictions="0:1" />
|
44
|
+
<ITEM name="min_isotope_fit" value="0.8" type="float" description="Minimum isotope fit of the feature before model fitting." tags="advanced" restrictions="0:1" />
|
45
|
+
<ITEM name="min_trace_score" value="0.5" type="float" description="Trace score threshold.#br#Traces below this threshold are removed after the model fitting.#br#This parameter is important for features that overlap in m/z dimension." tags="advanced" restrictions="0:1" />
|
46
|
+
<ITEM name="min_rt_span" value="0.333" type="float" description="Minimum RT span in relation to extended area that has to remain after model fitting." tags="advanced" restrictions="0:1" />
|
47
|
+
<ITEM name="max_rt_span" value="2.5" type="float" description="Maximum RT span in relation to extended area that the model is allowed to have." tags="advanced" restrictions="0.5:" />
|
48
|
+
<ITEM name="rt_shape" value="symmetric" type="string" description="Choose model used for RT profile fitting. If set to symmetric a gauss shape is used, in case of asymmetric an EGH shape is used." tags="advanced" restrictions="symmetric,asymmetric" />
|
49
|
+
<ITEM name="max_intersection" value="0.35" type="float" description="Maximum allowed intersection of features." tags="advanced" restrictions="0:1" />
|
50
|
+
<ITEM name="reported_mz" value="monoisotopic" type="string" description="The mass type that is reported for features.#br#'maximum' returns the m/z value of the highest mass trace.#br#'average' returns the intensity-weighted average m/z value of all contained peaks.#br#'monoisotopic' returns the monoisotopic m/z value derived from the fitted isotope model." restrictions="maximum,average,monoisotopic" />
|
51
|
+
</NODE>
|
52
|
+
<NODE name="user-seed" description="Settings for user-specified seeds.">
|
53
|
+
<ITEM name="rt_tolerance" value="5" type="float" description="Allowed RT deviation of seeds from the user-specified seed position." restrictions="0:" />
|
54
|
+
<ITEM name="mz_tolerance" value="1.1" type="float" description="Allowed m/z deviation of seeds from the user-specified seed position." restrictions="0:" />
|
55
|
+
<ITEM name="min_score" value="0.5" type="float" description="Overwrites 'seed:min_score' for user-specified seeds. The cutoff is typically a bit lower in this case." restrictions="0:1" />
|
56
|
+
</NODE>
|
57
|
+
<NODE name="debug" description="">
|
58
|
+
<ITEM name="pseudo_rt_shift" value="500" type="float" description="Pseudo RT shift used when ." tags="advanced" restrictions="1:" />
|
59
|
+
</NODE>
|
60
|
+
</NODE>
|
61
|
+
</NODE>
|
62
|
+
</NODE>
|
63
|
+
</PARAMETERS>
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Lists applications and their package dependencies
|
2
|
+
# For each application here include a shell script named install-appname.sh
|
3
|
+
#
|
4
|
+
|
5
|
+
rvm:
|
6
|
+
- curl
|
7
|
+
- git
|
8
|
+
- patch
|
9
|
+
- build-essential
|
10
|
+
- openssl
|
11
|
+
- libreadline6
|
12
|
+
- libreadline6-dev
|
13
|
+
- openssl
|
14
|
+
- libreadline6
|
15
|
+
- libreadline6-dev
|
16
|
+
- git-core
|
17
|
+
- zlib1g
|
18
|
+
- zlib1g-dev
|
19
|
+
- libssl-dev
|
20
|
+
- autoconf
|
21
|
+
- libc6-dev
|
22
|
+
- ncurses-dev
|
23
|
+
- automake
|
24
|
+
- libtool
|
25
|
+
- bison
|
26
|
+
- subversion
|
27
|
+
- pkg-config
|
28
|
+
|
29
|
+
galaxy:
|
30
|
+
- mercurial
|
31
|
+
|
32
|
+
tpp:
|
33
|
+
- cpanminus
|
34
|
+
- g++
|
35
|
+
- subversion
|
36
|
+
- libbz2-dev
|
37
|
+
- swig
|
38
|
+
- expat
|
39
|
+
- libpng12-dev
|
40
|
+
- gnuplot
|
41
|
+
- libperl-dev
|
42
|
+
- build-essential
|
43
|
+
- libgd2-xpm
|
44
|
+
- libfuse-dev
|
45
|
+
- libcurl4-openssl-dev
|
46
|
+
- libxml2-dev
|
47
|
+
- libgd2-xpm-dev
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# This file is part of protk
|
2
|
+
# Created by Ira Cooke 14/12/2010
|
3
|
+
#
|
4
|
+
# This file contains default global constants and settings.
|
5
|
+
# User editable settings are found in ~/.protk/config.yml
|
6
|
+
#
|
7
|
+
|
8
|
+
message_level: info
|
9
|
+
protein_database_root: Databases
|
10
|
+
plasmodb_annotation_database: plasmodb_annotation
|
11
|
+
uniprot_sprot_annotation_database: swissprot_annotation
|
12
|
+
uniprot_trembl_annotation_database: trembl_annotation
|
13
|
+
galaxy_root: galaxy
|
14
|
+
default_mascot_server: www.matrixscience.com
|
15
|
+
tpp_root: tools/tpp
|
16
|
+
omssa_root: tools/omssa
|
17
|
+
openms_root: tools/openms
|
18
|
+
msgfplus_root: tools/msgfplus
|
19
|
+
blast_root: tools/blast
|
20
|
+
log_file: Logs/protk.log
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#
|
2
|
+
# This is a predefined setup file for manage_db
|
3
|
+
#
|
4
|
+
# The crap database from gpmdb
|
5
|
+
#
|
6
|
+
---
|
7
|
+
:description: The crap database from gpmdb
|
8
|
+
:decoy_prefix: decoy_
|
9
|
+
:make_blast_index: true
|
10
|
+
:include_filters: []
|
11
|
+
:format: fasta
|
12
|
+
:id_regexes:
|
13
|
+
- sp\|(.*)\|
|
14
|
+
:decoys: true
|
15
|
+
:archive_old: true
|
16
|
+
:sources:
|
17
|
+
- - ftp://ftp.thegpm.org/fasta/crap/crap.fasta
|
18
|
+
- none
|
19
|
+
:is_annotation_db: false
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#
|
2
|
+
# This is a predefined setup file for manage_db
|
3
|
+
#
|
4
|
+
# Swissprot database filtered for human entries only and appending the CRAP database from gpmdb
|
5
|
+
# Requires that CRAP is installed first
|
6
|
+
#
|
7
|
+
---
|
8
|
+
:description: Swissprot database filtered for human entries only and appending the CRAP database from gpmdb
|
9
|
+
:decoy_prefix: decoy_
|
10
|
+
:make_msgf_index: true
|
11
|
+
:make_blast_index: true
|
12
|
+
:include_filters:
|
13
|
+
- - OS=Homo\ssapiens
|
14
|
+
- - .*
|
15
|
+
:format: fasta
|
16
|
+
:id_regexes:
|
17
|
+
- sp\|.*\|(.*?)\s
|
18
|
+
- sp\|(.*)\|
|
19
|
+
:decoys: true
|
20
|
+
:archive_old: true
|
21
|
+
:sources:
|
22
|
+
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
|
23
|
+
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
24
|
+
- downloads/ftp.gpmdb.org/fasta/crap/crap.fasta
|
25
|
+
:is_annotation_db: false
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# This is a predefined setup file for manage_db
|
3
|
+
#
|
4
|
+
# Swissprot_uniprot annotation database (full entries for each protein)
|
5
|
+
#
|
6
|
+
---
|
7
|
+
:description: Swissprot_uniprot annotation database (full entries for each protein)
|
8
|
+
:archive_old: false
|
9
|
+
:is_annotation_db: true
|
10
|
+
:decoy_prefix: decoy_
|
11
|
+
:include_filters: []
|
12
|
+
|
13
|
+
:format: dat
|
14
|
+
:id_regexes: []
|
15
|
+
|
16
|
+
:make_blast_index: false
|
17
|
+
:sources:
|
18
|
+
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.dat.gz
|
19
|
+
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
+
:decoys: false
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#
|
2
|
+
# This is a predefined setup file for manage_db
|
3
|
+
#
|
4
|
+
# Fasta files to be indexed for annotation (by protvis)
|
5
|
+
#
|
6
|
+
---
|
7
|
+
:description: An indexed version of the swissprot database for annotation (sequences only)
|
8
|
+
:archive_old: false
|
9
|
+
:is_annotation_db: true
|
10
|
+
:decoy_prefix: decoy_
|
11
|
+
:make_blast_index: true
|
12
|
+
:format: fasta
|
13
|
+
:include_filters: []
|
14
|
+
|
15
|
+
:id_regexes: []
|
16
|
+
|
17
|
+
:sources:
|
18
|
+
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
|
19
|
+
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
+
:decoys: false
|
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<bioml>
|
3
|
+
|
4
|
+
<note> TAXONOMY FILE. This is a file containing references to the sequence databases. Point it to your own taxonomy.xml if needed.</note>
|
5
|
+
<note type="input" label="list path, taxonomy information">Temporary tandem taxonomy file generated for each run</note>
|
6
|
+
|
7
|
+
<note> PROTEIN SEQUENCE DATABASE. This refers to identifiers in the taxonomy.xml, not the .fasta files themselves! Make sure the database you want is present as an entry in the taxonomy.xml referenced above. This is REQUIRED. </note>
|
8
|
+
<note type="input" label="protein, taxon">sphuman</note>
|
9
|
+
|
10
|
+
<note> FILE LOCATIONS. Replace them with your input (.mzXML) file and output file -- these are REQUIRED. Optionally a log file and a sequence output file of all protein sequences identified in the first-pass can be specified. Use of FULL (not relative) paths is recommended. </note>
|
11
|
+
<note type="input" label="spectrum, path">/var/www/ISB/data/Data/microTOF/1010/mt164/mt164-CD14LPS_RD4_01_3725.d_raw.mzML</note>
|
12
|
+
<note type="input" label="output, path">/var/www/ISB/data/Data/microTOF/1010/mt164/mt164-CD14LPS_RD4_01_3725.d_raw.tandem</note>
|
13
|
+
|
14
|
+
<note> DEFAULT PARAMETERS. The value of "isb_default_input_kscore.xml" is recommended. Change to "isb_default_input_native.xml" for native X!Tandem scoring.</note>
|
15
|
+
<note type="input" label="list path, default parameters">/usr/local/tpp-4-4-0/bin/isb_default_input_kscore.xml</note>
|
16
|
+
|
17
|
+
<note> FRAGMENT MASS TOLERANCES </note>
|
18
|
+
<note type="input" label="spectrum, fragment monoisotopic mass error">0.65</note>
|
19
|
+
|
20
|
+
<note> PRECURSOR MASS TOLERANCES. In the example below, a -2.0 Da to 4.0 Da (monoisotopic mass) window is searched for peptide candidates. Because this is a monoisotopic mass, an asymmetric tolerance (-2.0 Da to 4.0 Da) is preferable for non-accurate-mass instruments, where the precursor is often taken nearer to the isotopically averaged mass. This roughly (but not exactly) imitates a -3.0 Da to 3.0 Da window for averaged mass.</note>
|
21
|
+
<note type="input" label="spectrum, parent monoisotopic mass error minus">100</note>
|
22
|
+
<note type="input" label="spectrum, parent monoisotopic mass error plus">100</note>
|
23
|
+
<note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note>
|
24
|
+
<note>The value for this parameter may be 'Daltons' or 'ppm': all other values are ignored</note>
|
25
|
+
<note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note>
|
26
|
+
<note>This allows peptide candidates in windows around -1 Da and -2 Da from the acquired mass to be considered. Only applicable when the minus/plus window above is set to less than 0.5 Da. Good for accurate-mass instruments for which the reported precursor mass is not corrected to the monoisotopic mass. </note>
|
27
|
+
|
28
|
+
|
29
|
+
<note> MODIFICATIONS. In the example below, there is a static (carbamidomethyl) modification on C, and variable modifications on M (oxidation). Multiple modifications can be separated by commas, as in "80.0@S,80.0@T". Peptide terminal modifications can be specified with the symbol '[' for N-terminus and ']' for C-terminus, such as 42.0@[ . </note>
|
30
|
+
<note id="carbamidomethyl-fixed" type="input" label="residue, modification mass">57.021464@C</note>
|
31
|
+
<note id="methionine-oxidation-variable" type="input" label="residue, potential modification mass">15.994915@M</note>
|
32
|
+
<note id="glyco-variable" type="input" label="residue, potential modification motif">0.998@N!{P}[ST]</note>
|
33
|
+
<note> You can specify a variable modification when present in a motif. For instance, 0.998@N!{P}[ST] is a deamidation modification on N only if it is present in an N[any but P][S or T] motif (N-glycosite). </note>
|
34
|
+
|
35
|
+
<note type="input" label="protein, N-terminal residue modification mass"></note>
|
36
|
+
<note type="input" label="protein, C-terminal residue modification mass"></note>
|
37
|
+
<note> These are *static* modifications on the PROTEINS' N or C-termini. </note>
|
38
|
+
|
39
|
+
<note> SEMI-TRYPTICS AND MISSED CLEAVAGES. In the example below, semitryptic peptides are allowed, and up to 2 missed cleavages are allowed. </note>
|
40
|
+
<note type="input" label="protein, cleavage semi">yes</note>
|
41
|
+
<note type="input" label="scoring, maximum missed cleavage sites">2</note>
|
42
|
+
|
43
|
+
<note> REFINEMENT. Do not use unless you know what you are doing. Set "refine" to "yes" and specify what you want to search in the refinement. For non-confusing results, repeat the same modifications you set above for the first-pass here.</note>
|
44
|
+
<note type="input" label="refine">no</note>
|
45
|
+
<note type="input" label="refine, maximum valid expectation value">0.1</note>
|
46
|
+
<note type="input" label="refine, modification mass">57.012@C</note>
|
47
|
+
<note type="input" label="refine, potential modification mass">15.994915@M</note>
|
48
|
+
<note type="input" label="refine, potential modification motif"></note>
|
49
|
+
<note type="input" label="refine, cleavage semi">yes</note>
|
50
|
+
<note type="input" label="refine, unanticipated cleavage">no</note>
|
51
|
+
<note type="input" label="refine, potential N-terminus modifications"></note>
|
52
|
+
<note type="input" label="refine, potential C-terminus modifications"></note>
|
53
|
+
<note type="input" label="refine, point mutations">no</note>
|
54
|
+
<note type="input" label="refine, use potential modifications for full refinement">no</note>
|
55
|
+
|
56
|
+
</bioml>
|