protk 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/annotate_ids.rb +2 -2
- data/bin/feature_finder.rb +8 -2
- data/bin/file_convert.rb +8 -2
- data/bin/gffmerge.rb +15 -20
- data/bin/interprophet.rb +7 -3
- data/bin/make_decoy.rb +9 -2
- data/bin/mascot2xml.rb +87 -0
- data/bin/mascot_search.rb +126 -187
- data/bin/mascot_to_pepxml.rb +32 -3
- data/bin/msgfplus_search.rb +58 -12
- data/bin/omssa_search.rb +13 -2
- data/bin/peptide_prophet.rb +8 -2
- data/bin/pepxml_to_table.rb +8 -2
- data/bin/protein_prophet.rb +8 -2
- data/bin/protxml_to_table.rb +82 -0
- data/bin/repair_run_summary.rb +7 -1
- data/bin/sixframe.rb +48 -2
- data/bin/tandem_search.rb +11 -2
- data/bin/toppas_pipeline.rb +8 -2
- data/bin/uniprot_annotation.rb +8 -2
- data/bin/uniprot_mapper.rb +8 -2
- data/bin/xls_to_table.rb +8 -2
- data/lib/protk/constants.rb +2 -0
- data/lib/protk/data/pepxml_mascot_template.xml +29 -0
- data/lib/protk/mascot_util.rb +5 -0
- data/lib/protk/prophet_tool.rb +1 -3
- data/lib/protk/search_tool.rb +75 -86
- data/lib/protk/setup_rakefile.rake +12 -5
- data/lib/protk/tool.rb +26 -12
- metadata +23 -9
- data/bin/big_search.rb +0 -41
- data/bin/template_search.rb +0 -144
- data/lib/convert_util.rb +0 -27
- data/lib/pepxml.rb +0 -22
- data/lib/protk/big_search_rakefile.rake +0 -16
- data/lib/protk/big_search_tool.rb +0 -23
data/lib/protk/tool.rb
CHANGED
@@ -49,7 +49,7 @@ class Tool
|
|
49
49
|
# Creates an empty options object to hold commandline options
|
50
50
|
# Also creates an option_parser with default options common to all tools
|
51
51
|
#
|
52
|
-
def initialize(option_support=
|
52
|
+
def initialize(option_support=[])
|
53
53
|
@jobid_prefix = "x"
|
54
54
|
@options = OpenStruct.new
|
55
55
|
options.library = []
|
@@ -59,8 +59,8 @@ class Tool
|
|
59
59
|
options.verbose = false
|
60
60
|
|
61
61
|
@option_parser=OptionParser.new do |opts|
|
62
|
-
|
63
|
-
if ( option_support
|
62
|
+
|
63
|
+
if ( option_support.include? :prefix_suffix)
|
64
64
|
|
65
65
|
@options.output_prefix = ""
|
66
66
|
opts.on( '-b', '--output-prefix pref', 'A string to prepend to the name of output files' ) do |prefix|
|
@@ -74,14 +74,14 @@ class Tool
|
|
74
74
|
|
75
75
|
end
|
76
76
|
|
77
|
-
if ( option_support
|
77
|
+
if ( option_support.include? :explicit_output )
|
78
78
|
@options.explicit_output = nil
|
79
79
|
opts.on( '-o', '--output out', 'An explicitly named output file.' ) do |out|
|
80
80
|
@options.explicit_output = out
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
if ( option_support
|
84
|
+
if ( option_support.include? :over_write)
|
85
85
|
|
86
86
|
@options.over_write=false
|
87
87
|
opts.on( '-r', '--replace-output', 'Dont skip analyses for which the output file already exists' ) do
|
@@ -90,7 +90,7 @@ class Tool
|
|
90
90
|
|
91
91
|
end
|
92
92
|
|
93
|
-
if ( option_support
|
93
|
+
if ( option_support.include? :background)
|
94
94
|
|
95
95
|
@options.background = false
|
96
96
|
opts.on( '-z', '--background', 'Run jobs in the background using pbs' ) do
|
@@ -99,12 +99,10 @@ class Tool
|
|
99
99
|
|
100
100
|
end
|
101
101
|
|
102
|
-
if ( option_support[:help]==true)
|
103
102
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
end
|
103
|
+
opts.on( '-h', '--help', 'Display this screen' ) do
|
104
|
+
puts opts
|
105
|
+
exit
|
108
106
|
end
|
109
107
|
|
110
108
|
end
|
@@ -133,7 +131,23 @@ class Tool
|
|
133
131
|
end
|
134
132
|
|
135
133
|
|
136
|
-
|
134
|
+
def check_options(mandatory=[])
|
135
|
+
# Checking for required options
|
136
|
+
begin
|
137
|
+
self.option_parser.parse!
|
138
|
+
missing = mandatory.select{ |param| self.send(param).nil? }
|
139
|
+
if not missing.empty?
|
140
|
+
puts "Missing options: #{missing.join(', ')}"
|
141
|
+
puts self.option_parser
|
142
|
+
return false
|
143
|
+
end
|
144
|
+
rescue OptionParser::InvalidOption, OptionParser::MissingArgument
|
145
|
+
puts $!.to_s
|
146
|
+
puts tool.option_parser
|
147
|
+
return false
|
148
|
+
end
|
149
|
+
return true
|
150
|
+
end
|
137
151
|
|
138
152
|
# Create a full base path (without extension) representing the input file for this analysis
|
139
153
|
# Optionally provide the extension to be removed (if not provided it will be inferred)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ftools
|
@@ -123,6 +123,22 @@ dependencies:
|
|
123
123
|
- - ! '>='
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: 2.3.3
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: mascot-dat
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 0.3.1
|
134
|
+
type: :runtime
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 0.3.1
|
126
142
|
- !ruby/object:Gem::Dependency
|
127
143
|
name: rspec
|
128
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -151,7 +167,6 @@ executables:
|
|
151
167
|
- mascot_search.rb
|
152
168
|
- omssa_search.rb
|
153
169
|
- msgfplus_search.rb
|
154
|
-
- big_search.rb
|
155
170
|
- mascot_to_pepxml.rb
|
156
171
|
- file_convert.rb
|
157
172
|
- make_decoy.rb
|
@@ -171,13 +186,11 @@ executables:
|
|
171
186
|
- gffmerge.rb
|
172
187
|
- sixframe.rb
|
173
188
|
- uniprot_annotation.rb
|
189
|
+
- protxml_to_table.rb
|
174
190
|
extensions:
|
175
191
|
- ext/protk/extconf.rb
|
176
192
|
extra_rdoc_files: []
|
177
193
|
files:
|
178
|
-
- lib/convert_util.rb
|
179
|
-
- lib/pepxml.rb
|
180
|
-
- lib/protk/big_search_tool.rb
|
181
194
|
- lib/protk/bio_sptr_extensions.rb
|
182
195
|
- lib/protk/biotools_excel_converter.rb
|
183
196
|
- lib/protk/command_runner.rb
|
@@ -206,12 +219,10 @@ files:
|
|
206
219
|
- lib/protk/uniprot_mapper.rb
|
207
220
|
- lib/protk/xtandem_defaults.rb
|
208
221
|
- lib/protk.rb
|
209
|
-
- lib/protk/big_search_rakefile.rake
|
210
222
|
- lib/protk/manage_db_rakefile.rake
|
211
223
|
- lib/protk/setup_rakefile.rake
|
212
224
|
- bin/annotate_ids.rb
|
213
225
|
- bin/asapratio.rb
|
214
|
-
- bin/big_search.rb
|
215
226
|
- bin/correct_omssa_retention_times.rb
|
216
227
|
- bin/feature_finder.rb
|
217
228
|
- bin/file_convert.rb
|
@@ -221,6 +232,7 @@ files:
|
|
221
232
|
- bin/libra.rb
|
222
233
|
- bin/make_decoy.rb
|
223
234
|
- bin/manage_db.rb
|
235
|
+
- bin/mascot2xml.rb
|
224
236
|
- bin/mascot_search.rb
|
225
237
|
- bin/mascot_to_pepxml.rb
|
226
238
|
- bin/msgfplus_search.rb
|
@@ -229,10 +241,10 @@ files:
|
|
229
241
|
- bin/pepxml_to_table.rb
|
230
242
|
- bin/protein_prophet.rb
|
231
243
|
- bin/protk_setup.rb
|
244
|
+
- bin/protxml_to_table.rb
|
232
245
|
- bin/repair_run_summary.rb
|
233
246
|
- bin/sixframe.rb
|
234
247
|
- bin/tandem_search.rb
|
235
|
-
- bin/template_search.rb
|
236
248
|
- bin/toppas_pipeline.rb
|
237
249
|
- bin/unimod_to_loc.rb
|
238
250
|
- bin/uniprot_annotation.rb
|
@@ -247,6 +259,7 @@ files:
|
|
247
259
|
- lib/protk/data/FeatureFinderCentroided.ini
|
248
260
|
- lib/protk/data/FeatureFinderIsotopeWavelet.ini
|
249
261
|
- lib/protk/data/galaxyenv.sh
|
262
|
+
- lib/protk/data/pepxml_mascot_template.xml
|
250
263
|
- lib/protk/data/predefined_db.crap.yaml
|
251
264
|
- lib/protk/data/predefined_db.sphuman.yaml
|
252
265
|
- lib/protk/data/predefined_db.swissprot_annotation.yaml
|
@@ -287,3 +300,4 @@ signing_key:
|
|
287
300
|
specification_version: 3
|
288
301
|
summary: Proteomics Toolkit
|
289
302
|
test_files: []
|
303
|
+
has_rdoc:
|
data/bin/big_search.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This file is part of protk
|
4
|
-
# Created by Ira Cooke 14/12/2010
|
5
|
-
#
|
6
|
-
# Runs an MS/MS search using multiple search engines on multiple files in parallel
|
7
|
-
# Merges results using interprophet to produce a single output file
|
8
|
-
#
|
9
|
-
# This tool assumes that datasets are from an ESI-QUAD-TOF instrument
|
10
|
-
#
|
11
|
-
require 'protk/constants'
|
12
|
-
require 'protk/command_runner'
|
13
|
-
require 'protk/search_tool'
|
14
|
-
require 'protk/big_search_tool'
|
15
|
-
require 'rest_client'
|
16
|
-
require 'rake'
|
17
|
-
|
18
|
-
# Environment with global constants
|
19
|
-
#
|
20
|
-
genv=Constants.new
|
21
|
-
|
22
|
-
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
23
|
-
#
|
24
|
-
search_tool=SearchTool.new({:msms_search=>true,:background=>false,:database=>true,:over_write=>true,:glyco=>true,:explicit_output=>true})
|
25
|
-
search_tool.jobid_prefix="b"
|
26
|
-
|
27
|
-
search_tool.option_parser.banner = "Run a multi-search engine search on a set of input files.\n\nUsage: big_search.rb [options] file1.mzML file2.mzML ..."
|
28
|
-
search_tool.options.output_suffix="_multisearch"
|
29
|
-
|
30
|
-
|
31
|
-
search_tool.options.ncpu=1
|
32
|
-
search_tool.option_parser.on( '-N', '--ncpu n', 'Split tasks into n separate processes if possible' ) do |n|
|
33
|
-
search_tool.options.ncpu=n
|
34
|
-
end
|
35
|
-
|
36
|
-
search_tool.option_parser.parse!
|
37
|
-
|
38
|
-
bgsrch = BigSearchTool.new
|
39
|
-
|
40
|
-
|
41
|
-
p bgsrch.run ["hi", "howdy"]
|
data/bin/template_search.rb
DELETED
@@ -1,144 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This file is part of protk
|
4
|
-
# Created by Ira Cooke 14/12/2010
|
5
|
-
#
|
6
|
-
# Runs an MS/MS search using the MSGFPlus search engine
|
7
|
-
#
|
8
|
-
require 'protk/search_tool'
|
9
|
-
|
10
|
-
|
11
|
-
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
12
|
-
#
|
13
|
-
search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
|
14
|
-
search_tool.option_parser.banner = "Run an msms search on a set of msms spectrum input files.\n\nUsage: template_search.rb [options] file1.mzML file2.mzML ..."
|
15
|
-
search_tool.options.output_suffix="_template"
|
16
|
-
|
17
|
-
search_tool.options.custom_option="default"
|
18
|
-
search_tool.option_parser.on('--custom-opt value','Custom option relevant to this tool only (Default default)') do |val|
|
19
|
-
search_tool.options.custom_option=val
|
20
|
-
end
|
21
|
-
|
22
|
-
search_tool.option_parser.parse!
|
23
|
-
|
24
|
-
# Set search engine specific parameters on the SearchTool object
|
25
|
-
#
|
26
|
-
msgf_bin="#{genv.msgf_bin}/MSGFPlus.jar"
|
27
|
-
|
28
|
-
case
|
29
|
-
when Pathname.new(search_tool.database).exist? # It's an explicitly named db
|
30
|
-
current_db=Pathname.new(search_tool.database).realpath.to_s
|
31
|
-
else
|
32
|
-
current_db=search_tool.current_database :fasta
|
33
|
-
end
|
34
|
-
|
35
|
-
fragment_tol = search_tool.fragment_tol
|
36
|
-
precursor_tol = search_tool.precursor_tol
|
37
|
-
|
38
|
-
|
39
|
-
throw "When --output is set only one file at a time can be run" if ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil )
|
40
|
-
|
41
|
-
# Run the search engine on each input file
|
42
|
-
#
|
43
|
-
ARGV.each do |filename|
|
44
|
-
|
45
|
-
if ( search_tool.explicit_output!=nil)
|
46
|
-
output_path=search_tool.explicit_output
|
47
|
-
else
|
48
|
-
output_path="#{search_tool.output_base_path(filename.chomp)}.mzid"
|
49
|
-
end
|
50
|
-
|
51
|
-
# (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
|
52
|
-
# Get the input file extension
|
53
|
-
ext = Pathname.new(filename).extname
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
|
58
|
-
|
59
|
-
# Only proceed if the output file is not present or we have opted to over-write it
|
60
|
-
#
|
61
|
-
if ( search_tool.over_write || !Pathname.new(output_path).exist? )
|
62
|
-
|
63
|
-
# The basic command
|
64
|
-
#
|
65
|
-
cmd= "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{output_path} "
|
66
|
-
|
67
|
-
#Missed cleavages
|
68
|
-
#
|
69
|
-
throw "Maximum value for missed cleavages is 2" if ( search_tool.missed_cleavages > 2)
|
70
|
-
cmd << " -ntt #{search_tool.missed_cleavages}"
|
71
|
-
|
72
|
-
# Precursor tolerance
|
73
|
-
#
|
74
|
-
cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
|
75
|
-
|
76
|
-
# Instrument type
|
77
|
-
#
|
78
|
-
cmd << " -inst 2"
|
79
|
-
|
80
|
-
# cmd << " -m 4"
|
81
|
-
|
82
|
-
cmd << " -addFeatures 1"
|
83
|
-
|
84
|
-
# Enzyme
|
85
|
-
#
|
86
|
-
# if ( search_tool.enzyme!="Trypsin")
|
87
|
-
# cmd << " -e #{search_tool.enzyme}"
|
88
|
-
# end
|
89
|
-
|
90
|
-
mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
|
91
|
-
mods_file=File.open(mods_path,'w+')
|
92
|
-
|
93
|
-
# Variable Modifications
|
94
|
-
#
|
95
|
-
if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
|
96
|
-
var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
|
97
|
-
if ( var_mods !="" )
|
98
|
-
cmd << " -mv #{var_mods}"
|
99
|
-
end
|
100
|
-
else
|
101
|
-
# Add options related to peptide modifications
|
102
|
-
#
|
103
|
-
if ( search_tool.glyco )
|
104
|
-
cmd << " -mv 119 "
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
# Fixed modifications
|
109
|
-
#
|
110
|
-
if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
|
111
|
-
fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
|
112
|
-
if ( fix_mods !="")
|
113
|
-
cmd << " -mf #{fix_mods}"
|
114
|
-
end
|
115
|
-
else
|
116
|
-
if ( search_tool.has_modifications )
|
117
|
-
cmd << " -mf "
|
118
|
-
if ( search_tool.carbamidomethyl )
|
119
|
-
cmd<<"3 "
|
120
|
-
end
|
121
|
-
|
122
|
-
if ( search_tool.methionine_oxidation )
|
123
|
-
cmd<<"1 "
|
124
|
-
end
|
125
|
-
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
# Up to here we've formulated the omssa command. The rest is cleanup
|
130
|
-
p "Running:#{cmd}"
|
131
|
-
|
132
|
-
# Run the search
|
133
|
-
#
|
134
|
-
job_params= {:jobid => search_tool.jobid_from_filename(filename) }
|
135
|
-
job_params[:queue]="lowmem"
|
136
|
-
job_params[:vmem]="900mb"
|
137
|
-
search_tool.run(cmd,genv,job_params)
|
138
|
-
|
139
|
-
|
140
|
-
else
|
141
|
-
genv.log("Skipping search on existing file #{output_path}",:warn)
|
142
|
-
end
|
143
|
-
|
144
|
-
end
|
data/lib/convert_util.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'libxml'
|
2
|
-
require 'constants'
|
3
|
-
|
4
|
-
class ConvertUtil
|
5
|
-
|
6
|
-
def self.ensure_mzml_indexed(run_file)
|
7
|
-
if unindexed_mzml?(run_file)
|
8
|
-
index_mzml(run_file)
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.index_mzml(mzml_file)
|
13
|
-
Dir.mktmpdir do |tmpdir|
|
14
|
-
genv=Constants.new
|
15
|
-
%x["#{genv.tpp_bin}/msconvert -o #{tmpdir} #{mzml_file}"]
|
16
|
-
indexed_file = Dir["#{tmpdir}/*"][0]
|
17
|
-
FileUtils.mv(indexed_file, mzml_file)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.unindexed_mzml?(mzml_file)
|
22
|
-
reader = LibXML::XML::Reader.file(mzml_file)
|
23
|
-
reader.read
|
24
|
-
reader.name == "mzML"
|
25
|
-
end
|
26
|
-
|
27
|
-
end
|
data/lib/pepxml.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rexml/document'
|
3
|
-
require 'rexml/xpath'
|
4
|
-
|
5
|
-
class PepXML
|
6
|
-
def initialize(file_name)
|
7
|
-
@doc=REXML::Document.new(File.new(file_name))
|
8
|
-
end
|
9
|
-
|
10
|
-
def find_runs()
|
11
|
-
runs = {}
|
12
|
-
REXML::XPath.each(@doc,"//msms_run_summary") do |summary|
|
13
|
-
base_name = summary.attributes["base_name"]
|
14
|
-
if not runs.has_key?(base_name)
|
15
|
-
runs[base_name] = {:base_name => summary.attributes["base_name"],
|
16
|
-
:type => summary.attributes["raw_data"]}
|
17
|
-
end
|
18
|
-
end
|
19
|
-
runs
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
data/lib/protk/big_search_tool.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
# create rake dependencies
|
2
|
-
# run rakefile
|
3
|
-
#
|
4
|
-
require 'optparse'
|
5
|
-
require 'pathname'
|
6
|
-
require 'protk/tool'
|
7
|
-
require 'protk/command_runner'
|
8
|
-
require 'pp'
|
9
|
-
require 'rake'
|
10
|
-
|
11
|
-
class BigSearchTool < Tool
|
12
|
-
|
13
|
-
def run input_files
|
14
|
-
command = "rake -f #{rakefile_path} #{input_files.join(" ")}"
|
15
|
-
runner=CommandRunner.new(Constants.new)
|
16
|
-
runner.run_local(command)
|
17
|
-
end
|
18
|
-
|
19
|
-
def rakefile_path
|
20
|
-
"#{File.dirname(__FILE__)}/big_search_rakefile.rake"
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|