protk 1.2.4 → 1.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/add_retention_times.rb +89 -0
- data/bin/augustus_to_proteindb.rb +193 -0
- data/bin/blastxml_to_table.rb +72 -0
- data/bin/feature_finder.rb +7 -1
- data/bin/make_decoy.rb +10 -2
- data/bin/mascot_search.rb +14 -4
- data/bin/msgfplus_search.rb +14 -5
- data/bin/peptide_prophet.rb +14 -7
- data/bin/protxml_to_gff.rb +624 -0
- data/bin/protxml_to_table.rb +19 -2
- data/bin/sixframe.rb +3 -1
- data/bin/tandem_search.rb +51 -23
- data/bin/toppas_pipeline.rb +8 -3
- data/bin/uniprot_annotation.rb +6 -1
- data/ext/protk/{protk.c → decoymaker/decoymaker.c} +13 -15
- data/ext/protk/decoymaker/extconf.rb +3 -0
- data/ext/protk/simplealign/extconf.rb +3 -0
- data/lib/protk/data/FeatureFinderIsotopeWavelet.ini +6 -6
- data/lib/protk/gapped_aligner.rb +264 -0
- data/lib/protk/manage_db_rakefile.rake +2 -1
- data/lib/protk/mascot_util.rb +7 -2
- data/lib/protk/randomize.rb +2 -2
- data/lib/protk/search_tool.rb +1 -1
- data/lib/protk/setup_rakefile.rake +25 -2
- data/lib/protk/spreadsheet_extensions.rb +1 -0
- data/lib/protk/swissprot_database.rb +11 -1
- metadata +30 -8
- data/bin/mascot2xml.rb +0 -87
- data/ext/protk/extconf.rb +0 -3
- data/lib/protk/data/pepxml_mascot_template.xml +0 -29
- data/lib/protk/data/predefined_db.trembl_annotation.yaml +0 -20
@@ -449,7 +449,8 @@ file db_filename do
|
|
449
449
|
|
450
450
|
# Symlink to the source file
|
451
451
|
#
|
452
|
-
|
452
|
+
source_db_filename_relative = Pathname.new(source_db_filename).basename.to_s
|
453
|
+
File.symlink(source_db_filename_relative,db_filename)
|
453
454
|
end
|
454
455
|
end
|
455
456
|
|
data/lib/protk/mascot_util.rb
CHANGED
@@ -54,12 +54,17 @@ class MascotUtil
|
|
54
54
|
|
55
55
|
if ( spec!=nil && rt!=nil)
|
56
56
|
# Remove charge from the end of the title
|
57
|
-
spec_id= remove_charge_from_title_string(spec[1])
|
57
|
+
# spec_id= remove_charge_from_title_string(spec[1])
|
58
|
+
spec_id= spec[1]
|
59
|
+
|
60
|
+
# $stdout.write "#{spec_id} \r"
|
61
|
+
|
58
62
|
|
59
63
|
rt_table[spec_id]=rt[1]
|
60
64
|
end
|
61
|
-
|
65
|
+
|
62
66
|
end
|
67
|
+
# $stdout.write "\n"
|
63
68
|
|
64
69
|
return rt_table
|
65
70
|
|
data/lib/protk/randomize.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'protk/
|
1
|
+
require 'protk/decoymaker'
|
2
2
|
|
3
3
|
class Randomize
|
4
4
|
def self.make_decoys input_path, db_len, output_path, prefix
|
5
|
-
|
5
|
+
Decoymaker.make_decoys input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s
|
6
6
|
end
|
7
7
|
end
|
data/lib/protk/search_tool.rb
CHANGED
@@ -154,7 +154,7 @@ class SearchTool < Tool
|
|
154
154
|
|
155
155
|
def jobid_from_filename(filename)
|
156
156
|
jobid="protk"
|
157
|
-
jobnum_match=filename.match(/(.{1,10})
|
157
|
+
jobnum_match=filename.match(/(.{1,10}).*?\./)
|
158
158
|
if (jobnum_match!=nil)
|
159
159
|
jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
|
160
160
|
end
|
@@ -133,10 +133,10 @@ task :perl_locallib => [perl_locallib_installed_file]
|
|
133
133
|
#
|
134
134
|
# TPP
|
135
135
|
#
|
136
|
-
tpp_version="4.6.
|
136
|
+
tpp_version="4.6.2"
|
137
137
|
tpp_packagefile="TPP-#{tpp_version}.tgz"
|
138
138
|
tpp_installed_file = "#{env.xinteract}"
|
139
|
-
tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.
|
139
|
+
tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.2.tgz"
|
140
140
|
|
141
141
|
tpp_download_file = download_task tpp_url, tpp_packagefile
|
142
142
|
|
@@ -380,6 +380,29 @@ end
|
|
380
380
|
|
381
381
|
task :galaxyenv => protk_galaxy_envfile
|
382
382
|
|
383
|
+
|
384
|
+
#
|
385
|
+
# NCBI GI and Taxonomy Databases
|
386
|
+
#
|
387
|
+
|
388
|
+
# gi_taxid_package_file="gi_taxid_prot.zip"
|
389
|
+
# gi_taxid_url="ftp://ftp.ncbi.nih.gov/pub/taxonomy/#{gi_taxid_package_file}"
|
390
|
+
# gi_taxid_installed_file=
|
391
|
+
|
392
|
+
# file "gi_taxid_prot.zip" do
|
393
|
+
# %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.zip]
|
394
|
+
# %x[unzip gi_taxid_prot.zip]
|
395
|
+
# end
|
396
|
+
|
397
|
+
# file "taxdmp.zip" do
|
398
|
+
# %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip]
|
399
|
+
# %x[unzip taxdmp.zip]
|
400
|
+
# end
|
401
|
+
|
402
|
+
|
403
|
+
# multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
|
404
|
+
|
405
|
+
|
383
406
|
task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxyenv]
|
384
407
|
|
385
408
|
# Special task when installing via toolshed
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bio'
|
3
3
|
require 'protk/constants'
|
4
|
+
require 'pathname'
|
4
5
|
|
5
6
|
# Provides fast indexed access to a swissprot database in a flat .dat file
|
6
7
|
#
|
@@ -13,7 +14,16 @@ class SwissprotDatabase
|
|
13
14
|
@genv=Constants.new
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
+
|
18
|
+
dbpath=Pathname.new(database)
|
19
|
+
|
20
|
+
if ( dbpath.exist? )
|
21
|
+
# require 'debugger';debugger
|
22
|
+
dbclass=Bio::SPTR
|
23
|
+
parser = Bio::FlatFileIndex::Indexer::Parser.new(dbclass, nil, nil)
|
24
|
+
Bio::FlatFileIndex::Indexer::makeindexFlat(dbpath.realpath.dirname.to_s, parser, {}, dbpath.realpath.to_s)
|
25
|
+
@db_object=Bio::FlatFileIndex.new("#{dbpath.realpath.dirname.to_s}")
|
26
|
+
elsif ( database=="swissprot")
|
17
27
|
@db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_sprot_annotation_database}")
|
18
28
|
else
|
19
29
|
@db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_trembl_annotation_database}")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.4
|
4
|
+
version: 1.2.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -139,6 +139,22 @@ dependencies:
|
|
139
139
|
- - ! '>='
|
140
140
|
- !ruby/object:Gem::Version
|
141
141
|
version: 0.3.1
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: bio-blastxmlparser
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.1.1
|
150
|
+
type: :runtime
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.1.1
|
142
158
|
- !ruby/object:Gem::Dependency
|
143
159
|
name: rspec
|
144
160
|
requirement: !ruby/object:Gem::Requirement
|
@@ -172,6 +188,7 @@ executables:
|
|
172
188
|
- make_decoy.rb
|
173
189
|
- correct_omssa_retention_times.rb
|
174
190
|
- repair_run_summary.rb
|
191
|
+
- add_retention_times.rb
|
175
192
|
- peptide_prophet.rb
|
176
193
|
- interprophet.rb
|
177
194
|
- protein_prophet.rb
|
@@ -185,10 +202,13 @@ executables:
|
|
185
202
|
- toppas_pipeline.rb
|
186
203
|
- gffmerge.rb
|
187
204
|
- sixframe.rb
|
205
|
+
- augustus_to_proteindb.rb
|
206
|
+
- protxml_to_gff.rb
|
188
207
|
- uniprot_annotation.rb
|
189
208
|
- protxml_to_table.rb
|
209
|
+
- blastxml_to_table.rb
|
190
210
|
extensions:
|
191
|
-
- ext/protk/extconf.rb
|
211
|
+
- ext/protk/decoymaker/extconf.rb
|
192
212
|
extra_rdoc_files: []
|
193
213
|
files:
|
194
214
|
- lib/protk/bio_sptr_extensions.rb
|
@@ -201,6 +221,7 @@ files:
|
|
201
221
|
- lib/protk/fastadb.rb
|
202
222
|
- lib/protk/galaxy_stager.rb
|
203
223
|
- lib/protk/galaxy_util.rb
|
224
|
+
- lib/protk/gapped_aligner.rb
|
204
225
|
- lib/protk/manage_db_tool.rb
|
205
226
|
- lib/protk/mascot_util.rb
|
206
227
|
- lib/protk/omssa_util.rb
|
@@ -221,8 +242,11 @@ files:
|
|
221
242
|
- lib/protk.rb
|
222
243
|
- lib/protk/manage_db_rakefile.rake
|
223
244
|
- lib/protk/setup_rakefile.rake
|
245
|
+
- bin/add_retention_times.rb
|
224
246
|
- bin/annotate_ids.rb
|
225
247
|
- bin/asapratio.rb
|
248
|
+
- bin/augustus_to_proteindb.rb
|
249
|
+
- bin/blastxml_to_table.rb
|
226
250
|
- bin/correct_omssa_retention_times.rb
|
227
251
|
- bin/feature_finder.rb
|
228
252
|
- bin/file_convert.rb
|
@@ -232,7 +256,6 @@ files:
|
|
232
256
|
- bin/libra.rb
|
233
257
|
- bin/make_decoy.rb
|
234
258
|
- bin/manage_db.rb
|
235
|
-
- bin/mascot2xml.rb
|
236
259
|
- bin/mascot_search.rb
|
237
260
|
- bin/mascot_to_pepxml.rb
|
238
261
|
- bin/msgfplus_search.rb
|
@@ -241,6 +264,7 @@ files:
|
|
241
264
|
- bin/pepxml_to_table.rb
|
242
265
|
- bin/protein_prophet.rb
|
243
266
|
- bin/protk_setup.rb
|
267
|
+
- bin/protxml_to_gff.rb
|
244
268
|
- bin/protxml_to_table.rb
|
245
269
|
- bin/repair_run_summary.rb
|
246
270
|
- bin/sixframe.rb
|
@@ -259,12 +283,10 @@ files:
|
|
259
283
|
- lib/protk/data/FeatureFinderCentroided.ini
|
260
284
|
- lib/protk/data/FeatureFinderIsotopeWavelet.ini
|
261
285
|
- lib/protk/data/galaxyenv.sh
|
262
|
-
- lib/protk/data/pepxml_mascot_template.xml
|
263
286
|
- lib/protk/data/predefined_db.crap.yaml
|
264
287
|
- lib/protk/data/predefined_db.sphuman.yaml
|
265
288
|
- lib/protk/data/predefined_db.swissprot_annotation.yaml
|
266
289
|
- lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
|
267
|
-
- lib/protk/data/predefined_db.trembl_annotation.yaml
|
268
290
|
- lib/protk/data/tandem_params.xml
|
269
291
|
- lib/protk/data/taxonomy_template.xml
|
270
292
|
- lib/protk/data/unimod.xml
|
@@ -272,8 +294,9 @@ files:
|
|
272
294
|
- lib/protk/data/uniprot_accessions_table.txt
|
273
295
|
- lib/protk/data/uniprot_input_accessions.loc
|
274
296
|
- lib/protk/data/yum_packages.yaml
|
275
|
-
- ext/protk/protk.c
|
276
|
-
- ext/protk/extconf.rb
|
297
|
+
- ext/protk/decoymaker/decoymaker.c
|
298
|
+
- ext/protk/decoymaker/extconf.rb
|
299
|
+
- ext/protk/simplealign/extconf.rb
|
277
300
|
homepage: http://rubygems.org/gems/protk
|
278
301
|
licenses: []
|
279
302
|
post_install_message: Now run protk_setup.rb to install third party tools and manage_db.rb
|
@@ -300,4 +323,3 @@ signing_key:
|
|
300
323
|
specification_version: 3
|
301
324
|
summary: Proteomics Toolkit
|
302
325
|
test_files: []
|
303
|
-
has_rdoc:
|
data/bin/mascot2xml.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This file is part of protk
|
4
|
-
# Created by Ira Cooke 12/4/2010
|
5
|
-
#
|
6
|
-
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
require 'protk/constants'
|
11
|
-
require 'protk/search_tool'
|
12
|
-
require 'mascot/dat'
|
13
|
-
require 'libxml'
|
14
|
-
|
15
|
-
include LibXML
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# Environment with global constants
|
21
|
-
#
|
22
|
-
genv=Constants.new
|
23
|
-
|
24
|
-
tool=SearchTool.new([:database,:explicit_output,:over_write])
|
25
|
-
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
|
26
|
-
tool.option_parser.parse!
|
27
|
-
|
28
|
-
throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
|
29
|
-
|
30
|
-
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
|
31
|
-
pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
|
32
|
-
pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
|
33
|
-
pipeline_analysis_node.attributes['summary_xml']=outname
|
34
|
-
end
|
35
|
-
|
36
|
-
def update_enzyme(pepxml_doc,mascot_dat)
|
37
|
-
dat_enzyme=mascot_dat.enzyme
|
38
|
-
enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
|
39
|
-
enzyme_node.attributes['name']=dat_enzyme.title
|
40
|
-
specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
|
41
|
-
# p dat_enzyme.cleavages
|
42
|
-
# TODO: What does the spec say about multiple cut sites
|
43
|
-
# specificity.attributes['cut']=
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
ARGV.each do |file_name|
|
49
|
-
name=file_name.chomp
|
50
|
-
|
51
|
-
dat = Mascot::DAT.open(name)
|
52
|
-
pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
|
53
|
-
pepxml_doc = pepxml_template_parser.parse
|
54
|
-
|
55
|
-
outname = nil
|
56
|
-
if ( tool.explicit_output !=nil)
|
57
|
-
outname = tool.explicit_output
|
58
|
-
else
|
59
|
-
basename = Pathname.new(name).basename
|
60
|
-
outname = "#{basename}.pepXML"
|
61
|
-
end
|
62
|
-
|
63
|
-
$pepxml_ns_prefix="xmlns:"
|
64
|
-
$pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
|
65
|
-
if not pepxml_doc.root.namespaces.default
|
66
|
-
$pepxml_ns_prefix=""
|
67
|
-
$pepxml_ns=nil
|
68
|
-
end
|
69
|
-
|
70
|
-
update_msms_pipeline_analysis(pepxml_doc,dat,outname)
|
71
|
-
update_enzyme(pepxml_doc,dat)
|
72
|
-
|
73
|
-
spectrum_queries={}
|
74
|
-
|
75
|
-
dat.peptides.each do |psm|
|
76
|
-
# psm.score
|
77
|
-
qnum= psm.query
|
78
|
-
if qnum
|
79
|
-
qid=qnum.to_s
|
80
|
-
spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
|
81
|
-
spectrum_queries[qid] << psm
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
|
86
|
-
|
87
|
-
end
|
data/ext/protk/extconf.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
|
3
|
-
<msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
|
4
|
-
<msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
|
5
|
-
<sample_enzyme name="trypsin">
|
6
|
-
<specificity cut="KR" no_cut="P" sense="C"/>
|
7
|
-
</sample_enzyme>
|
8
|
-
<search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
|
9
|
-
<search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
|
10
|
-
<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
|
11
|
-
<aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
|
12
|
-
<parameter name="" value=""/>
|
13
|
-
</search_summary>
|
14
|
-
<spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
|
15
|
-
<search_result>
|
16
|
-
<search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
|
17
|
-
<modification_info>
|
18
|
-
<mod_aminoacid_mass position="" mass=""/>
|
19
|
-
</modification_info>
|
20
|
-
<search_score name="ionscore" value=""/>
|
21
|
-
<search_score name="identityscore" value=""/>
|
22
|
-
<search_score name="star" value="0"/>
|
23
|
-
<search_score name="homologyscore" value=""/>
|
24
|
-
<search_score name="expect" value=""/>
|
25
|
-
</search_hit>
|
26
|
-
</search_result>
|
27
|
-
</spectrum_query>
|
28
|
-
</msms_run_summary>
|
29
|
-
</msms_pipeline_analysis>
|
@@ -1,20 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This is a predefined setup file for manage_db
|
3
|
-
#
|
4
|
-
# Swissprot_uniprot annotation database (full entries for each protein)
|
5
|
-
#
|
6
|
-
---
|
7
|
-
:description: Swissprot Trembl annotation database (full entries for each protein)
|
8
|
-
:archive_old: false
|
9
|
-
:is_annotation_db: true
|
10
|
-
:decoy_prefix: decoy_
|
11
|
-
:include_filters: []
|
12
|
-
|
13
|
-
:format: dat
|
14
|
-
:id_regexes: []
|
15
|
-
|
16
|
-
:make_blast_index: false
|
17
|
-
:sources:
|
18
|
-
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
|
19
|
-
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
-
:decoys: false
|