protk 1.2.4 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/add_retention_times.rb +89 -0
- data/bin/augustus_to_proteindb.rb +193 -0
- data/bin/blastxml_to_table.rb +72 -0
- data/bin/feature_finder.rb +7 -1
- data/bin/make_decoy.rb +10 -2
- data/bin/mascot_search.rb +14 -4
- data/bin/msgfplus_search.rb +14 -5
- data/bin/peptide_prophet.rb +14 -7
- data/bin/protxml_to_gff.rb +624 -0
- data/bin/protxml_to_table.rb +19 -2
- data/bin/sixframe.rb +3 -1
- data/bin/tandem_search.rb +51 -23
- data/bin/toppas_pipeline.rb +8 -3
- data/bin/uniprot_annotation.rb +6 -1
- data/ext/protk/{protk.c → decoymaker/decoymaker.c} +13 -15
- data/ext/protk/decoymaker/extconf.rb +3 -0
- data/ext/protk/simplealign/extconf.rb +3 -0
- data/lib/protk/data/FeatureFinderIsotopeWavelet.ini +6 -6
- data/lib/protk/gapped_aligner.rb +264 -0
- data/lib/protk/manage_db_rakefile.rake +2 -1
- data/lib/protk/mascot_util.rb +7 -2
- data/lib/protk/randomize.rb +2 -2
- data/lib/protk/search_tool.rb +1 -1
- data/lib/protk/setup_rakefile.rake +25 -2
- data/lib/protk/spreadsheet_extensions.rb +1 -0
- data/lib/protk/swissprot_database.rb +11 -1
- metadata +30 -8
- data/bin/mascot2xml.rb +0 -87
- data/ext/protk/extconf.rb +0 -3
- data/lib/protk/data/pepxml_mascot_template.xml +0 -29
- data/lib/protk/data/predefined_db.trembl_annotation.yaml +0 -20
data/lib/protk/manage_db_rakefile.rake
CHANGED
@@ -449,7 +449,8 @@ file db_filename do
|
|
449
449
|
|
450
450
|
# Symlink to the source file
|
451
451
|
#
|
452
|
-
|
452
|
+
source_db_filename_relative = Pathname.new(source_db_filename).basename.to_s
|
453
|
+
File.symlink(source_db_filename_relative,db_filename)
|
453
454
|
end
|
454
455
|
end
|
455
456
|
|
data/lib/protk/mascot_util.rb
CHANGED
@@ -54,12 +54,17 @@ class MascotUtil
|
|
54
54
|
|
55
55
|
if ( spec!=nil && rt!=nil)
|
56
56
|
# Remove charge from the end of the title
|
57
|
-
spec_id= remove_charge_from_title_string(spec[1])
|
57
|
+
# spec_id= remove_charge_from_title_string(spec[1])
|
58
|
+
spec_id= spec[1]
|
59
|
+
|
60
|
+
# $stdout.write "#{spec_id} \r"
|
61
|
+
|
58
62
|
|
59
63
|
rt_table[spec_id]=rt[1]
|
60
64
|
end
|
61
|
-
|
65
|
+
|
62
66
|
end
|
67
|
+
# $stdout.write "\n"
|
63
68
|
|
64
69
|
return rt_table
|
65
70
|
|
data/lib/protk/randomize.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'protk/protk'
|
1
|
+
require 'protk/decoymaker'
|
2
2
|
|
3
3
|
class Randomize
|
4
4
|
def self.make_decoys input_path, db_len, output_path, prefix
|
5
|
-
|
5
|
+
Decoymaker.make_decoys input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s
|
6
6
|
end
|
7
7
|
end
|
data/lib/protk/search_tool.rb
CHANGED
@@ -154,7 +154,7 @@ class SearchTool < Tool
|
|
154
154
|
|
155
155
|
def jobid_from_filename(filename)
|
156
156
|
jobid="protk"
|
157
|
-
jobnum_match=filename.match(/(.{1,10})
|
157
|
+
jobnum_match=filename.match(/(.{1,10}).*?\./)
|
158
158
|
if (jobnum_match!=nil)
|
159
159
|
jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
|
160
160
|
end
|
data/lib/protk/setup_rakefile.rake
CHANGED
@@ -133,10 +133,10 @@ task :perl_locallib => [perl_locallib_installed_file]
|
|
133
133
|
#
|
134
134
|
# TPP
|
135
135
|
#
|
136
|
-
tpp_version="4.6.
|
136
|
+
tpp_version="4.6.2"
|
137
137
|
tpp_packagefile="TPP-#{tpp_version}.tgz"
|
138
138
|
tpp_installed_file = "#{env.xinteract}"
|
139
|
-
tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.
|
139
|
+
tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.2.tgz"
|
140
140
|
|
141
141
|
tpp_download_file = download_task tpp_url, tpp_packagefile
|
142
142
|
|
@@ -380,6 +380,29 @@ end
|
|
380
380
|
|
381
381
|
task :galaxyenv => protk_galaxy_envfile
|
382
382
|
|
383
|
+
|
384
|
+
#
|
385
|
+
# NCBI GI and Taxonomy Databases
|
386
|
+
#
|
387
|
+
|
388
|
+
# gi_taxid_package_file="gi_taxid_prot.zip"
|
389
|
+
# gi_taxid_url="ftp://ftp.ncbi.nih.gov/pub/taxonomy/#{gi_taxid_package_file}"
|
390
|
+
# gi_taxid_installed_file=
|
391
|
+
|
392
|
+
# file "gi_taxid_prot.zip" do
|
393
|
+
# %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.zip]
|
394
|
+
# %x[unzip gi_taxid_prot.zip]
|
395
|
+
# end
|
396
|
+
|
397
|
+
# file "taxdmp.zip" do
|
398
|
+
# %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip]
|
399
|
+
# %x[unzip taxdmp.zip]
|
400
|
+
# end
|
401
|
+
|
402
|
+
|
403
|
+
# multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
|
404
|
+
|
405
|
+
|
383
406
|
task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxyenv]
|
384
407
|
|
385
408
|
# Special task when installing via toolshed
|
data/lib/protk/swissprot_database.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bio'
|
3
3
|
require 'protk/constants'
|
4
|
+
require 'pathname'
|
4
5
|
|
5
6
|
# Provides fast indexed access to a swissprot database in a flat .dat file
|
6
7
|
#
|
@@ -13,7 +14,16 @@ class SwissprotDatabase
|
|
13
14
|
@genv=Constants.new
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
+
|
18
|
+
dbpath=Pathname.new(database)
|
19
|
+
|
20
|
+
if ( dbpath.exist? )
|
21
|
+
# require 'debugger';debugger
|
22
|
+
dbclass=Bio::SPTR
|
23
|
+
parser = Bio::FlatFileIndex::Indexer::Parser.new(dbclass, nil, nil)
|
24
|
+
Bio::FlatFileIndex::Indexer::makeindexFlat(dbpath.realpath.dirname.to_s, parser, {}, dbpath.realpath.to_s)
|
25
|
+
@db_object=Bio::FlatFileIndex.new("#{dbpath.realpath.dirname.to_s}")
|
26
|
+
elsif ( database=="swissprot")
|
17
27
|
@db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_sprot_annotation_database}")
|
18
28
|
else
|
19
29
|
@db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_trembl_annotation_database}")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.4
|
4
|
+
version: 1.2.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -139,6 +139,22 @@ dependencies:
|
|
139
139
|
- - ! '>='
|
140
140
|
- !ruby/object:Gem::Version
|
141
141
|
version: 0.3.1
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: bio-blastxmlparser
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.1.1
|
150
|
+
type: :runtime
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.1.1
|
142
158
|
- !ruby/object:Gem::Dependency
|
143
159
|
name: rspec
|
144
160
|
requirement: !ruby/object:Gem::Requirement
|
@@ -172,6 +188,7 @@ executables:
|
|
172
188
|
- make_decoy.rb
|
173
189
|
- correct_omssa_retention_times.rb
|
174
190
|
- repair_run_summary.rb
|
191
|
+
- add_retention_times.rb
|
175
192
|
- peptide_prophet.rb
|
176
193
|
- interprophet.rb
|
177
194
|
- protein_prophet.rb
|
@@ -185,10 +202,13 @@ executables:
|
|
185
202
|
- toppas_pipeline.rb
|
186
203
|
- gffmerge.rb
|
187
204
|
- sixframe.rb
|
205
|
+
- augustus_to_proteindb.rb
|
206
|
+
- protxml_to_gff.rb
|
188
207
|
- uniprot_annotation.rb
|
189
208
|
- protxml_to_table.rb
|
209
|
+
- blastxml_to_table.rb
|
190
210
|
extensions:
|
191
|
-
- ext/protk/extconf.rb
|
211
|
+
- ext/protk/decoymaker/extconf.rb
|
192
212
|
extra_rdoc_files: []
|
193
213
|
files:
|
194
214
|
- lib/protk/bio_sptr_extensions.rb
|
@@ -201,6 +221,7 @@ files:
|
|
201
221
|
- lib/protk/fastadb.rb
|
202
222
|
- lib/protk/galaxy_stager.rb
|
203
223
|
- lib/protk/galaxy_util.rb
|
224
|
+
- lib/protk/gapped_aligner.rb
|
204
225
|
- lib/protk/manage_db_tool.rb
|
205
226
|
- lib/protk/mascot_util.rb
|
206
227
|
- lib/protk/omssa_util.rb
|
@@ -221,8 +242,11 @@ files:
|
|
221
242
|
- lib/protk.rb
|
222
243
|
- lib/protk/manage_db_rakefile.rake
|
223
244
|
- lib/protk/setup_rakefile.rake
|
245
|
+
- bin/add_retention_times.rb
|
224
246
|
- bin/annotate_ids.rb
|
225
247
|
- bin/asapratio.rb
|
248
|
+
- bin/augustus_to_proteindb.rb
|
249
|
+
- bin/blastxml_to_table.rb
|
226
250
|
- bin/correct_omssa_retention_times.rb
|
227
251
|
- bin/feature_finder.rb
|
228
252
|
- bin/file_convert.rb
|
@@ -232,7 +256,6 @@ files:
|
|
232
256
|
- bin/libra.rb
|
233
257
|
- bin/make_decoy.rb
|
234
258
|
- bin/manage_db.rb
|
235
|
-
- bin/mascot2xml.rb
|
236
259
|
- bin/mascot_search.rb
|
237
260
|
- bin/mascot_to_pepxml.rb
|
238
261
|
- bin/msgfplus_search.rb
|
@@ -241,6 +264,7 @@ files:
|
|
241
264
|
- bin/pepxml_to_table.rb
|
242
265
|
- bin/protein_prophet.rb
|
243
266
|
- bin/protk_setup.rb
|
267
|
+
- bin/protxml_to_gff.rb
|
244
268
|
- bin/protxml_to_table.rb
|
245
269
|
- bin/repair_run_summary.rb
|
246
270
|
- bin/sixframe.rb
|
@@ -259,12 +283,10 @@ files:
|
|
259
283
|
- lib/protk/data/FeatureFinderCentroided.ini
|
260
284
|
- lib/protk/data/FeatureFinderIsotopeWavelet.ini
|
261
285
|
- lib/protk/data/galaxyenv.sh
|
262
|
-
- lib/protk/data/pepxml_mascot_template.xml
|
263
286
|
- lib/protk/data/predefined_db.crap.yaml
|
264
287
|
- lib/protk/data/predefined_db.sphuman.yaml
|
265
288
|
- lib/protk/data/predefined_db.swissprot_annotation.yaml
|
266
289
|
- lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
|
267
|
-
- lib/protk/data/predefined_db.trembl_annotation.yaml
|
268
290
|
- lib/protk/data/tandem_params.xml
|
269
291
|
- lib/protk/data/taxonomy_template.xml
|
270
292
|
- lib/protk/data/unimod.xml
|
@@ -272,8 +294,9 @@ files:
|
|
272
294
|
- lib/protk/data/uniprot_accessions_table.txt
|
273
295
|
- lib/protk/data/uniprot_input_accessions.loc
|
274
296
|
- lib/protk/data/yum_packages.yaml
|
275
|
-
- ext/protk/protk.c
|
276
|
-
- ext/protk/extconf.rb
|
297
|
+
- ext/protk/decoymaker/decoymaker.c
|
298
|
+
- ext/protk/decoymaker/extconf.rb
|
299
|
+
- ext/protk/simplealign/extconf.rb
|
277
300
|
homepage: http://rubygems.org/gems/protk
|
278
301
|
licenses: []
|
279
302
|
post_install_message: Now run protk_setup.rb to install third party tools and manage_db.rb
|
@@ -300,4 +323,3 @@ signing_key:
|
|
300
323
|
specification_version: 3
|
301
324
|
summary: Proteomics Toolkit
|
302
325
|
test_files: []
|
303
|
-
has_rdoc:
|
data/bin/mascot2xml.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This file is part of protk
|
4
|
-
# Created by Ira Cooke 12/4/2010
|
5
|
-
#
|
6
|
-
# Convert mascot dat files to pepxml without using TPP Mascot2XML
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
require 'protk/constants'
|
11
|
-
require 'protk/search_tool'
|
12
|
-
require 'mascot/dat'
|
13
|
-
require 'libxml'
|
14
|
-
|
15
|
-
include LibXML
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
# Environment with global constants
|
21
|
-
#
|
22
|
-
genv=Constants.new
|
23
|
-
|
24
|
-
tool=SearchTool.new([:database,:explicit_output,:over_write])
|
25
|
-
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
|
26
|
-
tool.option_parser.parse!
|
27
|
-
|
28
|
-
throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
|
29
|
-
|
30
|
-
def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
|
31
|
-
pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
|
32
|
-
pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
|
33
|
-
pipeline_analysis_node.attributes['summary_xml']=outname
|
34
|
-
end
|
35
|
-
|
36
|
-
def update_enzyme(pepxml_doc,mascot_dat)
|
37
|
-
dat_enzyme=mascot_dat.enzyme
|
38
|
-
enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
|
39
|
-
enzyme_node.attributes['name']=dat_enzyme.title
|
40
|
-
specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
|
41
|
-
# p dat_enzyme.cleavages
|
42
|
-
# TODO: What does the spec say about multiple cut sites
|
43
|
-
# specificity.attributes['cut']=
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
ARGV.each do |file_name|
|
49
|
-
name=file_name.chomp
|
50
|
-
|
51
|
-
dat = Mascot::DAT.open(name)
|
52
|
-
pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
|
53
|
-
pepxml_doc = pepxml_template_parser.parse
|
54
|
-
|
55
|
-
outname = nil
|
56
|
-
if ( tool.explicit_output !=nil)
|
57
|
-
outname = tool.explicit_output
|
58
|
-
else
|
59
|
-
basename = Pathname.new(name).basename
|
60
|
-
outname = "#{basename}.pepXML"
|
61
|
-
end
|
62
|
-
|
63
|
-
$pepxml_ns_prefix="xmlns:"
|
64
|
-
$pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
|
65
|
-
if not pepxml_doc.root.namespaces.default
|
66
|
-
$pepxml_ns_prefix=""
|
67
|
-
$pepxml_ns=nil
|
68
|
-
end
|
69
|
-
|
70
|
-
update_msms_pipeline_analysis(pepxml_doc,dat,outname)
|
71
|
-
update_enzyme(pepxml_doc,dat)
|
72
|
-
|
73
|
-
spectrum_queries={}
|
74
|
-
|
75
|
-
dat.peptides.each do |psm|
|
76
|
-
# psm.score
|
77
|
-
qnum= psm.query
|
78
|
-
if qnum
|
79
|
-
qid=qnum.to_s
|
80
|
-
spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
|
81
|
-
spectrum_queries[qid] << psm
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
|
86
|
-
|
87
|
-
end
|
data/ext/protk/extconf.rb
DELETED
data/lib/protk/data/pepxml_mascot_template.xml
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
|
3
|
-
<msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
|
4
|
-
<msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
|
5
|
-
<sample_enzyme name="trypsin">
|
6
|
-
<specificity cut="KR" no_cut="P" sense="C"/>
|
7
|
-
</sample_enzyme>
|
8
|
-
<search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
|
9
|
-
<search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
|
10
|
-
<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
|
11
|
-
<aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
|
12
|
-
<parameter name="" value=""/>
|
13
|
-
</search_summary>
|
14
|
-
<spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
|
15
|
-
<search_result>
|
16
|
-
<search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
|
17
|
-
<modification_info>
|
18
|
-
<mod_aminoacid_mass position="" mass=""/>
|
19
|
-
</modification_info>
|
20
|
-
<search_score name="ionscore" value=""/>
|
21
|
-
<search_score name="identityscore" value=""/>
|
22
|
-
<search_score name="star" value="0"/>
|
23
|
-
<search_score name="homologyscore" value=""/>
|
24
|
-
<search_score name="expect" value=""/>
|
25
|
-
</search_hit>
|
26
|
-
</search_result>
|
27
|
-
</spectrum_query>
|
28
|
-
</msms_run_summary>
|
29
|
-
</msms_pipeline_analysis>
|
data/lib/protk/data/predefined_db.trembl_annotation.yaml
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This is a predefined setup file for manage_db
|
3
|
-
#
|
4
|
-
# Swissprot_uniprot annotation database (full entries for each protein)
|
5
|
-
#
|
6
|
-
---
|
7
|
-
:description: Swissprot Trembl annotation database (full entries for each protein)
|
8
|
-
:archive_old: false
|
9
|
-
:is_annotation_db: true
|
10
|
-
:decoy_prefix: decoy_
|
11
|
-
:include_filters: []
|
12
|
-
|
13
|
-
:format: dat
|
14
|
-
:id_regexes: []
|
15
|
-
|
16
|
-
:make_blast_index: false
|
17
|
-
:sources:
|
18
|
-
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
|
19
|
-
- ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
|
20
|
-
:decoys: false
|