protk 1.2.4 → 1.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -449,7 +449,8 @@ file db_filename do
449
449
 
450
450
  # Symlink to the source file
451
451
  #
452
- File.symlink(source_db_filename,db_filename)
452
+ source_db_filename_relative = Pathname.new(source_db_filename).basename.to_s
453
+ File.symlink(source_db_filename_relative,db_filename)
453
454
  end
454
455
  end
455
456
 
@@ -54,12 +54,17 @@ class MascotUtil
54
54
 
55
55
  if ( spec!=nil && rt!=nil)
56
56
  # Remove charge from the end of the title
57
- spec_id= remove_charge_from_title_string(spec[1])
57
+ # spec_id= remove_charge_from_title_string(spec[1])
58
+ spec_id= spec[1]
59
+
60
+ # $stdout.write "#{spec_id} \r"
61
+
58
62
 
59
63
  rt_table[spec_id]=rt[1]
60
64
  end
61
-
65
+
62
66
  end
67
+ # $stdout.write "\n"
63
68
 
64
69
  return rt_table
65
70
 
@@ -1,7 +1,7 @@
1
- require 'protk/protk'
1
+ require 'protk/decoymaker'
2
2
 
3
3
  class Randomize
4
4
  def self.make_decoys input_path, db_len, output_path, prefix
5
- Protk.make_decoys input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s
5
+ Decoymaker.make_decoys input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s
6
6
  end
7
7
  end
@@ -154,7 +154,7 @@ class SearchTool < Tool
154
154
 
155
155
  def jobid_from_filename(filename)
156
156
  jobid="protk"
157
- jobnum_match=filename.match(/(.{1,10})\.d/)
157
+ jobnum_match=filename.match(/(.{1,10}).*?\./)
158
158
  if (jobnum_match!=nil)
159
159
  jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
160
160
  end
@@ -133,10 +133,10 @@ task :perl_locallib => [perl_locallib_installed_file]
133
133
  #
134
134
  # TPP
135
135
  #
136
- tpp_version="4.6.1"
136
+ tpp_version="4.6.2"
137
137
  tpp_packagefile="TPP-#{tpp_version}.tgz"
138
138
  tpp_installed_file = "#{env.xinteract}"
139
- tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.1.tgz"
139
+ tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.2.tgz"
140
140
 
141
141
  tpp_download_file = download_task tpp_url, tpp_packagefile
142
142
 
@@ -380,6 +380,29 @@ end
380
380
 
381
381
  task :galaxyenv => protk_galaxy_envfile
382
382
 
383
+
384
+ #
385
+ # NCBI GI and Taxonomy Databases
386
+ #
387
+
388
+ # gi_taxid_package_file="gi_taxid_prot.zip"
389
+ # gi_taxid_url="ftp://ftp.ncbi.nih.gov/pub/taxonomy/#{gi_taxid_package_file}"
390
+ # gi_taxid_installed_file=
391
+
392
+ # file "gi_taxid_prot.zip" do
393
+ # %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.zip]
394
+ # %x[unzip gi_taxid_prot.zip]
395
+ # end
396
+
397
+ # file "taxdmp.zip" do
398
+ # %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip]
399
+ # %x[unzip taxdmp.zip]
400
+ # end
401
+
402
+
403
+ # multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
404
+
405
+
383
406
  task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxyenv]
384
407
 
385
408
  # Special task when installing via toolshed
@@ -1,3 +1,4 @@
1
+ require 'spreadsheet'
1
2
  # Add a method to the Spreadsheet::Worksheet class to insert a column
2
3
  class Spreadsheet::Worksheet < Object
3
4
  def insert_column(col,index)
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'bio'
3
3
  require 'protk/constants'
4
+ require 'pathname'
4
5
 
5
6
  # Provides fast indexed access to a swissprot database in a flat .dat file
6
7
  #
@@ -13,7 +14,16 @@ class SwissprotDatabase
13
14
  @genv=Constants.new
14
15
  end
15
16
 
16
- if ( database=="swissprot")
17
+
18
+ dbpath=Pathname.new(database)
19
+
20
+ if ( dbpath.exist? )
21
+ # require 'debugger';debugger
22
+ dbclass=Bio::SPTR
23
+ parser = Bio::FlatFileIndex::Indexer::Parser.new(dbclass, nil, nil)
24
+ Bio::FlatFileIndex::Indexer::makeindexFlat(dbpath.realpath.dirname.to_s, parser, {}, dbpath.realpath.to_s)
25
+ @db_object=Bio::FlatFileIndex.new("#{dbpath.realpath.dirname.to_s}")
26
+ elsif ( database=="swissprot")
17
27
  @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_sprot_annotation_database}")
18
28
  else
19
29
  @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_trembl_annotation_database}")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4
4
+ version: 1.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -139,6 +139,22 @@ dependencies:
139
139
  - - ! '>='
140
140
  - !ruby/object:Gem::Version
141
141
  version: 0.3.1
142
+ - !ruby/object:Gem::Dependency
143
+ name: bio-blastxmlparser
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: 1.1.1
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: 1.1.1
142
158
  - !ruby/object:Gem::Dependency
143
159
  name: rspec
144
160
  requirement: !ruby/object:Gem::Requirement
@@ -172,6 +188,7 @@ executables:
172
188
  - make_decoy.rb
173
189
  - correct_omssa_retention_times.rb
174
190
  - repair_run_summary.rb
191
+ - add_retention_times.rb
175
192
  - peptide_prophet.rb
176
193
  - interprophet.rb
177
194
  - protein_prophet.rb
@@ -185,10 +202,13 @@ executables:
185
202
  - toppas_pipeline.rb
186
203
  - gffmerge.rb
187
204
  - sixframe.rb
205
+ - augustus_to_proteindb.rb
206
+ - protxml_to_gff.rb
188
207
  - uniprot_annotation.rb
189
208
  - protxml_to_table.rb
209
+ - blastxml_to_table.rb
190
210
  extensions:
191
- - ext/protk/extconf.rb
211
+ - ext/protk/decoymaker/extconf.rb
192
212
  extra_rdoc_files: []
193
213
  files:
194
214
  - lib/protk/bio_sptr_extensions.rb
@@ -201,6 +221,7 @@ files:
201
221
  - lib/protk/fastadb.rb
202
222
  - lib/protk/galaxy_stager.rb
203
223
  - lib/protk/galaxy_util.rb
224
+ - lib/protk/gapped_aligner.rb
204
225
  - lib/protk/manage_db_tool.rb
205
226
  - lib/protk/mascot_util.rb
206
227
  - lib/protk/omssa_util.rb
@@ -221,8 +242,11 @@ files:
221
242
  - lib/protk.rb
222
243
  - lib/protk/manage_db_rakefile.rake
223
244
  - lib/protk/setup_rakefile.rake
245
+ - bin/add_retention_times.rb
224
246
  - bin/annotate_ids.rb
225
247
  - bin/asapratio.rb
248
+ - bin/augustus_to_proteindb.rb
249
+ - bin/blastxml_to_table.rb
226
250
  - bin/correct_omssa_retention_times.rb
227
251
  - bin/feature_finder.rb
228
252
  - bin/file_convert.rb
@@ -232,7 +256,6 @@ files:
232
256
  - bin/libra.rb
233
257
  - bin/make_decoy.rb
234
258
  - bin/manage_db.rb
235
- - bin/mascot2xml.rb
236
259
  - bin/mascot_search.rb
237
260
  - bin/mascot_to_pepxml.rb
238
261
  - bin/msgfplus_search.rb
@@ -241,6 +264,7 @@ files:
241
264
  - bin/pepxml_to_table.rb
242
265
  - bin/protein_prophet.rb
243
266
  - bin/protk_setup.rb
267
+ - bin/protxml_to_gff.rb
244
268
  - bin/protxml_to_table.rb
245
269
  - bin/repair_run_summary.rb
246
270
  - bin/sixframe.rb
@@ -259,12 +283,10 @@ files:
259
283
  - lib/protk/data/FeatureFinderCentroided.ini
260
284
  - lib/protk/data/FeatureFinderIsotopeWavelet.ini
261
285
  - lib/protk/data/galaxyenv.sh
262
- - lib/protk/data/pepxml_mascot_template.xml
263
286
  - lib/protk/data/predefined_db.crap.yaml
264
287
  - lib/protk/data/predefined_db.sphuman.yaml
265
288
  - lib/protk/data/predefined_db.swissprot_annotation.yaml
266
289
  - lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
267
- - lib/protk/data/predefined_db.trembl_annotation.yaml
268
290
  - lib/protk/data/tandem_params.xml
269
291
  - lib/protk/data/taxonomy_template.xml
270
292
  - lib/protk/data/unimod.xml
@@ -272,8 +294,9 @@ files:
272
294
  - lib/protk/data/uniprot_accessions_table.txt
273
295
  - lib/protk/data/uniprot_input_accessions.loc
274
296
  - lib/protk/data/yum_packages.yaml
275
- - ext/protk/protk.c
276
- - ext/protk/extconf.rb
297
+ - ext/protk/decoymaker/decoymaker.c
298
+ - ext/protk/decoymaker/extconf.rb
299
+ - ext/protk/simplealign/extconf.rb
277
300
  homepage: http://rubygems.org/gems/protk
278
301
  licenses: []
279
302
  post_install_message: Now run protk_setup.rb to install third party tools and manage_db.rb
@@ -300,4 +323,3 @@ signing_key:
300
323
  specification_version: 3
301
324
  summary: Proteomics Toolkit
302
325
  test_files: []
303
- has_rdoc:
data/bin/mascot2xml.rb DELETED
@@ -1,87 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # This file is part of protk
4
- # Created by Ira Cooke 12/4/2010
5
- #
6
- # Convert mascot dat files to pepxml without using TPP Mascot2XML
7
- #
8
-
9
-
10
- require 'protk/constants'
11
- require 'protk/search_tool'
12
- require 'mascot/dat'
13
- require 'libxml'
14
-
15
- include LibXML
16
-
17
-
18
-
19
-
20
- # Environment with global constants
21
- #
22
- genv=Constants.new
23
-
24
- tool=SearchTool.new([:database,:explicit_output,:over_write])
25
- tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
26
- tool.option_parser.parse!
27
-
28
- throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
29
-
30
- def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
31
- pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
32
- pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
33
- pipeline_analysis_node.attributes['summary_xml']=outname
34
- end
35
-
36
- def update_enzyme(pepxml_doc,mascot_dat)
37
- dat_enzyme=mascot_dat.enzyme
38
- enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
39
- enzyme_node.attributes['name']=dat_enzyme.title
40
- specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
41
- # p dat_enzyme.cleavages
42
- # TODO: What does the spec say about multiple cut sites
43
- # specificity.attributes['cut']=
44
- end
45
-
46
-
47
-
48
- ARGV.each do |file_name|
49
- name=file_name.chomp
50
-
51
- dat = Mascot::DAT.open(name)
52
- pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
53
- pepxml_doc = pepxml_template_parser.parse
54
-
55
- outname = nil
56
- if ( tool.explicit_output !=nil)
57
- outname = tool.explicit_output
58
- else
59
- basename = Pathname.new(name).basename
60
- outname = "#{basename}.pepXML"
61
- end
62
-
63
- $pepxml_ns_prefix="xmlns:"
64
- $pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
65
- if not pepxml_doc.root.namespaces.default
66
- $pepxml_ns_prefix=""
67
- $pepxml_ns=nil
68
- end
69
-
70
- update_msms_pipeline_analysis(pepxml_doc,dat,outname)
71
- update_enzyme(pepxml_doc,dat)
72
-
73
- spectrum_queries={}
74
-
75
- dat.peptides.each do |psm|
76
- # psm.score
77
- qnum= psm.query
78
- if qnum
79
- qid=qnum.to_s
80
- spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
81
- spectrum_queries[qid] << psm
82
- end
83
- end
84
-
85
- spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
86
-
87
- end
data/ext/protk/extconf.rb DELETED
@@ -1,3 +0,0 @@
1
- require 'mkmf'
2
-
3
- create_makefile('protk/protk')
@@ -1,29 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
3
- <msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
4
- <msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
5
- <sample_enzyme name="trypsin">
6
- <specificity cut="KR" no_cut="P" sense="C"/>
7
- </sample_enzyme>
8
- <search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
9
- <search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
10
- <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
11
- <aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
12
- <parameter name="" value=""/>
13
- </search_summary>
14
- <spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
15
- <search_result>
16
- <search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
17
- <modification_info>
18
- <mod_aminoacid_mass position="" mass=""/>
19
- </modification_info>
20
- <search_score name="ionscore" value=""/>
21
- <search_score name="identityscore" value=""/>
22
- <search_score name="star" value="0"/>
23
- <search_score name="homologyscore" value=""/>
24
- <search_score name="expect" value=""/>
25
- </search_hit>
26
- </search_result>
27
- </spectrum_query>
28
- </msms_run_summary>
29
- </msms_pipeline_analysis>
@@ -1,20 +0,0 @@
1
- #
2
- # This is a predefined setup file for manage_db
3
- #
4
- # Swissprot_uniprot annotation database (full entries for each protein)
5
- #
6
- ---
7
- :description: Swissprot Trembl annotation database (full entries for each protein)
8
- :archive_old: false
9
- :is_annotation_db: true
10
- :decoy_prefix: decoy_
11
- :include_filters: []
12
-
13
- :format: dat
14
- :id_regexes: []
15
-
16
- :make_blast_index: false
17
- :sources:
18
- - - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
19
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
20
- :decoys: false