protk 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -449,7 +449,8 @@ file db_filename do
449
449
 
450
450
  # Symlink to the source file
451
451
  #
452
- File.symlink(source_db_filename,db_filename)
452
+ source_db_filename_relative = Pathname.new(source_db_filename).basename.to_s
453
+ File.symlink(source_db_filename_relative,db_filename)
453
454
  end
454
455
  end
455
456
 
@@ -54,12 +54,17 @@ class MascotUtil
54
54
 
55
55
  if ( spec!=nil && rt!=nil)
56
56
  # Remove charge from the end of the title
57
- spec_id= remove_charge_from_title_string(spec[1])
57
+ # spec_id= remove_charge_from_title_string(spec[1])
58
+ spec_id= spec[1]
59
+
60
+ # $stdout.write "#{spec_id} \r"
61
+
58
62
 
59
63
  rt_table[spec_id]=rt[1]
60
64
  end
61
-
65
+
62
66
  end
67
+ # $stdout.write "\n"
63
68
 
64
69
  return rt_table
65
70
 
@@ -1,7 +1,7 @@
1
- require 'protk/protk'
1
+ require 'protk/decoymaker'
2
2
 
3
3
  class Randomize
4
4
  def self.make_decoys input_path, db_len, output_path, prefix
5
- Protk.make_decoys input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s
5
+ Decoymaker.make_decoys input_path.to_s, db_len.to_i, output_path.to_s, prefix.to_s
6
6
  end
7
7
  end
@@ -154,7 +154,7 @@ class SearchTool < Tool
154
154
 
155
155
  def jobid_from_filename(filename)
156
156
  jobid="protk"
157
- jobnum_match=filename.match(/(.{1,10})\.d/)
157
+ jobnum_match=filename.match(/(.{1,10}).*?\./)
158
158
  if (jobnum_match!=nil)
159
159
  jobid="#{self.jobid_prefix}#{jobnum_match[1]}"
160
160
  end
@@ -133,10 +133,10 @@ task :perl_locallib => [perl_locallib_installed_file]
133
133
  #
134
134
  # TPP
135
135
  #
136
- tpp_version="4.6.1"
136
+ tpp_version="4.6.2"
137
137
  tpp_packagefile="TPP-#{tpp_version}.tgz"
138
138
  tpp_installed_file = "#{env.xinteract}"
139
- tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.1.tgz"
139
+ tpp_url = "https://dl.dropbox.com/u/226794/TPP-4.6.2.tgz"
140
140
 
141
141
  tpp_download_file = download_task tpp_url, tpp_packagefile
142
142
 
@@ -380,6 +380,29 @@ end
380
380
 
381
381
  task :galaxyenv => protk_galaxy_envfile
382
382
 
383
+
384
+ #
385
+ # NCBI GI and Taxonomy Databases
386
+ #
387
+
388
+ # gi_taxid_package_file="gi_taxid_prot.zip"
389
+ # gi_taxid_url="ftp://ftp.ncbi.nih.gov/pub/taxonomy/#{gi_taxid_package_file}"
390
+ # gi_taxid_installed_file=
391
+
392
+ # file "gi_taxid_prot.zip" do
393
+ # %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.zip]
394
+ # %x[unzip gi_taxid_prot.zip]
395
+ # end
396
+
397
+ # file "taxdmp.zip" do
398
+ # %x[wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip]
399
+ # %x[unzip taxdmp.zip]
400
+ # end
401
+
402
+
403
+ # multitask :downloads => FileList["nr","env_nr","gi_taxid_prot.zip","taxdmp.zip"]
404
+
405
+
383
406
  task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz,:openms,:galaxyenv]
384
407
 
385
408
  # Special task when installing via toolshed
@@ -1,3 +1,4 @@
1
+ require 'spreadsheet'
1
2
  # Add a method to the Spreadsheet::Worksheet class to insert a column
2
3
  class Spreadsheet::Worksheet < Object
3
4
  def insert_column(col,index)
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'bio'
3
3
  require 'protk/constants'
4
+ require 'pathname'
4
5
 
5
6
  # Provides fast indexed access to a swissprot database in a flat .dat file
6
7
  #
@@ -13,7 +14,16 @@ class SwissprotDatabase
13
14
  @genv=Constants.new
14
15
  end
15
16
 
16
- if ( database=="swissprot")
17
+
18
+ dbpath=Pathname.new(database)
19
+
20
+ if ( dbpath.exist? )
21
+ # require 'debugger';debugger
22
+ dbclass=Bio::SPTR
23
+ parser = Bio::FlatFileIndex::Indexer::Parser.new(dbclass, nil, nil)
24
+ Bio::FlatFileIndex::Indexer::makeindexFlat(dbpath.realpath.dirname.to_s, parser, {}, dbpath.realpath.to_s)
25
+ @db_object=Bio::FlatFileIndex.new("#{dbpath.realpath.dirname.to_s}")
26
+ elsif ( database=="swissprot")
17
27
  @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_sprot_annotation_database}")
18
28
  else
19
29
  @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_trembl_annotation_database}")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4
4
+ version: 1.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -139,6 +139,22 @@ dependencies:
139
139
  - - ! '>='
140
140
  - !ruby/object:Gem::Version
141
141
  version: 0.3.1
142
+ - !ruby/object:Gem::Dependency
143
+ name: bio-blastxmlparser
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: 1.1.1
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: 1.1.1
142
158
  - !ruby/object:Gem::Dependency
143
159
  name: rspec
144
160
  requirement: !ruby/object:Gem::Requirement
@@ -172,6 +188,7 @@ executables:
172
188
  - make_decoy.rb
173
189
  - correct_omssa_retention_times.rb
174
190
  - repair_run_summary.rb
191
+ - add_retention_times.rb
175
192
  - peptide_prophet.rb
176
193
  - interprophet.rb
177
194
  - protein_prophet.rb
@@ -185,10 +202,13 @@ executables:
185
202
  - toppas_pipeline.rb
186
203
  - gffmerge.rb
187
204
  - sixframe.rb
205
+ - augustus_to_proteindb.rb
206
+ - protxml_to_gff.rb
188
207
  - uniprot_annotation.rb
189
208
  - protxml_to_table.rb
209
+ - blastxml_to_table.rb
190
210
  extensions:
191
- - ext/protk/extconf.rb
211
+ - ext/protk/decoymaker/extconf.rb
192
212
  extra_rdoc_files: []
193
213
  files:
194
214
  - lib/protk/bio_sptr_extensions.rb
@@ -201,6 +221,7 @@ files:
201
221
  - lib/protk/fastadb.rb
202
222
  - lib/protk/galaxy_stager.rb
203
223
  - lib/protk/galaxy_util.rb
224
+ - lib/protk/gapped_aligner.rb
204
225
  - lib/protk/manage_db_tool.rb
205
226
  - lib/protk/mascot_util.rb
206
227
  - lib/protk/omssa_util.rb
@@ -221,8 +242,11 @@ files:
221
242
  - lib/protk.rb
222
243
  - lib/protk/manage_db_rakefile.rake
223
244
  - lib/protk/setup_rakefile.rake
245
+ - bin/add_retention_times.rb
224
246
  - bin/annotate_ids.rb
225
247
  - bin/asapratio.rb
248
+ - bin/augustus_to_proteindb.rb
249
+ - bin/blastxml_to_table.rb
226
250
  - bin/correct_omssa_retention_times.rb
227
251
  - bin/feature_finder.rb
228
252
  - bin/file_convert.rb
@@ -232,7 +256,6 @@ files:
232
256
  - bin/libra.rb
233
257
  - bin/make_decoy.rb
234
258
  - bin/manage_db.rb
235
- - bin/mascot2xml.rb
236
259
  - bin/mascot_search.rb
237
260
  - bin/mascot_to_pepxml.rb
238
261
  - bin/msgfplus_search.rb
@@ -241,6 +264,7 @@ files:
241
264
  - bin/pepxml_to_table.rb
242
265
  - bin/protein_prophet.rb
243
266
  - bin/protk_setup.rb
267
+ - bin/protxml_to_gff.rb
244
268
  - bin/protxml_to_table.rb
245
269
  - bin/repair_run_summary.rb
246
270
  - bin/sixframe.rb
@@ -259,12 +283,10 @@ files:
259
283
  - lib/protk/data/FeatureFinderCentroided.ini
260
284
  - lib/protk/data/FeatureFinderIsotopeWavelet.ini
261
285
  - lib/protk/data/galaxyenv.sh
262
- - lib/protk/data/pepxml_mascot_template.xml
263
286
  - lib/protk/data/predefined_db.crap.yaml
264
287
  - lib/protk/data/predefined_db.sphuman.yaml
265
288
  - lib/protk/data/predefined_db.swissprot_annotation.yaml
266
289
  - lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml
267
- - lib/protk/data/predefined_db.trembl_annotation.yaml
268
290
  - lib/protk/data/tandem_params.xml
269
291
  - lib/protk/data/taxonomy_template.xml
270
292
  - lib/protk/data/unimod.xml
@@ -272,8 +294,9 @@ files:
272
294
  - lib/protk/data/uniprot_accessions_table.txt
273
295
  - lib/protk/data/uniprot_input_accessions.loc
274
296
  - lib/protk/data/yum_packages.yaml
275
- - ext/protk/protk.c
276
- - ext/protk/extconf.rb
297
+ - ext/protk/decoymaker/decoymaker.c
298
+ - ext/protk/decoymaker/extconf.rb
299
+ - ext/protk/simplealign/extconf.rb
277
300
  homepage: http://rubygems.org/gems/protk
278
301
  licenses: []
279
302
  post_install_message: Now run protk_setup.rb to install third party tools and manage_db.rb
@@ -300,4 +323,3 @@ signing_key:
300
323
  specification_version: 3
301
324
  summary: Proteomics Toolkit
302
325
  test_files: []
303
- has_rdoc:
data/bin/mascot2xml.rb DELETED
@@ -1,87 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # This file is part of protk
4
- # Created by Ira Cooke 12/4/2010
5
- #
6
- # Convert mascot dat files to pepxml without using TPP Mascot2XML
7
- #
8
-
9
-
10
- require 'protk/constants'
11
- require 'protk/search_tool'
12
- require 'mascot/dat'
13
- require 'libxml'
14
-
15
- include LibXML
16
-
17
-
18
-
19
-
20
- # Environment with global constants
21
- #
22
- genv=Constants.new
23
-
24
- tool=SearchTool.new([:database,:explicit_output,:over_write])
25
- tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot2xml.rb [options] file1.dat file2.dat ... "
26
- tool.option_parser.parse!
27
-
28
- throw "Only one file at a time is allowed when using explicit output" if (tool.explicit_output && ARGV.length > 1)
29
-
30
- def update_msms_pipeline_analysis(pepxml_doc,mascot_dat,outname)
31
- pipeline_analysis_node=pepxml_doc.find("//#{$pepxml_ns_prefix}msms_pipeline_analysis", $pepxml_ns)[0]
32
- pipeline_analysis_node.attributes['date']=Time.at(mascot_dat.header.date.to_i).to_s
33
- pipeline_analysis_node.attributes['summary_xml']=outname
34
- end
35
-
36
- def update_enzyme(pepxml_doc,mascot_dat)
37
- dat_enzyme=mascot_dat.enzyme
38
- enzyme_node=pepxml_doc.find("//#{$pepxml_ns_prefix}sample_enzyme", $pepxml_ns)[0]
39
- enzyme_node.attributes['name']=dat_enzyme.title
40
- specificity=enzyme_node.find("./#{$pepxml_ns_prefix}specificity",$pepxml_ns)[0]
41
- # p dat_enzyme.cleavages
42
- # TODO: What does the spec say about multiple cut sites
43
- # specificity.attributes['cut']=
44
- end
45
-
46
-
47
-
48
- ARGV.each do |file_name|
49
- name=file_name.chomp
50
-
51
- dat = Mascot::DAT.open(name)
52
- pepxml_template_parser=XML::Parser.file("#{genv.data_lib_dir}/pepxml_mascot_template.xml")
53
- pepxml_doc = pepxml_template_parser.parse
54
-
55
- outname = nil
56
- if ( tool.explicit_output !=nil)
57
- outname = tool.explicit_output
58
- else
59
- basename = Pathname.new(name).basename
60
- outname = "#{basename}.pepXML"
61
- end
62
-
63
- $pepxml_ns_prefix="xmlns:"
64
- $pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
65
- if not pepxml_doc.root.namespaces.default
66
- $pepxml_ns_prefix=""
67
- $pepxml_ns=nil
68
- end
69
-
70
- update_msms_pipeline_analysis(pepxml_doc,dat,outname)
71
- update_enzyme(pepxml_doc,dat)
72
-
73
- spectrum_queries={}
74
-
75
- dat.peptides.each do |psm|
76
- # psm.score
77
- qnum= psm.query
78
- if qnum
79
- qid=qnum.to_s
80
- spectrum_queries[qid]=[] if ( spectrum_queries[qid]==nil)
81
- spectrum_queries[qid] << psm
82
- end
83
- end
84
-
85
- spectrum_queries.each_pair { |name, val| p "#{name} #{val[0].pep}" }
86
-
87
- end
data/ext/protk/extconf.rb DELETED
@@ -1,3 +0,0 @@
1
- require 'mkmf'
2
-
3
- create_makefile('protk/protk')
@@ -1,29 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <?xml-stylesheet type="text/xsl" href="/home/iracooke/.protk/tools/tpp/schema/pepXML_std.xsl"?>
3
- <msms_pipeline_analysis date="2013-01-07T18:29:15" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /home/iracooke/.protk/tools/tpp/schema/pepXML_v117.xsd" summary_xml="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml.pep.xml">
4
- <msms_run_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" raw_data_type="raw" raw_data="(null)">
5
- <sample_enzyme name="trypsin">
6
- <specificity cut="KR" no_cut="P" sense="C"/>
7
- </sample_enzyme>
8
- <search_summary base_name="/home/iracooke/FionaHela/mr176/mr176-HeLa4hrx2_GC1_01_8136.d_mascot2xml" search_engine="MASCOT" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
9
- <search_database local_path="/home/iracooke/.protk/Databases/sphuman/current.fasta" type="AA"/>
10
- <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="2"/>
11
- <aminoacid_modification aminoacid="C" massdiff="57.0215" mass="160.0306" variable="N"/>
12
- <parameter name="" value=""/>
13
- </search_summary>
14
- <spectrum_query spectrum="" start_scan="" end_scan="" precursor_neutral_mass="" assumed_charge="" index="">
15
- <search_result>
16
- <search_hit hit_rank="" peptide="" peptide_prev_aa="" peptide_next_aa="" protein="" num_tot_proteins="" num_matched_ions="" tot_num_ions="" calc_neutral_pep_mass="" massdiff="" num_tol_term="" num_missed_cleavages="" is_rejected="">
17
- <modification_info>
18
- <mod_aminoacid_mass position="" mass=""/>
19
- </modification_info>
20
- <search_score name="ionscore" value=""/>
21
- <search_score name="identityscore" value=""/>
22
- <search_score name="star" value="0"/>
23
- <search_score name="homologyscore" value=""/>
24
- <search_score name="expect" value=""/>
25
- </search_hit>
26
- </search_result>
27
- </spectrum_query>
28
- </msms_run_summary>
29
- </msms_pipeline_analysis>
@@ -1,20 +0,0 @@
1
- #
2
- # This is a predefined setup file for manage_db
3
- #
4
- # Swissprot_uniprot annotation database (full entries for each protein)
5
- #
6
- ---
7
- :description: Swissprot Trembl annotation database (full entries for each protein)
8
- :archive_old: false
9
- :is_annotation_db: true
10
- :decoy_prefix: decoy_
11
- :include_filters: []
12
-
13
- :format: dat
14
- :id_regexes: []
15
-
16
- :make_blast_index: false
17
- :sources:
18
- - - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.dat.gz
19
- - ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt
20
- :decoys: false