protk 1.1.0.pre → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,46 @@
1
+ require 'protk/pepxml'
2
+ require 'protk/galaxy_stager'
3
+ require 'protk/galaxy_util'
4
+ require 'protk/convert_util'
5
+ require 'fileutils'
6
+
1
7
  class GalaxyUtil
2
8
 
3
- def self.for_galaxy
9
+ def self.for_galaxy?
4
10
  for_galaxy = ARGV[0] == "--galaxy"
5
11
  ARGV.shift if for_galaxy
6
12
  return for_galaxy
7
13
  end
8
14
 
9
- end
15
+
16
+ def self.stage_protxml(input_protxml_path)
17
+ # This method takes in the path to a protxml created in Galaxy,
18
+ # finds the dependent pepxml and peak lists (mzml files), creates
19
+ # symbolic links to the peak lists with the correct extension and
20
+ # indexes them if needed (both seem required for TPP quant
21
+ # tools) and then produces new protxml and pepxml files with paths
22
+ # updated to these new peak list files.
23
+
24
+ protxml_path="interact.prot.xml"
25
+ FileUtils.copy(input_protxml_path, "interact.prot.xml")
26
+
27
+ protxml = ProtXML.new(protxml_path)
28
+ pepxml_path = protxml.find_pep_xml()
29
+
30
+ protxml_stager = GalaxyStager.new(protxml_path, :extension => ".prot.xml", :force_copy => true)
31
+ pepxml_stager = GalaxyStager.new(pepxml_path, :name => "interact", :extension => ".xml", :force_copy => true)
32
+ pepxml_path = pepxml_stager.staged_path
33
+ pepxml_stager.replace_references(protxml_path)
34
+ runs = PepXML.new(pepxml_stager.staged_path).find_runs()
35
+
36
+ run_stagers = runs.map do |base_name, run|
37
+ run_stager = GalaxyStager.new(base_name, :extension => ".#{run[:type]}")
38
+ ConvertUtil.ensure_mzml_indexed(run_stager.staged_path)
39
+ run_stager.replace_references(pepxml_path, :base_only => true)
40
+ run_stager
41
+ end
42
+
43
+ protxml_path
44
+ end
45
+
46
+ end
@@ -35,6 +35,7 @@ def check_ftp_release_notes(release_notes)
35
35
 
36
36
  rn_path="#{$genv.database_downloads}/#{rn_uri.host}/#{rn_uri.path}"
37
37
 
38
+ update_needed=false
38
39
 
39
40
  host=rn_uri.host
40
41
  Net::FTP.open(host) do |ftp|
@@ -104,10 +105,12 @@ def check_ftp_release_notes(release_notes)
104
105
  when ( existing_digest != rn_digest )
105
106
  FileUtils.mkpath(Pathname.new(rn_path).dirname.to_s)
106
107
  File.open(rn_path, "w") {|file| file.puts(rn_data) }
108
+ update_needed = true
107
109
  else
108
110
  p "Release notes are up to date"
109
- end
111
+ end
110
112
  end
113
+ update_needed
111
114
  end
112
115
 
113
116
  def download_ftp_file(ftp,file_name,dest_dir)
@@ -216,16 +219,23 @@ def ftp_source(ftpsource)
216
219
 
217
220
  release_notes_url=ftpsource[1]
218
221
  release_notes_exist=true
219
- release_notes_exist=false if release_notes_url =~ /^\s*none\s*$/
222
+ release_notes_exist=false if (release_notes_url =~ /^\s*none\s*$/) || (release_notes_url==nil)
223
+
224
+ release_notes_show_update_needed = true
225
+
220
226
  if release_notes_exist
221
- data_rn=URI.parse(release_notes_url) unless
222
- release_notes_file_path="#{$genv.database_downloads}/#{data_rn.host}/#{data_rn.path}"
223
227
 
224
- task :check_rn do
225
- check_ftp_release_notes(release_notes_url)
226
- end
228
+ data_rn=URI.parse(release_notes_url)
229
+
230
+ if ( data_rn != nil )
231
+ release_notes_file_path="#{$genv.database_downloads}/#{data_rn.host}/#{data_rn.path}"
227
232
 
228
- file release_notes_file_path => :check_rn
233
+ task :check_rn do
234
+ release_notes_show_update_needed = check_ftp_release_notes(release_notes_url)
235
+ end
236
+
237
+ file release_notes_file_path => :check_rn
238
+ end
229
239
  else
230
240
  task :check_date do
231
241
 
@@ -233,38 +243,41 @@ def ftp_source(ftpsource)
233
243
  end
234
244
 
235
245
 
236
-
237
246
  if ( data_file_path=~/\*/) # A wildcard
238
247
  unpacked_data_path=data_file_path.gsub(/\*/,"_all_").gsub(/\.gz$/,'')
239
248
  end
240
249
 
241
- file unpacked_data_path do #Unpacking. Includes unzipping and/or concatenating
242
- download_ftp_source(ftpsource[0])
250
+ task unpacked_data_path do #Unpacking. Includes unzipping and/or concatenating
251
+ if ( release_notes_show_update_needed )
252
+ download_ftp_source(ftpsource[0])
253
+ file_pattern = Pathname.new(data_file_path).basename.to_s
243
254
 
244
- case
245
- when data_file_path=~/\*/ # Multiple files to unzip/concatenate and we don't know what they are yet
246
- file_pattern = Pathname.new(data_file_path).basename.to_s
247
- if file_pattern =~ /.gz$/
248
- unzipcmd="gunzip -vdf #{file_pattern}"
249
- p "Unzipping #{unzipcmd} ... this could take a while"
250
- sh %{ cd #{Pathname.new(data_file_path).dirname}; #{unzipcmd} }
251
- end
255
+ case
256
+
257
+ when data_file_path=~/\*/ # Multiple files to unzip/concatenate and we don't know what they are yet
252
258
 
253
- file_pattern.gsub!(/\.gz$/,'')
254
- catcmd="cat #{file_pattern} > #{unpacked_data_path}"
259
+ if file_pattern =~ /.gz$/
260
+ unzipcmd="gunzip -vdf #{file_pattern}"
261
+ p "Unzipping #{unzipcmd} ... this could take a while"
262
+ sh %{ cd #{Pathname.new(data_file_path).dirname}; #{unzipcmd} }
263
+ end
264
+
265
+ file_pattern.gsub!(/\.gz$/,'')
266
+ catcmd="cat #{file_pattern} > #{unpacked_data_path}"
255
267
 
256
- p "Concatenating files #{catcmd} ... this could take a while"
257
- sh %{ cd #{Pathname.new(data_file_path).dirname}; #{catcmd} }
268
+ p "Concatenating files #{catcmd} ... this could take a while"
269
+ sh %{ cd #{Pathname.new(data_file_path).dirname}; #{catcmd} }
258
270
 
259
- else # Simple case. A single file
260
- if file_pattern =~ /.gz$/
261
- p "Unzipping #{Pathname.new(data_file_path).basename} ... "
262
- sh %{ cd #{Pathname.new(data_file_path).dirname}; gunzip -f #{Pathname.new(data_file_path).basename} }
271
+ else # Simple case. A single file
272
+ if file_pattern =~ /.gz$/
273
+ p "Unzipping #{Pathname.new(data_file_path).basename} ... "
274
+ sh %{ cd #{Pathname.new(data_file_path).dirname}; gunzip -f #{Pathname.new(data_file_path).basename} }
275
+ end
263
276
  end
264
277
  end
265
278
  end
266
279
 
267
- task release_notes_file_path => release_notes_file_path if release_notes_exist
280
+ file unpacked_data_path => release_notes_file_path if release_notes_exist
268
281
 
269
282
  unpacked_data_path
270
283
  end
@@ -380,8 +393,8 @@ file decoy_db_filename => raw_db_filename do
380
393
 
381
394
  p "Generating decoy sequences ... this could take a while"
382
395
  # Make decoys, concatenate and delete decoy only file
383
- Randomize.make_decoys #{raw_db_filename} #{db_length} #{decoys_filename} #{decoy_prefix}"
384
- cmd << "cat #{raw_db_filename} #{decoys_filename} >> #{decoy_db_filename}; rm #{decoys_filename}"
396
+ Randomize.make_decoys raw_db_filename, db_length, decoys_filename, decoy_prefix
397
+ cmd = "cat #{raw_db_filename} #{decoys_filename} >> #{decoy_db_filename}; rm #{decoys_filename}"
385
398
  sh %{ #{cmd} }
386
399
  end
387
400
 
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'rexml/document'
3
+ require 'rexml/xpath'
4
+
5
+ class PepXML
6
+ def initialize(file_name)
7
+ @doc=REXML::Document.new(File.new(file_name))
8
+ end
9
+
10
+ def find_runs()
11
+ runs = {}
12
+ REXML::XPath.each(@doc,"//msms_run_summary") do |summary|
13
+ base_name = summary.attributes["base_name"]
14
+ if not runs.has_key?(base_name)
15
+ runs[base_name] = {:base_name => summary.attributes["base_name"],
16
+ :type => summary.attributes["raw_data"]}
17
+ end
18
+ end
19
+ runs
20
+ end
21
+
22
+ end
data/lib/protk/protxml.rb CHANGED
@@ -2,7 +2,6 @@ require 'rubygems'
2
2
  require 'rexml/document'
3
3
  require 'rexml/xpath'
4
4
 
5
-
6
5
  class ProtXML
7
6
 
8
7
  attr_accessor :groups
@@ -60,6 +59,11 @@ class ProtXML
60
59
  @doc=REXML::Document.new(File.new(file_name))
61
60
  @groups=self.init_groups
62
61
  end
62
+
63
+ def find_pep_xml()
64
+ header = REXML::XPath.first(@doc, "//protein_summary_header")
65
+ source_file = header.attributes["source_files"]
66
+ end
63
67
 
64
68
  def peptide_sequences_from_protein(prot)
65
69
  peptides=prot[:peptides]
@@ -11,10 +11,18 @@ directory @build_dir
11
11
  directory @download_dir
12
12
 
13
13
  def package_manager_name
14
- if RbConfig::CONFIG['host_os'] =~ /darwin/
15
- return 'brew'
14
+ package_managers = ["brew","yum","apt-get"]
15
+
16
+ package_managers.each do |pmname|
17
+ if supports_package_manager pmname
18
+ return pmname
19
+ end
16
20
  end
17
- 'apt-get'
21
+ end
22
+
23
+ def supports_package_manager name
24
+ res = %x[which #{name}]
25
+ (res == "")
18
26
  end
19
27
 
20
28
  def clean_build_dir
@@ -105,6 +113,9 @@ file perl_locallib_installed_file => [@build_dir,"#{@download_dir}/#{perl_local
105
113
  if !Pathname.new("~/.bash_profile").exist? || File.read("~/.bash_profile") =~ /Mlocal::lib/
106
114
  sh "echo 'eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})' >>~/.bash_profile"
107
115
  end
116
+
117
+ sh "eval $(perl -I#{perl_dir}/lib/perl5 -Mlocal::lib=#{perl_dir})"
118
+ sh "curl -L http://cpanmin.us | perl - --self-upgrade"
108
119
  end
109
120
 
110
121
  task :perl_locallib => [perl_locallib_installed_file]
@@ -129,7 +140,7 @@ download_task tpp_url, tpp_packagefile
129
140
  file tpp_installed_file => [:perl_locallib,@build_dir,"#{@download_dir}/#{tpp_packagefile}"] do
130
141
  sh %{cp #{@download_dir}/#{tpp_packagefile} #{@build_dir}}
131
142
  sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::Parser}
132
- sh %{cpanm --local-lib=#{env.protk_dir}/perl5 XML::CGI --force}
143
+ sh %{cpanm --local-lib=#{env.protk_dir}/perl5 CGI --force}
133
144
 
134
145
  sh %{cd #{@build_dir};tar -xvzf TPP-#{tpp_version}.tgz}
135
146
 
@@ -226,9 +237,9 @@ task :blast => blast_installed_file
226
237
  #
227
238
  # MSGFPlus
228
239
  #
229
- msgfplus_version="20120823"
230
- msgfplus_packagefile="MSGFPlus.20120823.zip"
231
- msgfplus_url="http://proteomics.ucsd.edu/Downloads/MSGFPlus.20120823.zip"
240
+ msgfplus_version="20121116"
241
+ msgfplus_packagefile="MSGFPlus.#{msgfplus_version}.zip"
242
+ msgfplus_url="http://proteomics.ucsd.edu/Downloads/MSGFPlus.#{msgfplus_version}.zip"
232
243
  msgfplus_installed_file="#{env.msgfplusjar}"
233
244
 
234
245
  download_task msgfplus_url, msgfplus_packagefile
@@ -242,4 +253,40 @@ end
242
253
 
243
254
  task :msgfplus => msgfplus_installed_file
244
255
 
245
- task :all => [:tpp,:omssa,:blast]
256
+ #
257
+ # pwiz
258
+ #
259
+ def pwiz_platform
260
+ if RbConfig::CONFIG['host_os'] =~ /darwin/
261
+ return 'darwin-x86-xgcc40'
262
+ end
263
+ 'linux-x86_64-gcc42'
264
+ end
265
+
266
+ def platform_bunzip
267
+ if RbConfig::CONFIG['host_os'] =~ /darwin/
268
+ return 'pbunzip2'
269
+ end
270
+ 'bunzip2'
271
+ end
272
+
273
+ pwiz_version="3_0_4146"
274
+ pwiz_packagefile="pwiz-bin-#{pwiz_platform}-release-#{pwiz_version}.tar.bz2"
275
+ pwiz_url="https://dl.dropbox.com/u/226794/#{pwiz_packagefile}"
276
+ pwiz_installed_file="#{env.idconvert}"
277
+
278
+ download_task pwiz_url, pwiz_packagefile
279
+
280
+ file pwiz_installed_file => [@build_dir,"#{@download_dir}/#{pwiz_packagefile}"] do
281
+ sh %{cp #{@download_dir}/#{pwiz_packagefile} #{@build_dir}}
282
+ sh %{cd #{@build_dir}; #{platform_bunzip} -f #{pwiz_packagefile}}
283
+ sh %{cd #{@build_dir}; tar -xvf #{pwiz_packagefile.chomp('.bz2')}}
284
+ sh %{mkdir -p #{env.pwiz_root}}
285
+ sh %{cd #{@build_dir}; cp idconvert msconvert #{env.pwiz_root}/}
286
+ end
287
+
288
+ task :pwiz => pwiz_installed_file
289
+
290
+ task :all => [:tpp,:omssa,:blast,:msgfplus,:pwiz]
291
+
292
+
@@ -16,7 +16,7 @@ class SwissprotDatabase
16
16
  if ( database=="swissprot")
17
17
  @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_sprot_annotation_database}")
18
18
  else
19
- @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}#{@genv.uniprot_trembl_annotation_database}")
19
+ @db_object=Bio::FlatFileIndex.new("#{@genv.protein_database_root}/#{@genv.uniprot_trembl_annotation_database}")
20
20
  end
21
21
 
22
22
  @db_object.always_check_consistency=false
@@ -0,0 +1,47 @@
1
+ require 'rubygems'
2
+ require 'net/http'
3
+ require 'protk/constants'
4
+
5
+ # Provides access to uniprot.org via its API
6
+ # See docs and examples here http://www.uniprot.org/faq/28#id_mapping_examples
7
+ #
8
+ class UniprotMapper
9
+
10
+ def initialize
11
+ @genv = Constants.new
12
+ end
13
+
14
+ def map(from_id_type,from_ids,output_id)
15
+
16
+ from_query = from_ids.join(" ")
17
+
18
+ base = 'www.uniprot.org'
19
+ tool = 'mapping'
20
+ params = {
21
+ 'from' => from_id_type, 'to' => output_id, 'format' => 'tab',
22
+ 'query' => from_query
23
+ }
24
+
25
+ http = Net::HTTP.new base
26
+ @genv.log "Mapping to #{output_id}" ,:info
27
+ response = http.request_post '/' + tool + '/',
28
+ params.keys.map {|key| key + '=' + params[key]}.join('&')
29
+
30
+ loc = nil
31
+ while response.code == '302'
32
+ loc = response['Location']
33
+ response = http.request_get loc
34
+ end
35
+
36
+ while loc
37
+ wait = response['Retry-After'] or break
38
+ @genv.log "Waiting (#{wait})..." , :info
39
+ sleep wait.to_i
40
+ response = http.request_get loc
41
+ end
42
+
43
+ response.value # raises http error if not 2xx
44
+ return response.body
45
+ end
46
+
47
+ end
data/lib/protk.rb CHANGED
@@ -16,3 +16,4 @@ require 'protk/constants.rb'
16
16
  require 'protk/command_runner.rb'
17
17
  require 'protk/biotools_excel_converter.rb'
18
18
  require 'protk/bio_sptr_extensions.rb'
19
+ require 'protk/galaxy_stager.rb'
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: protk
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0.pre
5
- prerelease: 6
4
+ version: 1.1.0
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ira Cooke
@@ -144,6 +144,9 @@ email: iracooke@gmail.com
144
144
  executables:
145
145
  - protk_setup.rb
146
146
  - manage_db.rb
147
+ - asapratio.rb
148
+ - libra.rb
149
+ - xpress.rb
147
150
  - tandem_search.rb
148
151
  - mascot_search.rb
149
152
  - omssa_search.rb
@@ -162,6 +165,7 @@ executables:
162
165
  - annotate_ids.rb
163
166
  - unimod_to_loc.rb
164
167
  - generate_omssa_loc.rb
168
+ - uniprot_mapper.rb
165
169
  extensions:
166
170
  - ext/protk/extconf.rb
167
171
  extra_rdoc_files: []
@@ -171,12 +175,15 @@ files:
171
175
  - lib/protk/biotools_excel_converter.rb
172
176
  - lib/protk/command_runner.rb
173
177
  - lib/protk/constants.rb
178
+ - lib/protk/convert_util.rb
179
+ - lib/protk/data/make_uniprot_table.rb
174
180
  - lib/protk/eupathdb_gene_information_table.rb
175
181
  - lib/protk/galaxy_stager.rb
176
182
  - lib/protk/galaxy_util.rb
177
183
  - lib/protk/manage_db_tool.rb
178
184
  - lib/protk/mascot_util.rb
179
185
  - lib/protk/omssa_util.rb
186
+ - lib/protk/pepxml.rb
180
187
  - lib/protk/plasmodb.rb
181
188
  - lib/protk/prophet_tool.rb
182
189
  - lib/protk/protein_annotator.rb
@@ -187,18 +194,21 @@ files:
187
194
  - lib/protk/spreadsheet_extensions.rb
188
195
  - lib/protk/swissprot_database.rb
189
196
  - lib/protk/tool.rb
197
+ - lib/protk/uniprot_mapper.rb
190
198
  - lib/protk/xtandem_defaults.rb
191
199
  - lib/protk.rb
192
200
  - lib/protk/big_search_rakefile.rake
193
201
  - lib/protk/manage_db_rakefile.rake
194
202
  - lib/protk/setup_rakefile.rake
195
203
  - bin/annotate_ids.rb
204
+ - bin/asapratio.rb
196
205
  - bin/big_search.rb
197
206
  - bin/correct_omssa_retention_times.rb
198
207
  - bin/feature_finder.rb
199
208
  - bin/file_convert.rb
200
209
  - bin/generate_omssa_loc.rb
201
210
  - bin/interprophet.rb
211
+ - bin/libra.rb
202
212
  - bin/make_decoy.rb
203
213
  - bin/manage_db.rb
204
214
  - bin/mascot_search.rb
@@ -213,7 +223,9 @@ files:
213
223
  - bin/tandem_search.rb
214
224
  - bin/template_search.rb
215
225
  - bin/unimod_to_loc.rb
226
+ - bin/uniprot_mapper.rb
216
227
  - bin/xls_to_table.rb
228
+ - bin/xpress.rb
217
229
  - README.md
218
230
  - lib/protk/data/apt-get_packages.yaml
219
231
  - lib/protk/data/brew_packages.yaml
@@ -226,6 +238,10 @@ files:
226
238
  - lib/protk/data/tandem_params.xml
227
239
  - lib/protk/data/taxonomy_template.xml
228
240
  - lib/protk/data/unimod.xml
241
+ - lib/protk/data/uniprot_accessions.loc
242
+ - lib/protk/data/uniprot_accessions_table.txt
243
+ - lib/protk/data/uniprot_input_accessions.loc
244
+ - lib/protk/data/yum_packages.yaml
229
245
  - ext/protk/protk.c
230
246
  - ext/protk/extconf.rb
231
247
  homepage: http://rubygems.org/gems/protk
@@ -244,9 +260,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
244
260
  required_rubygems_version: !ruby/object:Gem::Requirement
245
261
  none: false
246
262
  requirements:
247
- - - ! '>'
263
+ - - ! '>='
248
264
  - !ruby/object:Gem::Version
249
- version: 1.3.1
265
+ version: '0'
250
266
  requirements: []
251
267
  rubyforge_project:
252
268
  rubygems_version: 1.8.24