ncbi-blast-dbs 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bcce1b77f891ca2abb26a791de778fb9305d0e8a998f971eff97393e11f6ea87
4
+ data.tar.gz: 1dc5dc4d9f3d2bf0f6dea3b75bc04f1cefbece636598c941dcf08f9c2affba2a
5
+ SHA512:
6
+ metadata.gz: fb6f3996b8e344a7e98af27c6187bc71a57cdf3996ed74d84243355c6c12aa9e122081b31b2b006d995fea04011c698cde49ef692c8b8eb6b45a790b2490d17f
7
+ data.tar.gz: bc9f835b65e8bb3fd7923d697a9b98096f2d5b5b8e9f7e5c0482f0996aece512397fecc70db78426e93bb91b99a199f7b9cd20bb48c0f46b915e5e580b7f7ef5
data/README.md CHANGED
@@ -8,8 +8,8 @@ or re-downloaded if corrupt. Aborted downloads are safely resumed.
8
8
 
9
9
  `ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
10
10
  `update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
11
- download and checksum verification to `wget` and `md5sum` and is thus not as
12
- universal.
11
+ download and checksum verification to `wget` and `md5sum` / `md5` and is thus
12
+ not as universal.
13
13
 
14
14
  ### Installation
15
15
 
data/bin/ncbi-blast-dbs CHANGED
@@ -1,13 +1,20 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rake'
4
- import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
5
4
 
6
5
  trap :INT do
7
6
  puts "Quitting ..."
8
7
  exit!
9
8
  end
10
9
 
11
- Rake.application.init 'ncbi-blast-dbs'
12
- Rake.application.load_imports
13
- Rake.application.top_level
10
+ if ARGV.include? "http";
11
+ import "#{File.dirname(__FILE__)}/../lib/http-ncbi-blast-dbs.rake"
12
+ Rake.application.init 'http-ncbi-blast-dbs'
13
+ Rake.application.load_imports
14
+ Rake.application.top_level
15
+ else;
16
+ import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
17
+ Rake.application.init 'ncbi-blast-dbs'
18
+ Rake.application.load_imports
19
+ Rake.application.top_level
20
+ end
@@ -0,0 +1,84 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ puts "using http-ncbi-dbs-dgs.rake"
4
+ # Downloads tarball at the given URL if a local copy does not exist, or if the
5
+ # local copy is older than at the given URL, or if the local copy is corrupt.
6
+ def download(url, last_to_do)
7
+ file = File.basename(url)
8
+
9
+ # # Resume an interrupted download or fetch the file for the first time. If
10
+ # # the file on the server is newer, then it is downloaded from start.
11
+
12
+ sh "wget -Nc --no-verbose #{url}"
13
+ # If the local copy is already fully retrieved, then the previous command
14
+ # ignores the timestamp. So we check with the server again if the file on
15
+ # the server is newer and if so download the new copy.
16
+ sh "wget -N --no-verbose #{url}"
17
+ sh "wget -Nc --no-verbose #{url}.md5"
18
+ sh "wget -N --no-verbose #{url}.md5"
19
+ # Immediately download md5 and verify the tarball. Re-download tarball if
20
+ # corrupt; extract otherwise.
21
+ sh "md5sum -c #{file}.md5" do |matched, _|
22
+ if !matched
23
+ sh "rm #{file} #{file}.md5"; download(url)
24
+ # too many tar instances unzipping the same file clutter the system
25
+ elsif file == last_to_do;
26
+ sh "tar xfov #{file}"
27
+ else
28
+ # at least nr and nt tarballs have identical files .?al; unsure of others
29
+ sh "tar xfov #{file} --exclude='*.?al' --exclude='taxdb*'"
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+ def databases
36
+ method = 'https://'
37
+ host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
38
+ uri = URI.parse(method + host + "/" + dir + "/")
39
+
40
+ response = Net::HTTP.get_response(uri)
41
+ body = response.body.split
42
+
43
+ array_of_files = []
44
+ body.each do |line|
45
+ # regex takes the raw http response, matches lines such as:
46
+ # href="tsa_nt.06.tar.gz.md5">tsa_nt.06.tar.gz</a>
47
+ # Returns:
48
+ # tsa_nt.06.tar.gz
49
+ filenames_and_newlines = line[/(^href=".*">)(.*tar.gz|.*md5)(<\/a>)$/, 2]
50
+ array_of_files.append(filenames_and_newlines) unless filenames_and_newlines.nil?
51
+ end
52
+
53
+ # append the full path to file for downstream wget
54
+ array_of_files.map! { |string| "".concat("/blast/db/", string ) }
55
+ array_of_files.
56
+ map { |file| File.join(host, file) }.
57
+ select { |file| file.match(/\.tar\.gz$/) }.
58
+ group_by { |file| File.basename(file).split('.')[0] }
59
+ end
60
+
61
+
62
+ # Create user-facing task for each database to drive the download of its
63
+ # volumes in parallel.
64
+ databases.each do |name, files|
65
+ last = { name => files.last }
66
+ multitask(name => files.map { |file| task(file) { download(file, last.values.uniq) } })
67
+ end
68
+
69
+ # List name of all databases that can be downloaded if executed without
70
+ # any arguments.
71
+ task :default do
72
+ databases
73
+ puts databases.keys.push('taxdump').join(', ')
74
+ end
75
+
76
+ task :taxdump do
77
+ download('https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', "nil")
78
+ end
79
+
80
+ # Ruby being over my head, this is my quick-and-dirty way to trick it ignoring
81
+ # "http" as a task rather than a specification. Happy for an expert to fix it up!
82
+ task :http do
83
+ puts "using http method"
84
+ end
@@ -6,20 +6,32 @@ def download(url)
6
6
  file = File.basename(url)
7
7
  # Resume an interrupted download or fetch the file for the first time. If
8
8
  # the file on the server is newer, then it is downloaded from start.
9
- sh "wget -Nc #{url}"
9
+ sh "wget -Nc --no-verbose #{url}"
10
10
  # If the local copy is already fully retrieved, then the previous command
11
11
  # ignores the timestamp. So we check with the server again if the file on
12
12
  # the server is newer and if so download the new copy.
13
- sh "wget -N #{url}"
14
-
15
- # Immediately download md5 and verify the tarball. Re-download tarball if
16
- # corrupt; extract otherwise.
17
- sh "wget #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
18
- if !matched
19
- sh "rm #{file} #{file}.md5"; download(url)
20
- else
21
- sh "tar xvf #{file}"
22
- end
13
+ sh "wget -N --no-verbose #{url}"
14
+
15
+ # Download Md5
16
+ sh "wget --no-verbose #{url}.md5"
17
+
18
+ # Verify the tarball using md5sum or md5
19
+ if system("which md5sum > /dev/null")
20
+ matched = system("md5sum -c #{file}.md5")
21
+ elsif system("which md5 > /dev/null")
22
+ md5_out = %x[md5 -q #{file}].chomp
23
+ md5_actual = File.read("#{file}.md5").split[0]
24
+ matched = md5_out == md5_actual
25
+ else
26
+ puts "Cannot find md5sum or md5. Please install md5sum or md5 and try again"
27
+ exit 1
28
+ end
29
+
30
+ # Re-download tarball if corrupt; extract otherwise.
31
+ if !matched
32
+ sh "rm #{file} #{file}.md5"; download(url)
33
+ else
34
+ sh "tar xf #{file}"
23
35
  end
24
36
  end
25
37
 
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
2
2
  s.authors = ['Anurag Priyam']
3
3
  s.email = ['anurag08priyam@gmail.com']
4
4
  s.name = 'ncbi-blast-dbs'
5
- s.version = '0.0.6'
5
+ s.version = '0.0.7'
6
6
  s.summary = 'Fast download BLAST databases from NCBI.'
7
7
  s.description = <<DESC
8
8
  Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
metadata CHANGED
@@ -1,46 +1,39 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ncbi-blast-dbs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
5
- prerelease:
4
+ version: 0.0.7
6
5
  platform: ruby
7
6
  authors:
8
7
  - Anurag Priyam
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2017-04-30 00:00:00.000000000 Z
11
+ date: 2021-06-16 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '10.3'
22
- - - ! '>='
20
+ - - ">="
23
21
  - !ruby/object:Gem::Version
24
22
  version: 10.3.2
25
23
  type: :runtime
26
24
  prerelease: false
27
25
  version_requirements: !ruby/object:Gem::Requirement
28
- none: false
29
26
  requirements:
30
- - - ~>
27
+ - - "~>"
31
28
  - !ruby/object:Gem::Version
32
29
  version: '10.3'
33
- - - ! '>='
30
+ - - ">="
34
31
  - !ruby/object:Gem::Version
35
32
  version: 10.3.2
36
- description: ! 'Downloads BLAST databases from NCBI. Database files (volumes) are
37
- downloaded in
38
-
33
+ description: |
34
+ Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
39
35
  parallel; number of threads to use is determined automatically. Database files
40
-
41
36
  are verified and extracted upon download.
42
-
43
- '
44
37
  email:
45
38
  - anurag08priyam@gmail.com
46
39
  executables:
@@ -48,36 +41,35 @@ executables:
48
41
  extensions: []
49
42
  extra_rdoc_files: []
50
43
  files:
51
- - .ruby-version
44
+ - ".ruby-version"
52
45
  - Gemfile
53
46
  - LICENSE.txt
54
47
  - README.md
55
48
  - bin/ncbi-blast-dbs
49
+ - lib/http-ncbi-blast-dbs.rake
56
50
  - lib/ncbi-blast-dbs.rake
57
51
  - ncbi-blast-dbs.gemspec
58
52
  homepage: http://github.com/yeban/ncbi-blast-dbs
59
53
  licenses:
60
54
  - MIT
55
+ metadata: {}
61
56
  post_install_message:
62
57
  rdoc_options: []
63
58
  require_paths:
64
59
  - lib
65
60
  required_ruby_version: !ruby/object:Gem::Requirement
66
- none: false
67
61
  requirements:
68
- - - ! '>='
62
+ - - ">="
69
63
  - !ruby/object:Gem::Version
70
64
  version: '0'
71
65
  required_rubygems_version: !ruby/object:Gem::Requirement
72
- none: false
73
66
  requirements:
74
- - - ! '>='
67
+ - - ">="
75
68
  - !ruby/object:Gem::Version
76
69
  version: '0'
77
70
  requirements: []
78
- rubyforge_project:
79
- rubygems_version: 1.8.23.2
71
+ rubygems_version: 3.0.3
80
72
  signing_key:
81
- specification_version: 3
73
+ specification_version: 4
82
74
  summary: Fast download BLAST databases from NCBI.
83
75
  test_files: []