ncbi-blast-dbs 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bcce1b77f891ca2abb26a791de778fb9305d0e8a998f971eff97393e11f6ea87
4
+ data.tar.gz: 1dc5dc4d9f3d2bf0f6dea3b75bc04f1cefbece636598c941dcf08f9c2affba2a
5
+ SHA512:
6
+ metadata.gz: fb6f3996b8e344a7e98af27c6187bc71a57cdf3996ed74d84243355c6c12aa9e122081b31b2b006d995fea04011c698cde49ef692c8b8eb6b45a790b2490d17f
7
+ data.tar.gz: bc9f835b65e8bb3fd7923d697a9b98096f2d5b5b8e9f7e5c0482f0996aece512397fecc70db78426e93bb91b99a199f7b9cd20bb48c0f46b915e5e580b7f7ef5
data/README.md CHANGED
@@ -8,8 +8,8 @@ or re-downloaded if corrupt. Aborted downloads are safely resumed.
8
8
 
9
9
  `ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
10
10
  `update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
11
- download and checksum verification to `wget` and `md5sum` and is thus not as
12
- universal.
11
+ download and checksum verification to `wget` and `md5sum` / `md5` and is thus
12
+ not as universal.
13
13
 
14
14
  ### Installation
15
15
 
data/bin/ncbi-blast-dbs CHANGED
@@ -1,13 +1,20 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rake'
4
- import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
5
4
 
6
5
  trap :INT do
7
6
  puts "Quitting ..."
8
7
  exit!
9
8
  end
10
9
 
11
- Rake.application.init 'ncbi-blast-dbs'
12
- Rake.application.load_imports
13
- Rake.application.top_level
10
+ if ARGV.include? "http";
11
+ import "#{File.dirname(__FILE__)}/../lib/http-ncbi-blast-dbs.rake"
12
+ Rake.application.init 'http-ncbi-blast-dbs'
13
+ Rake.application.load_imports
14
+ Rake.application.top_level
15
+ else;
16
+ import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
17
+ Rake.application.init 'ncbi-blast-dbs'
18
+ Rake.application.load_imports
19
+ Rake.application.top_level
20
+ end
@@ -0,0 +1,84 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ puts "using http-ncbi-dbs-dgs.rake"
4
+ # Downloads tarball at the given URL if a local copy does not exist, or if the
5
+ # local copy is older than the one at the given URL, or if the local copy is corrupt.
6
+ def download(url, last_to_do)
7
+ file = File.basename(url)
8
+
9
+ # # Resume an interrupted download or fetch the file for the first time. If
10
+ # # the file on the server is newer, then it is downloaded from start.
11
+
12
+ sh "wget -Nc --no-verbose #{url}"
13
+ # If the local copy is already fully retrieved, then the previous command
14
+ # ignores the timestamp. So we check with the server again if the file on
15
+ # the server is newer and if so download the new copy.
16
+ sh "wget -N --no-verbose #{url}"
17
+ sh "wget -Nc --no-verbose #{url}.md5"
18
+ sh "wget -N --no-verbose #{url}.md5"
19
+ # Verify the tarball against the md5 file fetched above. Re-download tarball if
20
+ # corrupt; extract otherwise.
21
+ sh "md5sum -c #{file}.md5" do |matched, _|
22
+ if !matched
23
+ sh "rm #{file} #{file}.md5"; download(url)
24
+ # too many tar instances unzipping the same file clutter the system
25
+ elsif file == last_to_do;
26
+ sh "tar xfov #{file}"
27
+ else
28
+ # at least nr and nt tarballs have identical files .?al; unsure of others
29
+ sh "tar xfov #{file} --exclude='*.?al' --exclude='taxdb*'"
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+ def databases
36
+ method = 'https://'
37
+ host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
38
+ uri = URI.parse(method + host + "/" + dir + "/")
39
+
40
+ response = Net::HTTP.get_response(uri)
41
+ body = response.body.split
42
+
43
+ array_of_files = []
44
+ body.each do |line|
45
+ # regex takes the raw http response, matches lines such as:
46
+ # href="tsa_nt.06.tar.gz.md5">tsa_nt.06.tar.gz</a>
47
+ # Returns:
48
+ # tsa_nt.06.tar.gz
49
+ filenames_and_newlines = line[/(^href=".*">)(.*tar.gz|.*md5)(<\/a>)$/, 2]
50
+ array_of_files.append(filenames_and_newlines) unless filenames_and_newlines.nil?
51
+ end
52
+
53
+ # prepend the remote directory path to each file name for downstream wget
54
+ array_of_files.map! { |string| "".concat("/blast/db/", string ) }
55
+ array_of_files.
56
+ map { |file| File.join(host, file) }.
57
+ select { |file| file.match(/\.tar\.gz$/) }.
58
+ group_by { |file| File.basename(file).split('.')[0] }
59
+ end
60
+
61
+
62
+ # Create user-facing task for each database to drive the download of its
63
+ # volumes in parallel.
64
+ databases.each do |name, files|
65
+ last = { name => files.last }
66
+ multitask(name => files.map { |file| task(file) { download(file, last.values.uniq) } })
67
+ end
68
+
69
+ # List the names of all databases that can be downloaded if executed without
70
+ # any arguments.
71
+ task :default do
72
+ databases
73
+ puts databases.keys.push('taxdump').join(', ')
74
+ end
75
+
76
+ task :taxdump do
77
+ download('https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', "nil")
78
+ end
79
+
80
+ # Ruby being over my head, this is my quick-and-dirty way to get rake to treat
81
+ # "http" as a harmless task, since it is really a mode switch. Happy for an expert to fix it up!
82
+ task :http do
83
+ puts "using http method"
84
+ end
@@ -6,20 +6,32 @@ def download(url)
6
6
  file = File.basename(url)
7
7
  # Resume an interrupted download or fetch the file for the first time. If
8
8
  # the file on the server is newer, then it is downloaded from start.
9
- sh "wget -Nc #{url}"
9
+ sh "wget -Nc --no-verbose #{url}"
10
10
  # If the local copy is already fully retrieved, then the previous command
11
11
  # ignores the timestamp. So we check with the server again if the file on
12
12
  # the server is newer and if so download the new copy.
13
- sh "wget -N #{url}"
14
-
15
- # Immediately download md5 and verify the tarball. Re-download tarball if
16
- # corrupt; extract otherwise.
17
- sh "wget #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
18
- if !matched
19
- sh "rm #{file} #{file}.md5"; download(url)
20
- else
21
- sh "tar xvf #{file}"
22
- end
13
+ sh "wget -N --no-verbose #{url}"
14
+
15
+ # Download Md5
16
+ sh "wget --no-verbose #{url}.md5"
17
+
18
+ # Verify the tarball using md5sum or md5
19
+ if system("which md5sum > /dev/null")
20
+ matched = system("md5sum -c #{file}.md5")
21
+ elsif system("which md5 > /dev/null")
22
+ md5_out = %x[md5 -q #{file}].chomp
23
+ md5_actual = File.read("#{file}.md5").split[0]
24
+ matched = md5_out == md5_actual
25
+ else
26
+ puts "Cannot find md5sum or md5. Please install md5sum or md5 and try again"
27
+ exit 1
28
+ end
29
+
30
+ # Re-download tarball if corrupt; extract otherwise.
31
+ if !matched
32
+ sh "rm #{file} #{file}.md5"; download(url)
33
+ else
34
+ sh "tar xf #{file}"
23
35
  end
24
36
  end
25
37
 
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
2
2
  s.authors = ['Anurag Priyam']
3
3
  s.email = ['anurag08priyam@gmail.com']
4
4
  s.name = 'ncbi-blast-dbs'
5
- s.version = '0.0.6'
5
+ s.version = '0.0.7'
6
6
  s.summary = 'Fast download BLAST databases from NCBI.'
7
7
  s.description = <<DESC
8
8
  Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
metadata CHANGED
@@ -1,46 +1,39 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ncbi-blast-dbs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
5
- prerelease:
4
+ version: 0.0.7
6
5
  platform: ruby
7
6
  authors:
8
7
  - Anurag Priyam
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2017-04-30 00:00:00.000000000 Z
11
+ date: 2021-06-16 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '10.3'
22
- - - ! '>='
20
+ - - ">="
23
21
  - !ruby/object:Gem::Version
24
22
  version: 10.3.2
25
23
  type: :runtime
26
24
  prerelease: false
27
25
  version_requirements: !ruby/object:Gem::Requirement
28
- none: false
29
26
  requirements:
30
- - - ~>
27
+ - - "~>"
31
28
  - !ruby/object:Gem::Version
32
29
  version: '10.3'
33
- - - ! '>='
30
+ - - ">="
34
31
  - !ruby/object:Gem::Version
35
32
  version: 10.3.2
36
- description: ! 'Downloads BLAST databases from NCBI. Database files (volumes) are
37
- downloaded in
38
-
33
+ description: |
34
+ Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
39
35
  parallel; number of threads to use is determined automatically. Database files
40
-
41
36
  are verified and extracted upon download.
42
-
43
- '
44
37
  email:
45
38
  - anurag08priyam@gmail.com
46
39
  executables:
@@ -48,36 +41,35 @@ executables:
48
41
  extensions: []
49
42
  extra_rdoc_files: []
50
43
  files:
51
- - .ruby-version
44
+ - ".ruby-version"
52
45
  - Gemfile
53
46
  - LICENSE.txt
54
47
  - README.md
55
48
  - bin/ncbi-blast-dbs
49
+ - lib/http-ncbi-blast-dbs.rake
56
50
  - lib/ncbi-blast-dbs.rake
57
51
  - ncbi-blast-dbs.gemspec
58
52
  homepage: http://github.com/yeban/ncbi-blast-dbs
59
53
  licenses:
60
54
  - MIT
55
+ metadata: {}
61
56
  post_install_message:
62
57
  rdoc_options: []
63
58
  require_paths:
64
59
  - lib
65
60
  required_ruby_version: !ruby/object:Gem::Requirement
66
- none: false
67
61
  requirements:
68
- - - ! '>='
62
+ - - ">="
69
63
  - !ruby/object:Gem::Version
70
64
  version: '0'
71
65
  required_rubygems_version: !ruby/object:Gem::Requirement
72
- none: false
73
66
  requirements:
74
- - - ! '>='
67
+ - - ">="
75
68
  - !ruby/object:Gem::Version
76
69
  version: '0'
77
70
  requirements: []
78
- rubyforge_project:
79
- rubygems_version: 1.8.23.2
71
+ rubygems_version: 3.0.3
80
72
  signing_key:
81
- specification_version: 3
73
+ specification_version: 4
82
74
  summary: Fast download BLAST databases from NCBI.
83
75
  test_files: []