ncbi-blast-dbs 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +2 -2
- data/bin/ncbi-blast-dbs +11 -4
- data/lib/http-ncbi-blast-dbs.rake +84 -0
- data/lib/ncbi-blast-dbs.rake +23 -11
- data/ncbi-blast-dbs.gemspec +1 -1
- metadata +15 -23
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bcce1b77f891ca2abb26a791de778fb9305d0e8a998f971eff97393e11f6ea87
|
4
|
+
data.tar.gz: 1dc5dc4d9f3d2bf0f6dea3b75bc04f1cefbece636598c941dcf08f9c2affba2a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fb6f3996b8e344a7e98af27c6187bc71a57cdf3996ed74d84243355c6c12aa9e122081b31b2b006d995fea04011c698cde49ef692c8b8eb6b45a790b2490d17f
|
7
|
+
data.tar.gz: bc9f835b65e8bb3fd7923d697a9b98096f2d5b5b8e9f7e5c0482f0996aece512397fecc70db78426e93bb91b99a199f7b9cd20bb48c0f46b915e5e580b7f7ef5
|
data/README.md
CHANGED
@@ -8,8 +8,8 @@ or re-downloaded if corrupt. Aborted downloads are safely resumed.
|
|
8
8
|
|
9
9
|
`ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
|
10
10
|
`update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
|
11
|
-
download and checksum verification to `wget` and `md5sum` and is thus
|
12
|
-
universal.
|
11
|
+
download and checksum verification to `wget` and `md5sum` / `md5` and is thus
|
12
|
+
not as universal.
|
13
13
|
|
14
14
|
### Installation
|
15
15
|
|
data/bin/ncbi-blast-dbs
CHANGED
@@ -1,13 +1,20 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rake'
|
4
|
-
import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
|
5
4
|
|
6
5
|
trap :INT do
|
7
6
|
puts "Quitting ..."
|
8
7
|
exit!
|
9
8
|
end
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
Rake.application.
|
10
|
+
if ARGV.include? "http";
|
11
|
+
import "#{File.dirname(__FILE__)}/../lib/http-ncbi-blast-dbs.rake"
|
12
|
+
Rake.application.init 'http-ncbi-blast-dbs'
|
13
|
+
Rake.application.load_imports
|
14
|
+
Rake.application.top_level
|
15
|
+
else;
|
16
|
+
import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
|
17
|
+
Rake.application.init 'ncbi-blast-dbs'
|
18
|
+
Rake.application.load_imports
|
19
|
+
Rake.application.top_level
|
20
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
# Announce which rake file is in use. The message names this file,
# lib/http-ncbi-blast-dbs.rake, which bin/ncbi-blast-dbs imports when "http"
# is among the command line arguments.
puts "using http-ncbi-blast-dbs.rake"
|
4
|
+
# Downloads the tarball at the given URL together with its .md5 companion
# using wget, verifies the checksum with md5sum and extracts the archive. A
# tarball that fails verification is deleted and downloaded again.
#
# url        - String URL of the tarball; "#{url}.md5" is fetched as well.
# last_to_do - String or Array of Strings (file names or URLs) naming the
#              volume(s) that should be extracted in full. All other volumes
#              are extracted without their '*.?al' and 'taxdb*' members. A
#              value that matches no file (nil, "nil", ...) is accepted.
def download(url, last_to_do)
  file = File.basename(url)

  # Callers hand over full URLs, possibly wrapped in an Array, so compare on
  # base names instead of comparing the file name with the raw argument.
  extract_everything = Array(last_to_do).any? do |entry|
    File.basename(entry.to_s) == file
  end

  [url, "#{url}.md5"].each do |remote|
    # Resume an interrupted download or fetch the file for the first time. If
    # the file on the server is newer, then it is downloaded from start.
    sh "wget -Nc --no-verbose #{remote}"
    # If the local copy is already fully retrieved, then the previous command
    # ignores the timestamp. So we check with the server again if the file on
    # the server is newer and if so download the new copy.
    sh "wget -N --no-verbose #{remote}"
  end

  # Verify the tarball. Re-download tarball if corrupt; extract otherwise.
  sh "md5sum -c #{file}.md5" do |matched, _status|
    if !matched
      sh "rm #{file} #{file}.md5"
      # Retry with the same arguments; the method takes two parameters.
      download(url, last_to_do)
    elsif extract_everything
      sh "tar xfov #{file}"
    else
      # Too many tar instances unpacking the same file clutter the system, so
      # files shared between volumes are skipped here. At least nr and nt
      # tarballs have identical files .?al; unsure of others.
      sh "tar xfov #{file} --exclude='*.?al' --exclude='taxdb*'"
    end
  end
end
|
33
|
+
|
34
|
+
|
35
|
+
# Fetches the listing of NCBI's BLAST database directory over HTTPS and
# returns the downloadable volumes grouped by database.
#
# Returns a Hash that maps a database name (the part of the file name before
# the first '.', e.g. 'nr' for 'nr.00.tar.gz') to an Array of tarball URLs in
# the order they appear in the listing. The URLs carry the same https://
# scheme that is used to fetch the listing.
# Raises RuntimeError if the server does not answer with a success status.
def databases
  scheme = 'https://'
  host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
  base = "#{scheme}#{host}/#{dir}"

  response = Net::HTTP.get_response(URI.parse("#{base}/"))
  unless response.is_a?(Net::HTTPSuccess)
    raise "Could not list #{base}/ (HTTP #{response.code})"
  end

  # Splitting the page on whitespace yields tokens such as:
  #   href="tsa_nt.06.tar.gz">tsa_nt.06.tar.gz</a>
  # of which the link text is kept:
  #   tsa_nt.06.tar.gz
  # Tokens for other entries (e.g. the .md5 files) do not match and are
  # dropped.
  tarballs = response.body.split.map do |token|
    token[/\Ahref="[^"]*">([^<]*\.tar\.gz)<\/a>\z/, 1]
  end.compact

  # Prefix each file name with the full location for downstream wget.
  tarballs.
    map { |file| "#{base}/#{file}" }.
    group_by { |file| File.basename(file).split('.')[0] }
end
|
60
|
+
|
61
|
+
|
62
|
+
# Fetch the directory listing once and reuse it for every task defined below,
# rather than requesting it again for each use.
available = databases

# Create user-facing task for each database to drive the download of its
# volumes in parallel.
available.each do |name, files|
  # download receives the database's final volume as its last_to_do argument.
  last = [files.last]
  volume_tasks = files.map { |file| task(file) { download(file, last) } }
  multitask(name => volume_tasks)
end

# List name of all databases that can be downloaded if executed without
# any arguments.
task :default do
  puts available.keys.push('taxdump').join(', ')
end

# Download the NCBI taxonomy dump. "nil" is a placeholder for last_to_do.
task :taxdump do
  download('https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', "nil")
end

# bin/ncbi-blast-dbs selects this rake file when "http" is among the command
# line arguments. Rake treats every argument as a task name, so a task called
# http has to exist; it only reports the chosen method.
task :http do
  puts "using http method"
end
|
data/lib/ncbi-blast-dbs.rake
CHANGED
@@ -6,20 +6,32 @@ def download(url)
|
|
6
6
|
file = File.basename(url)
|
7
7
|
# Resume an interrupted download or fetch the file for the first time. If
|
8
8
|
# the file on the server is newer, then it is downloaded from start.
|
9
|
-
sh "wget -Nc #{url}"
|
9
|
+
sh "wget -Nc --no-verbose #{url}"
|
10
10
|
# If the local copy is already fully retrieved, then the previous command
|
11
11
|
# ignores the timestamp. So we check with the server again if the file on
|
12
12
|
# the server is newer and if so download the new copy.
|
13
|
-
sh "wget -N #{url}"
|
14
|
-
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
13
|
+
sh "wget -N --no-verbose #{url}"
|
14
|
+
|
15
|
+
# Download Md5
|
16
|
+
sh "wget --no-verbose #{url}.md5"
|
17
|
+
|
18
|
+
# Verify the tarball using md5sum or md5
|
19
|
+
if system("which md5sum > /dev/null")
|
20
|
+
matched = system("md5sum -c #{file}.md5")
|
21
|
+
elsif system("which md5 > /dev/null")
|
22
|
+
md5_out = %x[md5 -q #{file}].chomp
|
23
|
+
md5_actual = File.read("#{file}.md5").split[0]
|
24
|
+
matched = md5_out == md5_actual
|
25
|
+
else
|
26
|
+
puts "Cannot find md5sum or md5. Please install md5sum or md5 and try again"
|
27
|
+
exit 1
|
28
|
+
end
|
29
|
+
|
30
|
+
# Re-download tarball if corrupt; extract otherwise.
|
31
|
+
if !matched
|
32
|
+
sh "rm #{file} #{file}.md5"; download(url)
|
33
|
+
else
|
34
|
+
sh "tar xf #{file}"
|
23
35
|
end
|
24
36
|
end
|
25
37
|
|
data/ncbi-blast-dbs.gemspec
CHANGED
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
|
|
2
2
|
s.authors = ['Anurag Priyam']
|
3
3
|
s.email = ['anurag08priyam@gmail.com']
|
4
4
|
s.name = 'ncbi-blast-dbs'
|
5
|
-
s.version = '0.0.
|
5
|
+
s.version = '0.0.7'
|
6
6
|
s.summary = 'Fast download BLAST databases from NCBI.'
|
7
7
|
s.description = <<DESC
|
8
8
|
Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
|
metadata
CHANGED
@@ -1,46 +1,39 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ncbi-blast-dbs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.7
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Anurag Priyam
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2021-06-16 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rake
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '10.3'
|
22
|
-
- -
|
20
|
+
- - ">="
|
23
21
|
- !ruby/object:Gem::Version
|
24
22
|
version: 10.3.2
|
25
23
|
type: :runtime
|
26
24
|
prerelease: false
|
27
25
|
version_requirements: !ruby/object:Gem::Requirement
|
28
|
-
none: false
|
29
26
|
requirements:
|
30
|
-
- - ~>
|
27
|
+
- - "~>"
|
31
28
|
- !ruby/object:Gem::Version
|
32
29
|
version: '10.3'
|
33
|
-
- -
|
30
|
+
- - ">="
|
34
31
|
- !ruby/object:Gem::Version
|
35
32
|
version: 10.3.2
|
36
|
-
description:
|
37
|
-
downloaded in
|
38
|
-
|
33
|
+
description: |
|
34
|
+
Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
|
39
35
|
parallel; number of threads to use is determined automatically. Database files
|
40
|
-
|
41
36
|
are verified and extracted upon download.
|
42
|
-
|
43
|
-
'
|
44
37
|
email:
|
45
38
|
- anurag08priyam@gmail.com
|
46
39
|
executables:
|
@@ -48,36 +41,35 @@ executables:
|
|
48
41
|
extensions: []
|
49
42
|
extra_rdoc_files: []
|
50
43
|
files:
|
51
|
-
- .ruby-version
|
44
|
+
- ".ruby-version"
|
52
45
|
- Gemfile
|
53
46
|
- LICENSE.txt
|
54
47
|
- README.md
|
55
48
|
- bin/ncbi-blast-dbs
|
49
|
+
- lib/http-ncbi-blast-dbs.rake
|
56
50
|
- lib/ncbi-blast-dbs.rake
|
57
51
|
- ncbi-blast-dbs.gemspec
|
58
52
|
homepage: http://github.com/yeban/ncbi-blast-dbs
|
59
53
|
licenses:
|
60
54
|
- MIT
|
55
|
+
metadata: {}
|
61
56
|
post_install_message:
|
62
57
|
rdoc_options: []
|
63
58
|
require_paths:
|
64
59
|
- lib
|
65
60
|
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
61
|
requirements:
|
68
|
-
- -
|
62
|
+
- - ">="
|
69
63
|
- !ruby/object:Gem::Version
|
70
64
|
version: '0'
|
71
65
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
-
none: false
|
73
66
|
requirements:
|
74
|
-
- -
|
67
|
+
- - ">="
|
75
68
|
- !ruby/object:Gem::Version
|
76
69
|
version: '0'
|
77
70
|
requirements: []
|
78
|
-
|
79
|
-
rubygems_version: 1.8.23.2
|
71
|
+
rubygems_version: 3.0.3
|
80
72
|
signing_key:
|
81
|
-
specification_version:
|
73
|
+
specification_version: 4
|
82
74
|
summary: Fast download BLAST databases from NCBI.
|
83
75
|
test_files: []
|