ncbi-blast-dbs 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -8
- data/lib/ncbi-blast-dbs.rake +17 -7
- data/ncbi-blast-dbs.gemspec +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
## Fast download BLAST databases from NCBI
|
2
2
|
|
3
3
|
Database files (volumes) are downloaded in parallel: number of threads to use
|
4
|
-
is determined automatically. MD5 checksum is verified and the database
|
5
|
-
extracted upon download.
|
6
|
-
order.
|
7
|
-
|
4
|
+
is determined automatically. MD5 checksum is verified and the database volume
|
5
|
+
extracted upon download. Database volumes are not downloaded in a particular
|
6
|
+
order. The volumes are updated if a newer version is available on the server,
|
7
|
+
or re-downloaded if corrupt. Aborted downloads are safely resumed.
|
8
8
|
|
9
|
-
|
10
|
-
which is a pure Perl script,
|
11
|
-
to `wget` and `md5sum` and is thus not as universal.
|
9
|
+
`ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
|
10
|
+
`update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
|
11
|
+
download and checksum verification to `wget` and `md5sum` and is thus not as
|
12
|
+
universal.
|
12
13
|
|
13
14
|
### Installation
|
14
15
|
|
@@ -22,7 +23,12 @@ to `wget` and `md5sum` and is thus not as universal.
|
|
22
23
|
|
23
24
|
#### Download all volumes of a BLAST database
|
24
25
|
|
25
|
-
ncbi-blast-dbs nr
|
26
|
+
ncbi-blast-dbs nt nr
|
27
|
+
|
28
|
+
Databases are downloaded one after the other while volumes of each database
|
29
|
+
are downloaded in parallel.
|
30
|
+
|
31
|
+
ncbi-blast-dbs nt nr
|
26
32
|
|
27
33
|
NCBI expects users to submit their email address when downloading data from
|
28
34
|
their FTP server. To comply with that, download as:
|
data/lib/ncbi-blast-dbs.rake
CHANGED
@@ -4,15 +4,16 @@ require 'net/ftp'
|
|
4
4
|
# local copy is older than at the given URL, or if the local copy is corrupt.
|
5
5
|
def download(url)
|
6
6
|
file = File.basename(url)
|
7
|
-
#
|
8
|
-
#
|
7
|
+
# Resume an interrupted download or fetch the file for the first time. If
|
8
|
+
# the file on the server is newer, then it is downloaded from start.
|
9
|
+
sh "wget -Nc #{url}"
|
10
|
+
# If the local copy is already fully retrieved, then the previous command
|
11
|
+
# ignores the timestamp. So we check with the server again if the file on
|
12
|
+
# the server is newer and if so download the new copy.
|
9
13
|
sh "wget -N #{url}"
|
10
|
-
# Resume aborted download. Do nothing if the file is already fully retrieved
|
11
|
-
# (at the cost is a round trip to server).
|
12
|
-
sh "wget -c #{url}"
|
13
14
|
|
14
|
-
#
|
15
|
-
# extract otherwise.
|
15
|
+
# Immediately download md5 and verify the tarball. Re-download tarball if
|
16
|
+
# corrupt; extract otherwise.
|
16
17
|
sh "wget #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
|
17
18
|
if !matched
|
18
19
|
sh "rm #{file} #{file}.md5"; download(url)
|
@@ -22,6 +23,11 @@ def download(url)
|
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
26
|
+
# Connects to NCBI's FTP server, gets the URL of all database volumes and
|
27
|
+
# returns them grouped by database name:
|
28
|
+
#
|
29
|
+
# {'nr' => ['ftp://...', ...], 'nt' => [...], ...}
|
30
|
+
#
|
25
31
|
def databases
|
26
32
|
host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
|
27
33
|
usr, pswd = 'anonymous', ENV['email']
|
@@ -35,10 +41,14 @@ def databases
|
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
44
|
+
# Create user-facing task for each database to drive the download of its
|
45
|
+
# volumes in parallel.
|
38
46
|
databases.each do |name, files|
|
39
47
|
multitask(name => files.map { |file| task(file) { download(file) } })
|
40
48
|
end
|
41
49
|
|
50
|
+
# List name of all databases that can be downloaded if executed without
|
51
|
+
# any arguments.
|
42
52
|
task :default do
|
43
53
|
puts databases.keys.join(', ')
|
44
54
|
end
|
data/ncbi-blast-dbs.gemspec
CHANGED
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
|
|
2
2
|
s.authors = ['Anurag Priyam']
|
3
3
|
s.email = ['anurag08priyam@gmail.com']
|
4
4
|
s.name = 'ncbi-blast-dbs'
|
5
|
-
s.version = '0.0.4'
|
5
|
+
s.version = '0.0.5'
|
6
6
|
s.summary = 'Fast download BLAST databases from NCBI.'
|
7
7
|
s.description = <<DESC
|
8
8
|
Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ncbi-blast-dbs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-02-
|
12
|
+
date: 2016-02-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|