ncbi-blast-dbs 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +14 -8
- data/lib/ncbi-blast-dbs.rake +17 -7
- data/ncbi-blast-dbs.gemspec +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
## Fast download BLAST databases from NCBI
|
2
2
|
|
3
3
|
Database files (volumes) are downloaded in parallel: number of threads to use
|
4
|
-
is determined automatically. MD5 checksum is verified and the database
|
5
|
-
extracted upon download.
|
6
|
-
order.
|
7
|
-
|
4
|
+
is determined automatically. MD5 checksum is verified and the database volume
|
5
|
+
extracted upon download. Database volumes are not downloaded in a particular
|
6
|
+
order. The volumes are updated if a newer version is available on the server,
|
7
|
+
or re-downloaded if corrupt. Aborted downloads are safely resumed.
|
8
8
|
|
9
|
-
|
10
|
-
which is a pure Perl script,
|
11
|
-
to `wget` and `md5sum` and is thus not as universal.
|
9
|
+
`ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
|
10
|
+
`update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
|
11
|
+
download and checksum verification to `wget` and `md5sum` and is thus not as
|
12
|
+
universal.
|
12
13
|
|
13
14
|
### Installation
|
14
15
|
|
@@ -22,7 +23,12 @@ to `wget` and `md5sum` and is thus not as universal.
|
|
22
23
|
|
23
24
|
#### Download all volumes of a BLAST database
|
24
25
|
|
25
|
-
ncbi-blast-dbs nr
|
26
|
+
ncbi-blast-dbs nt nr
|
27
|
+
|
28
|
+
Databases are downloaded one after the other while volumes of each database
|
29
|
+
are downloaded in parallel.
|
30
|
+
|
31
|
+
ncbi-blast-dbs nt nr
|
26
32
|
|
27
33
|
NCBI expects users to submit their email address when downloading data from
|
28
34
|
their FTP server. To comply with that, download as:
|
data/lib/ncbi-blast-dbs.rake
CHANGED
@@ -4,15 +4,16 @@ require 'net/ftp'
|
|
4
4
|
# local copy is older than at the given URL, or if the local copy is corrupt.
|
5
5
|
def download(url)
|
6
6
|
file = File.basename(url)
|
7
|
-
#
|
8
|
-
#
|
7
|
+
# Resume an interrupted download or fetch the file for the first time. If
|
8
|
+
# the file on the server is newer, then it is downloaded from start.
|
9
|
+
sh "wget -Nc #{url}"
|
10
|
+
# If the local copy is already fully retrieved, then the previous command
|
11
|
+
# ignores the timestamp. So we check with the server again if the file on
|
12
|
+
# the server is newer and if so download the new copy.
|
9
13
|
sh "wget -N #{url}"
|
10
|
-
# Resume aborted download. Do nothing if the file is already fully retrieved
|
11
|
-
# (at the cost is a round trip to server).
|
12
|
-
sh "wget -c #{url}"
|
13
14
|
|
14
|
-
#
|
15
|
-
# extract otherwise.
|
15
|
+
# Immediately download md5 and verify the tarball. Re-download tarball if
|
16
|
+
# corrupt; extract otherwise.
|
16
17
|
sh "wget #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
|
17
18
|
if !matched
|
18
19
|
sh "rm #{file} #{file}.md5"; download(url)
|
@@ -22,6 +23,11 @@ def download(url)
|
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
26
|
+
# Connects to NCBI's FTP server, gets the URL of all database volumes and
|
27
|
+
# returns them grouped by database name:
|
28
|
+
#
|
29
|
+
# {'nr' => ['ftp://...', ...], 'nt' => [...], ...}
|
30
|
+
#
|
25
31
|
def databases
|
26
32
|
host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
|
27
33
|
usr, pswd = 'anonymous', ENV['email']
|
@@ -35,10 +41,14 @@ def databases
|
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
44
|
+
# Create user-facing task for each database to drive the download of its
|
45
|
+
# volumes in parallel.
|
38
46
|
databases.each do |name, files|
|
39
47
|
multitask(name => files.map { |file| task(file) { download(file) } })
|
40
48
|
end
|
41
49
|
|
50
|
+
# List name of all databases that can be downloaded if executed without
|
51
|
+
# any arguments.
|
42
52
|
task :default do
|
43
53
|
puts databases.keys.join(', ')
|
44
54
|
end
|
data/ncbi-blast-dbs.gemspec
CHANGED
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
|
|
2
2
|
s.authors = ['Anurag Priyam']
|
3
3
|
s.email = ['anurag08priyam@gmail.com']
|
4
4
|
s.name = 'ncbi-blast-dbs'
|
5
|
-
s.version = '0.0.4'
|
5
|
+
s.version = '0.0.5'
|
6
6
|
s.summary = 'Fast download BLAST databases from NCBI.'
|
7
7
|
s.description = <<DESC
|
8
8
|
Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ncbi-blast-dbs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-02-
|
12
|
+
date: 2016-02-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|