ncbi-blast-dbs 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +2 -2
- data/bin/ncbi-blast-dbs +11 -4
- data/lib/http-ncbi-blast-dbs.rake +84 -0
- data/lib/ncbi-blast-dbs.rake +23 -11
- data/ncbi-blast-dbs.gemspec +1 -1
- metadata +15 -23
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bcce1b77f891ca2abb26a791de778fb9305d0e8a998f971eff97393e11f6ea87
|
4
|
+
data.tar.gz: 1dc5dc4d9f3d2bf0f6dea3b75bc04f1cefbece636598c941dcf08f9c2affba2a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fb6f3996b8e344a7e98af27c6187bc71a57cdf3996ed74d84243355c6c12aa9e122081b31b2b006d995fea04011c698cde49ef692c8b8eb6b45a790b2490d17f
|
7
|
+
data.tar.gz: bc9f835b65e8bb3fd7923d697a9b98096f2d5b5b8e9f7e5c0482f0996aece512397fecc70db78426e93bb91b99a199f7b9cd20bb48c0f46b915e5e580b7f7ef5
|
data/README.md
CHANGED
@@ -8,8 +8,8 @@ or re-downloaded if corrupt. Aborted downloads are safely resumed.
|
|
8
8
|
|
9
9
|
`ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
|
10
10
|
`update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
|
11
|
-
download and checksum verification to `wget` and `md5sum` and is thus
|
12
|
-
universal.
|
11
|
+
download and checksum verification to `wget` and `md5sum` / `md5` and is thus
|
12
|
+
not as universal.
|
13
13
|
|
14
14
|
### Installation
|
15
15
|
|
data/bin/ncbi-blast-dbs
CHANGED
@@ -1,13 +1,20 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rake'
|
4
|
-
import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
|
5
4
|
|
6
5
|
trap :INT do
|
7
6
|
puts "Quitting ..."
|
8
7
|
exit!
|
9
8
|
end
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
Rake.application.
|
10
|
+
if ARGV.include? "http";
|
11
|
+
import "#{File.dirname(__FILE__)}/../lib/http-ncbi-blast-dbs.rake"
|
12
|
+
Rake.application.init 'http-ncbi-blast-dbs'
|
13
|
+
Rake.application.load_imports
|
14
|
+
Rake.application.top_level
|
15
|
+
else;
|
16
|
+
import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
|
17
|
+
Rake.application.init 'ncbi-blast-dbs'
|
18
|
+
Rake.application.load_imports
|
19
|
+
Rake.application.top_level
|
20
|
+
end
|
data/lib/http-ncbi-blast-dbs.rake
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
puts "using http-ncbi-dbs-dgs.rake"
|
4
|
+
# Downloads tarball at the given URL if a local copy does not exist, or if the
|
5
|
+
# local copy is older than at the given URL, or if the local copy is corrupt.
|
6
|
+
def download(url, last_to_do)
|
7
|
+
file = File.basename(url)
|
8
|
+
|
9
|
+
# # Resume an interrupted download or fetch the file for the first time. If
|
10
|
+
# # the file on the server is newer, then it is downloaded from start.
|
11
|
+
|
12
|
+
sh "wget -Nc --no-verbose #{url}"
|
13
|
+
# If the local copy is already fully retrieved, then the previous command
|
14
|
+
# ignores the timestamp. So we check with the server again if the file on
|
15
|
+
# the server is newer and if so download the new copy.
|
16
|
+
sh "wget -N --no-verbose #{url}"
|
17
|
+
sh "wget -Nc --no-verbose #{url}.md5"
|
18
|
+
sh "wget -N --no-verbose #{url}.md5"
|
19
|
+
# Immediately download md5 and verify the tarball. Re-download tarball if
|
20
|
+
# corrupt; extract otherwise.
|
21
|
+
sh "md5sum -c #{file}.md5" do |matched, _|
|
22
|
+
if !matched
|
23
|
+
sh "rm #{file} #{file}.md5"; download(url)
|
24
|
+
# too many tar instances unzipping the same file clutter the system
|
25
|
+
elsif file == last_to_do;
|
26
|
+
sh "tar xfov #{file}"
|
27
|
+
else
|
28
|
+
# at least nr and nt tarballs have identical files .?al; unsure of others
|
29
|
+
sh "tar xfov #{file} --exclude='*.?al' --exclude='taxdb*'"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
def databases
|
36
|
+
method = 'https://'
|
37
|
+
host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
|
38
|
+
uri = URI.parse(method + host + "/" + dir + "/")
|
39
|
+
|
40
|
+
response = Net::HTTP.get_response(uri)
|
41
|
+
body = response.body.split
|
42
|
+
|
43
|
+
array_of_files = []
|
44
|
+
body.each do |line|
|
45
|
+
# regex takes the raw http response, matches lines such as:
|
46
|
+
# href="tsa_nt.06.tar.gz.md5">tsa_nt.06.tar.gz</a>
|
47
|
+
# Returns:
|
48
|
+
# tsa_nt.06.tar.gz
|
49
|
+
filenames_and_newlines = line[/(^href=".*">)(.*tar.gz|.*md5)(<\/a>)$/, 2]
|
50
|
+
array_of_files.append(filenames_and_newlines) unless filenames_and_newlines.nil?
|
51
|
+
end
|
52
|
+
|
53
|
+
# append the full path to file for downstream wget
|
54
|
+
array_of_files.map! { |string| "".concat("/blast/db/", string ) }
|
55
|
+
array_of_files.
|
56
|
+
map { |file| File.join(host, file) }.
|
57
|
+
select { |file| file.match(/\.tar\.gz$/) }.
|
58
|
+
group_by { |file| File.basename(file).split('.')[0] }
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
# Create user-facing task for each database to drive the download of its
|
63
|
+
# volumes in parallel.
|
64
|
+
databases.each do |name, files|
|
65
|
+
last = { name => files.last }
|
66
|
+
multitask(name => files.map { |file| task(file) { download(file, last.values.uniq) } })
|
67
|
+
end
|
68
|
+
|
69
|
+
# List name of all databases that can be downloaded if executed without
|
70
|
+
# any arguments.
|
71
|
+
task :default do
|
72
|
+
databases
|
73
|
+
puts databases.keys.push('taxdump').join(', ')
|
74
|
+
end
|
75
|
+
|
76
|
+
task :taxdump do
|
77
|
+
download('https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', "nil")
|
78
|
+
end
|
79
|
+
|
80
|
+
# Ruby being over my head, this is my quick-and-dirty way to trick it ignoring
|
81
|
+
# "http" as a task rather than a specification. Happy for an expert to fix it up!
|
82
|
+
task :http do
|
83
|
+
puts "using http method"
|
84
|
+
end
|
data/lib/ncbi-blast-dbs.rake
CHANGED
@@ -6,20 +6,32 @@ def download(url)
|
|
6
6
|
file = File.basename(url)
|
7
7
|
# Resume an interrupted download or fetch the file for the first time. If
|
8
8
|
# the file on the server is newer, then it is downloaded from start.
|
9
|
-
sh "wget -Nc #{url}"
|
9
|
+
sh "wget -Nc --no-verbose #{url}"
|
10
10
|
# If the local copy is already fully retrieved, then the previous command
|
11
11
|
# ignores the timestamp. So we check with the server again if the file on
|
12
12
|
# the server is newer and if so download the new copy.
|
13
|
-
sh "wget -N #{url}"
|
14
|
-
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
13
|
+
sh "wget -N --no-verbose #{url}"
|
14
|
+
|
15
|
+
# Download Md5
|
16
|
+
sh "wget --no-verbose #{url}.md5"
|
17
|
+
|
18
|
+
# Verify the tarball using md5sum or md5
|
19
|
+
if system("which md5sum > /dev/null")
|
20
|
+
matched = system("md5sum -c #{file}.md5")
|
21
|
+
elsif system("which md5 > /dev/null")
|
22
|
+
md5_out = %x[md5 -q #{file}].chomp
|
23
|
+
md5_actual = File.read("#{file}.md5").split[0]
|
24
|
+
matched = md5_out == md5_actual
|
25
|
+
else
|
26
|
+
puts "Cannot find md5sum or md5. Please install md5sum or md5 and try again"
|
27
|
+
exit 1
|
28
|
+
end
|
29
|
+
|
30
|
+
# Re-download tarball if corrupt; extract otherwise.
|
31
|
+
if !matched
|
32
|
+
sh "rm #{file} #{file}.md5"; download(url)
|
33
|
+
else
|
34
|
+
sh "tar xf #{file}"
|
23
35
|
end
|
24
36
|
end
|
25
37
|
|
data/ncbi-blast-dbs.gemspec
CHANGED
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
|
|
2
2
|
s.authors = ['Anurag Priyam']
|
3
3
|
s.email = ['anurag08priyam@gmail.com']
|
4
4
|
s.name = 'ncbi-blast-dbs'
|
5
|
-
s.version = '0.0.6'
|
5
|
+
s.version = '0.0.7'
|
6
6
|
s.summary = 'Fast download BLAST databases from NCBI.'
|
7
7
|
s.description = <<DESC
|
8
8
|
Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
|
metadata
CHANGED
@@ -1,46 +1,39 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ncbi-blast-dbs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.7
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Anurag Priyam
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2021-06-16 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rake
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '10.3'
|
22
|
-
- -
|
20
|
+
- - ">="
|
23
21
|
- !ruby/object:Gem::Version
|
24
22
|
version: 10.3.2
|
25
23
|
type: :runtime
|
26
24
|
prerelease: false
|
27
25
|
version_requirements: !ruby/object:Gem::Requirement
|
28
|
-
none: false
|
29
26
|
requirements:
|
30
|
-
- - ~>
|
27
|
+
- - "~>"
|
31
28
|
- !ruby/object:Gem::Version
|
32
29
|
version: '10.3'
|
33
|
-
- -
|
30
|
+
- - ">="
|
34
31
|
- !ruby/object:Gem::Version
|
35
32
|
version: 10.3.2
|
36
|
-
description:
|
37
|
-
downloaded in
|
38
|
-
|
33
|
+
description: |
|
34
|
+
Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
|
39
35
|
parallel; number of threads to use is determined automatically. Database files
|
40
|
-
|
41
36
|
are verified and extracted upon download.
|
42
|
-
|
43
|
-
'
|
44
37
|
email:
|
45
38
|
- anurag08priyam@gmail.com
|
46
39
|
executables:
|
@@ -48,36 +41,35 @@ executables:
|
|
48
41
|
extensions: []
|
49
42
|
extra_rdoc_files: []
|
50
43
|
files:
|
51
|
-
- .ruby-version
|
44
|
+
- ".ruby-version"
|
52
45
|
- Gemfile
|
53
46
|
- LICENSE.txt
|
54
47
|
- README.md
|
55
48
|
- bin/ncbi-blast-dbs
|
49
|
+
- lib/http-ncbi-blast-dbs.rake
|
56
50
|
- lib/ncbi-blast-dbs.rake
|
57
51
|
- ncbi-blast-dbs.gemspec
|
58
52
|
homepage: http://github.com/yeban/ncbi-blast-dbs
|
59
53
|
licenses:
|
60
54
|
- MIT
|
55
|
+
metadata: {}
|
61
56
|
post_install_message:
|
62
57
|
rdoc_options: []
|
63
58
|
require_paths:
|
64
59
|
- lib
|
65
60
|
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
61
|
requirements:
|
68
|
-
- -
|
62
|
+
- - ">="
|
69
63
|
- !ruby/object:Gem::Version
|
70
64
|
version: '0'
|
71
65
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
-
none: false
|
73
66
|
requirements:
|
74
|
-
- -
|
67
|
+
- - ">="
|
75
68
|
- !ruby/object:Gem::Version
|
76
69
|
version: '0'
|
77
70
|
requirements: []
|
78
|
-
|
79
|
-
rubygems_version: 1.8.23.2
|
71
|
+
rubygems_version: 3.0.3
|
80
72
|
signing_key:
|
81
|
-
specification_version:
|
73
|
+
specification_version: 4
|
82
74
|
summary: Fast download BLAST databases from NCBI.
|
83
75
|
test_files: []
|