ncbi-blast-dbs 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f2e68b292d54a54bc7963a04a9debdd55c187b4a572a12b3bab8d9aa0edbc6cf
4
+ data.tar.gz: c86d2164148df0565da00a7dfb356615ab2aef4839c3810f36fe47237aaa9b38
5
+ SHA512:
6
+ metadata.gz: f18535e773eef6a3f05907af6b6a8a7dcc29d658a35226e9e894c9dab712811a1c4cfad88336d461c374c79260c0cbfda732b85a76124158f0bac890e82e8bfd
7
+ data.tar.gz: 617a66f3796f233c134c9c6a3419b4fe4646b989d967b1c01d0e2ddcfd9b8bea16776f662a6975cf6f2f5fc51ecd741babb82746526481159b6611747f5663ee
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 1.9
1
+ 3.2.2
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 3.2.2
data/README.md CHANGED
@@ -8,8 +8,8 @@ or re-downloaded if corrupt. Aborted downloads are safely resumed.
8
8
 
9
9
  `ncbi-blast-dbs` is faster than NCBI's `update_blastdb.pl`. But unlike
10
10
  `update_blastdb.pl`, which is a pure Perl script, `ncbi-blast-dbs` delegates
11
- download and checksum verification to `wget` and `md5sum` and is thus not as
12
- universal.
11
+ download and checksum verification to `wget` and `md5sum` / `md5` and is thus
12
+ not as universal.
13
13
 
14
14
  ### Installation
15
15
 
data/bin/ncbi-blast-dbs CHANGED
@@ -1,13 +1,20 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rake'
4
- import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
5
4
 
6
5
  trap :INT do
7
6
  puts "Quitting ..."
8
7
  exit!
9
8
  end
10
9
 
11
- Rake.application.init 'ncbi-blast-dbs'
12
- Rake.application.load_imports
13
- Rake.application.top_level
10
+ if ARGV.include? "http";
11
+ import "#{File.dirname(__FILE__)}/../lib/http-ncbi-blast-dbs.rake"
12
+ Rake.application.init 'http-ncbi-blast-dbs'
13
+ Rake.application.load_imports
14
+ Rake.application.top_level
15
+ else;
16
+ import "#{File.dirname(__FILE__)}/../lib/ncbi-blast-dbs.rake"
17
+ Rake.application.init 'ncbi-blast-dbs'
18
+ Rake.application.load_imports
19
+ Rake.application.top_level
20
+ end
@@ -0,0 +1,84 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ puts "using http-ncbi-dbs-dgs.rake"
4
+ # Downloads tarball at the given URL if a local copy does not exist, or if the
5
+ # local copy is older than at the given URL, or if the local copy is corrupt.
6
+ def download(url, last_to_do)
7
+ file = File.basename(url)
8
+
9
+ # Resume an interrupted download or fetch the file for the first time. If
10
+ # the file on the server is newer, it is re-downloaded from the start.
11
+
12
+ sh "wget -Nc --no-verbose #{url}"
13
+ # If the local copy is already fully retrieved, then the previous command
14
+ # ignores the timestamp. So we check with the server again if the file on
15
+ # the server is newer and if so download the new copy.
16
+ sh "wget -N --no-verbose #{url}"
17
+ sh "wget -Nc --no-verbose #{url}.md5"
18
+ sh "wget -N --no-verbose #{url}.md5"
19
+ # Verify the tarball against its md5 checksum. Re-download tarball if
20
+ # corrupt; extract otherwise.
21
+ sh "md5sum -c #{file}.md5" do |matched, _|
22
+ if !matched
23
+ sh "rm #{file} #{file}.md5"; download(url)
24
+ # too many tar instances unzipping the same file clutter the system
25
+ elsif file == last_to_do;
26
+ sh "tar xfov #{file}"
27
+ else
28
+ # at least nr and nt tarballs have identical files .?al; unsure of others
29
+ sh "tar xfov #{file} --exclude='*.?al' --exclude='taxdb*'"
30
+ end
31
+ end
32
+ end
33
+
34
+
35
+ def databases
36
+ method = 'https://'
37
+ host, dir = 'ftp.ncbi.nlm.nih.gov', 'blast/db'
38
+ uri = URI.parse(method + host + "/" + dir + "/")
39
+
40
+ response = Net::HTTP.get_response(uri)
41
+ body = response.body.split
42
+
43
+ array_of_files = []
44
+ body.each do |line|
45
+ # regex takes the raw http response, matches lines such as:
46
+ # href="tsa_nt.06.tar.gz.md5">tsa_nt.06.tar.gz</a>
47
+ # Returns:
48
+ # tsa_nt.06.tar.gz
49
+ filenames_and_newlines = line[/(^href=".*">)(.*tar.gz|.*md5)(<\/a>)$/, 2]
50
+ array_of_files.append(filenames_and_newlines) unless filenames_and_newlines.nil?
51
+ end
52
+
53
+ # prepend the server directory path to each file name for downstream wget
54
+ array_of_files.map! { |string| "".concat("/blast/db/", string ) }
55
+ array_of_files.
56
+ map { |file| File.join(host, file) }.
57
+ select { |file| file.match(/\.tar\.gz$/) }.
58
+ group_by { |file| File.basename(file).split('.')[0] }
59
+ end
60
+
61
+
62
+ # Create user-facing task for each database to drive the download of its
63
+ # volumes in parallel.
64
+ databases.each do |name, files|
65
+ last = { name => files.last }
66
+ multitask(name => files.map { |file| task(file) { download(file, last.values.uniq) } })
67
+ end
68
+
69
+ # List the names of all databases that can be downloaded if executed without
70
+ # any arguments.
71
+ task :default do
72
+ databases
73
+ puts databases.keys.push('taxdump').join(', ')
74
+ end
75
+
76
+ task :taxdump do
77
+ download('https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', "nil")
78
+ end
79
+
80
+ # Ruby being over my head, this is my quick-and-dirty way to trick rake into
81
+ # accepting "http" as a (no-op) task rather than a mode flag. Happy for an expert to fix it up!
82
+ task :http do
83
+ puts "using http method"
84
+ end
@@ -6,20 +6,32 @@ def download(url)
6
6
  file = File.basename(url)
7
7
  # Resume an interrupted download or fetch the file for the first time. If
8
8
  # the file on the server is newer, then it is downloaded from start.
9
- sh "wget -Nc #{url}"
9
+ sh "wget -Nc --no-verbose #{url}"
10
10
  # If the local copy is already fully retrieved, then the previous command
11
11
  # ignores the timestamp. So we check with the server again if the file on
12
12
  # the server is newer and if so download the new copy.
13
- sh "wget -N #{url}"
14
-
15
- # Immediately download md5 and verify the tarball. Re-download tarball if
16
- # corrupt; extract otherwise.
17
- sh "wget #{url}.md5 && md5sum -c #{file}.md5" do |matched, _|
18
- if !matched
19
- sh "rm #{file} #{file}.md5"; download(url)
20
- else
21
- sh "tar xvf #{file}"
22
- end
13
+ sh "wget -N --no-verbose #{url}"
14
+
15
+ # Download the MD5 checksum file
16
+ sh "wget --no-verbose #{url}.md5"
17
+
18
+ # Verify the tarball using md5sum or md5
19
+ if system("which md5sum > /dev/null")
20
+ matched = system("md5sum -c #{file}.md5")
21
+ elsif system("which md5 > /dev/null")
22
+ md5_out = %x[md5 -q #{file}].chomp
23
+ md5_actual = File.read("#{file}.md5").split[0]
24
+ matched = md5_out == md5_actual
25
+ else
26
+ puts "Cannot find md5sum or md5. Please install md5sum or md5 and try again"
27
+ exit 1
28
+ end
29
+
30
+ # Re-download tarball if corrupt; extract otherwise.
31
+ if !matched
32
+ sh "rm #{file} #{file}.md5"; download(url)
33
+ else
34
+ sh "tar xf #{file}"
23
35
  end
24
36
  end
25
37
 
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
2
2
  s.authors = ['Anurag Priyam']
3
3
  s.email = ['anurag08priyam@gmail.com']
4
4
  s.name = 'ncbi-blast-dbs'
5
- s.version = '0.0.6'
5
+ s.version = '0.1.0'
6
6
  s.summary = 'Fast download BLAST databases from NCBI.'
7
7
  s.description = <<DESC
8
8
  Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
@@ -14,6 +14,7 @@ DESC
14
14
 
15
15
  s.files = `git ls-files`.split
16
16
  s.require_paths = ['lib']
17
- s.add_dependency('rake', '~> 10.3', '>= 10.3.2')
17
+ s.add_dependency('rake', '~> 13.0')
18
+ s.add_dependency('net-ftp', '~> 0.1.3')
18
19
  s.executables = ['ncbi-blast-dbs']
19
20
  end
metadata CHANGED
@@ -1,46 +1,47 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ncbi-blast-dbs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Anurag Priyam
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2017-04-30 00:00:00.000000000 Z
11
+ date: 2024-02-07 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: '10.3'
22
- - - ! '>='
23
- - !ruby/object:Gem::Version
24
- version: 10.3.2
19
+ version: '13.0'
25
20
  type: :runtime
26
21
  prerelease: false
27
22
  version_requirements: !ruby/object:Gem::Requirement
28
- none: false
29
23
  requirements:
30
- - - ~>
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '13.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: net-ftp
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
31
32
  - !ruby/object:Gem::Version
32
- version: '10.3'
33
- - - ! '>='
33
+ version: 0.1.3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
34
39
  - !ruby/object:Gem::Version
35
- version: 10.3.2
36
- description: ! 'Downloads BLAST databases from NCBI. Database files (volumes) are
37
- downloaded in
38
-
40
+ version: 0.1.3
41
+ description: |
42
+ Downloads BLAST databases from NCBI. Database files (volumes) are downloaded in
39
43
  parallel; number of threads to use is determined automatically. Database files
40
-
41
44
  are verified and extracted upon download.
42
-
43
- '
44
45
  email:
45
46
  - anurag08priyam@gmail.com
46
47
  executables:
@@ -48,36 +49,36 @@ executables:
48
49
  extensions: []
49
50
  extra_rdoc_files: []
50
51
  files:
51
- - .ruby-version
52
+ - ".ruby-version"
53
+ - ".tool-versions"
52
54
  - Gemfile
53
55
  - LICENSE.txt
54
56
  - README.md
55
57
  - bin/ncbi-blast-dbs
58
+ - lib/http-ncbi-blast-dbs.rake
56
59
  - lib/ncbi-blast-dbs.rake
57
60
  - ncbi-blast-dbs.gemspec
58
61
  homepage: http://github.com/yeban/ncbi-blast-dbs
59
62
  licenses:
60
63
  - MIT
64
+ metadata: {}
61
65
  post_install_message:
62
66
  rdoc_options: []
63
67
  require_paths:
64
68
  - lib
65
69
  required_ruby_version: !ruby/object:Gem::Requirement
66
- none: false
67
70
  requirements:
68
- - - ! '>='
71
+ - - ">="
69
72
  - !ruby/object:Gem::Version
70
73
  version: '0'
71
74
  required_rubygems_version: !ruby/object:Gem::Requirement
72
- none: false
73
75
  requirements:
74
- - - ! '>='
76
+ - - ">="
75
77
  - !ruby/object:Gem::Version
76
78
  version: '0'
77
79
  requirements: []
78
- rubyforge_project:
79
- rubygems_version: 1.8.23.2
80
+ rubygems_version: 3.0.3.1
80
81
  signing_key:
81
- specification_version: 3
82
+ specification_version: 4
82
83
  summary: Fast download BLAST databases from NCBI.
83
84
  test_files: []