miga-base 1.3.12.2 → 1.3.13.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf660d951441269a671b7fb57d3e91a572fdb62d5ffbaeda9ff412042798759b
4
- data.tar.gz: 1721194dcce4a70e4cad66ca4cea5db88f521323022b62c2c93740458a1b0bea
3
+ metadata.gz: 8659bae6ee7d40a76a464840a56b4aff77f9a848d777349e98d8c316717b0021
4
+ data.tar.gz: dab1cbd8ee24503b4dfbe0e8b034da195120251c258c798bbba92668a52d3cfa
5
5
  SHA512:
6
- metadata.gz: ff0b9c3a8db37fa9b0d75a0b884154b416669e068eab9f22f406facf9822345a38baccc224138a15fb5e9e1dff786e55e2ebc0d870573b892f056bf7e18e275a
7
- data.tar.gz: af7dcea647e3ead8fdbde6f917264efd93335cc9c7878bd7a4b43b8dbb27434882dff76728fa984c5724802a581421ddd333f6cfe46ab1d6c75fb4bb953bc378
6
+ metadata.gz: '09a916e1db7bb4912111e83c2d832a779a245c8eb326aee3cfcead8611d484f737a1cbcae1a1279a2673a2f31a8252c859b533b7ae9b6c74c8881b41e63e7c40'
7
+ data.tar.gz: 43e7065632d2642abb0c6bb5fac8ae96e0696c0f48b128376fa5d1ac929c1c7b787df61af78bee5a4824049da7b2baf976f9bb3d3f083184ca02deefc166114e
@@ -5,6 +5,9 @@ require 'miga/cli/action'
5
5
  require 'miga/remote_dataset'
6
6
 
7
7
  class MiGA::Cli::Action::Get < MiGA::Cli::Action
8
+ require 'miga/cli/action/download/base'
9
+ include MiGA::Cli::Action::Download::Base
10
+
8
11
  def parse_cli
9
12
  cli.defaults = {
10
13
  query: false, universe: :ncbi, db: :nuccore, get_md: false, only_md: false
@@ -69,12 +72,18 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
69
72
  '--api-key STRING',
70
73
  'API key for the given universe'
71
74
  ) { |v| cli[:api_key] = v }
75
+ opt.on(
76
+ '--ncbi-taxonomy-dump [path]',
77
+ 'Path to an NCBI Taxonomy dump directory to query instead of API calls',
78
+ 'If the path is not passed, the dump is automatically downloaded'
79
+ ) { |v| cli[:ncbi_taxonomy_dump] = v || true }
72
80
  end
73
81
  end
74
82
 
75
83
  def perform
76
84
  glob = get_sub_cli
77
85
  p = cli.load_project
86
+ load_ncbi_taxonomy_dump
78
87
  glob.each do |sub_cli|
79
88
  rd = create_remote_dataset(sub_cli, p)
80
89
  next if rd.nil?
@@ -96,9 +96,21 @@ class MiGA::RemoteDataset
96
96
  o = download_rest(opts.merge(universe: :ncbi, db: :nuccore))
97
97
  return o unless o.strip.empty?
98
98
 
99
- MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
100
- opts[:format] = :fasta
101
- ncbi_asm_get(opts)
99
+ begin
100
+ MiGA::MiGA.DEBUG 'Empty sequence, attempting download as NCBI assembly'
101
+ opts[:format] = :fasta
102
+ ncbi_asm_get(opts)
103
+ rescue => e
104
+ raise e unless opts[:obj]&.metadata&.dig(:ncbi_wgs)
105
+ MiGA::MiGA.DEBUG e.to_s
106
+ end
107
+
108
+ MiGA::MiGA.DEBUG 'Empty sequence, attempting download as WGS records'
109
+ a, b = opts[:obj].metadata[:ncbi_wgs].split('-', 2)
110
+ pref = longest_common_prefix([a, b])
111
+ rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
112
+ ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
113
+ download_rest(opts.merge(universe: :ncbi, db: :nuccore, ids: ids))
102
114
  end
103
115
 
104
116
  ##
@@ -184,6 +196,15 @@ class MiGA::RemoteDataset
184
196
  end
185
197
  tree
186
198
  end
199
+
200
+ ##
201
+ # From: https://github.com/isisAnchalee/Algorithms
202
+ def longest_common_prefix(strs)
203
+ return '' if strs.empty?
204
+ min, max = strs.minmax
205
+ idx = min.size.times { |i| break i if min[i] != max[i] }
206
+ min[0...idx]
207
+ end
187
208
  end
188
209
  end
189
210
 
@@ -322,9 +322,16 @@ class MiGA::RemoteDataset < MiGA::MiGA
322
322
  taxid = metadata.dig(:ncbi_dataset, :organism, :tax_id)
323
323
  return taxid if taxid
324
324
 
325
- # Try from GenBank document (obtain it)
325
+ # Get GenBank document
326
326
  doc = self.class.download(:ncbi, db, ids, :gb, nil, {}, self).split(/\n/)
327
- ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
327
+
328
+ # Since we're here, try to recover WGS for synthetic records
329
+ ln = doc.grep(/^WGS\s+\S+-\S+/).first
330
+ wgs = ln&.gsub(/^WGS\s+(\S+-\S+).*/, '\1')
331
+ @metadata[:ncbi_wgs] = wgs if wgs
332
+
333
+ # Now try to extract taxid from GenBank
334
+ ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
328
335
  return nil if ln.nil?
329
336
 
330
337
  ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 12, 2].freeze
15
+ VERSION = [1.3, 13, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 3, 8)
23
+ VERSION_DATE = Date.new(2024, 3, 10)
24
24
 
25
25
  ##
26
26
  # References of MiGA
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.12.2
4
+ version: 1.3.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-08 00:00:00.000000000 Z
11
+ date: 2024-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons