miga-base 1.3.12.2 → 1.3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf660d951441269a671b7fb57d3e91a572fdb62d5ffbaeda9ff412042798759b
4
- data.tar.gz: 1721194dcce4a70e4cad66ca4cea5db88f521323022b62c2c93740458a1b0bea
3
+ metadata.gz: 8659bae6ee7d40a76a464840a56b4aff77f9a848d777349e98d8c316717b0021
4
+ data.tar.gz: dab1cbd8ee24503b4dfbe0e8b034da195120251c258c798bbba92668a52d3cfa
5
5
  SHA512:
6
- metadata.gz: ff0b9c3a8db37fa9b0d75a0b884154b416669e068eab9f22f406facf9822345a38baccc224138a15fb5e9e1dff786e55e2ebc0d870573b892f056bf7e18e275a
7
- data.tar.gz: af7dcea647e3ead8fdbde6f917264efd93335cc9c7878bd7a4b43b8dbb27434882dff76728fa984c5724802a581421ddd333f6cfe46ab1d6c75fb4bb953bc378
6
+ metadata.gz: '09a916e1db7bb4912111e83c2d832a779a245c8eb326aee3cfcead8611d484f737a1cbcae1a1279a2673a2f31a8252c859b533b7ae9b6c74c8881b41e63e7c40'
7
+ data.tar.gz: 43e7065632d2642abb0c6bb5fac8ae96e0696c0f48b128376fa5d1ac929c1c7b787df61af78bee5a4824049da7b2baf976f9bb3d3f083184ca02deefc166114e
@@ -5,6 +5,9 @@ require 'miga/cli/action'
5
5
  require 'miga/remote_dataset'
6
6
 
7
7
  class MiGA::Cli::Action::Get < MiGA::Cli::Action
8
+ require 'miga/cli/action/download/base'
9
+ include MiGA::Cli::Action::Download::Base
10
+
8
11
  def parse_cli
9
12
  cli.defaults = {
10
13
  query: false, universe: :ncbi, db: :nuccore, get_md: false, only_md: false
@@ -69,12 +72,18 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
69
72
  '--api-key STRING',
70
73
  'API key for the given universe'
71
74
  ) { |v| cli[:api_key] = v }
75
+ opt.on(
76
+ '--ncbi-taxonomy-dump [path]',
77
+ 'Path to an NCBI Taxonomy dump directory to query instead of API calls',
78
+ 'If the path is not passed, the dump is automatically downloaded'
79
+ ) { |v| cli[:ncbi_taxonomy_dump] = v || true }
72
80
  end
73
81
  end
74
82
 
75
83
  def perform
76
84
  glob = get_sub_cli
77
85
  p = cli.load_project
86
+ load_ncbi_taxonomy_dump
78
87
  glob.each do |sub_cli|
79
88
  rd = create_remote_dataset(sub_cli, p)
80
89
  next if rd.nil?
@@ -96,9 +96,21 @@ class MiGA::RemoteDataset
96
96
  o = download_rest(opts.merge(universe: :ncbi, db: :nuccore))
97
97
  return o unless o.strip.empty?
98
98
 
99
- MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
100
- opts[:format] = :fasta
101
- ncbi_asm_get(opts)
99
+ begin
100
+ MiGA::MiGA.DEBUG 'Empty sequence, attempting download as NCBI assembly'
101
+ opts[:format] = :fasta
102
+ ncbi_asm_get(opts)
103
+ rescue => e
104
+ raise e unless opts[:obj]&.metadata&.dig(:ncbi_wgs)
105
+ MiGA::MiGA.DEBUG e.to_s
106
+ end
107
+
108
+ MiGA::MiGA.DEBUG 'Empty sequence, attempting download as WGS records'
109
+ a, b = opts[:obj].metadata[:ncbi_wgs].split('-', 2)
110
+ pref = longest_common_prefix([a, b])
111
+ rang = a[pref.size .. -1].to_i .. b[pref.size .. -1].to_i
112
+ ids = rang.map { |k| "%s%0#{a.size - pref.size}i" % [pref, k] }
113
+ download_rest(opts.merge(universe: :ncbi, db: :nuccore, ids: ids))
102
114
  end
103
115
 
104
116
  ##
@@ -184,6 +196,15 @@ class MiGA::RemoteDataset
184
196
  end
185
197
  tree
186
198
  end
199
+
200
+ ##
201
+ # From: https://github.com/isisAnchalee/Algorithms
202
+ def longest_common_prefix(strs)
203
+ return '' if strs.empty?
204
+ min, max = strs.minmax
205
+ idx = min.size.times { |i| break i if min[i] != max[i] }
206
+ min[0...idx]
207
+ end
187
208
  end
188
209
  end
189
210
 
@@ -322,9 +322,16 @@ class MiGA::RemoteDataset < MiGA::MiGA
322
322
  taxid = metadata.dig(:ncbi_dataset, :organism, :tax_id)
323
323
  return taxid if taxid
324
324
 
325
- # Try from GenBank document (obtain it)
325
+ # Get GenBank document
326
326
  doc = self.class.download(:ncbi, db, ids, :gb, nil, {}, self).split(/\n/)
327
- ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
327
+
328
+ # Since we're here, try to recover WGS for synthetic records
329
+ ln = doc.grep(/^WGS\s+\S+-\S+/).first
330
+ wgs = ln&.gsub(/^WGS\s+(\S+-\S+).*/, '\1')
331
+ @metadata[:ncbi_wgs] = wgs if wgs
332
+
333
+ # Now try to extract taxid from GenBank
334
+ ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
328
335
  return nil if ln.nil?
329
336
 
330
337
  ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 12, 2].freeze
15
+ VERSION = [1.3, 13, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem relese.
23
- VERSION_DATE = Date.new(2024, 3, 8)
23
+ VERSION_DATE = Date.new(2024, 3, 10)
24
24
 
25
25
  ##
26
26
  # References of MiGA
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.12.2
4
+ version: 1.3.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-08 00:00:00.000000000 Z
11
+ date: 2024-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons