miga-base 1.2.10.2 → 1.2.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/base.rb +109 -0
- data/lib/miga/cli/action/download/gtdb.rb +56 -0
- data/lib/miga/cli/action/{ncbi_get/downloads.rb → download/ncbi.rb} +4 -102
- data/lib/miga/cli/action/get.rb +2 -1
- data/lib/miga/cli/action/gtdb_get.rb +61 -0
- data/lib/miga/cli/action/ncbi_get.rb +2 -2
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/remote_dataset/base.rb +20 -1
- data/lib/miga/remote_dataset.rb +29 -5
- data/lib/miga/version.rb +2 -2
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bca3141b79964a880f22ba6e4e724b4821466321942a02f6443dcb1e5ba3b6cc
|
4
|
+
data.tar.gz: 45af5251d8bced591ba493ca7b9cac5ae45d2c527f19902773a122f436b93808
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e648e5f3068348bd0064f8c3f8e3691c9c978aa9c06bb35bcf1b9a3b9a8d8eb0d1d67b5520e52537cf3ef6940414f88d24602cfaa47129fa39face66a95b375
|
7
|
+
data.tar.gz: aab0fb4e5764d76eb78a908e6997761e346d8027323bf305528ccf5abcd258b3b84d6ef356352a270b2e273e8563a28e6e9e0f6993bd7bd648ac28c4a6bce6d7
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/remote_dataset'
|
4
|
+
module MiGA::Cli::Action::Download
|
5
|
+
end
|
6
|
+
|
7
|
+
##
|
8
|
+
# Helper module including download functions for the *_get actions
|
9
|
+
module MiGA::Cli::Action::Download::Base
|
10
|
+
def cli_filters(opt)
|
11
|
+
opt.on(
|
12
|
+
'--blacklist PATH',
|
13
|
+
'A file with dataset names to blacklist'
|
14
|
+
) { |v| cli[:blacklist] = v }
|
15
|
+
cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
|
16
|
+
opt.on(
|
17
|
+
'--ignore-until STRING',
|
18
|
+
'Ignores all datasets until a name is found (useful for large reruns)'
|
19
|
+
) { |v| cli[:ignore_until] = v }
|
20
|
+
cli.opt_flag(
|
21
|
+
opt, 'get-metadata',
|
22
|
+
'Only download and update metadata for existing datasets', :get_md
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
def cli_save_actions(opt)
|
27
|
+
cli.opt_flag(
|
28
|
+
opt, '--only-metadata',
|
29
|
+
'Create datasets without input data but retrieve all metadata',
|
30
|
+
:only_md
|
31
|
+
)
|
32
|
+
opt.on(
|
33
|
+
'--save-every INT', Integer,
|
34
|
+
'Save project every this many downloaded datasets',
|
35
|
+
'If zero, it saves the project only once upon completion',
|
36
|
+
"By default: #{cli[:save_every]}"
|
37
|
+
) { |v| cli[:save_every] = v }
|
38
|
+
opt.on(
|
39
|
+
'-q', '--query',
|
40
|
+
'Register the datasets as queries, not reference datasets'
|
41
|
+
) { |v| cli[:query] = v }
|
42
|
+
opt.on(
|
43
|
+
'-u', '--unlink',
|
44
|
+
'Unlink all datasets in the project missing from the download list'
|
45
|
+
) { |v| cli[:unlink] = v }
|
46
|
+
opt.on(
|
47
|
+
'-R', '--remote-list PATH',
|
48
|
+
'Path to an output file with the list of all datasets listed remotely'
|
49
|
+
) { |v| cli[:remote_list] = v }
|
50
|
+
end
|
51
|
+
|
52
|
+
def discard_blacklisted(ds)
|
53
|
+
unless cli[:blacklist].nil?
|
54
|
+
cli.say "Discarding datasets in #{cli[:blacklist]}"
|
55
|
+
File.readlines(cli[:blacklist])
|
56
|
+
.select { |i| i !~ /^#/ }
|
57
|
+
.map(&:chomp)
|
58
|
+
.each { |i| ds.delete i }
|
59
|
+
end
|
60
|
+
ds
|
61
|
+
end
|
62
|
+
|
63
|
+
def impose_limit(ds)
|
64
|
+
max = cli[:max_datasets].to_i
|
65
|
+
if !max.zero? && max < ds.size
|
66
|
+
cli.say "Subsampling list from #{ds.size} to #{max} datasets"
|
67
|
+
sample = ds.keys.sample(max)
|
68
|
+
ds.select! { |k, _| sample.include? k }
|
69
|
+
end
|
70
|
+
ds
|
71
|
+
end
|
72
|
+
|
73
|
+
def download_entries(ds, p)
|
74
|
+
cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
|
75
|
+
p.do_not_save = true if cli[:save_every] != 1
|
76
|
+
ignore = !cli[:ignore_until].nil?
|
77
|
+
downloaded = 0
|
78
|
+
d = []
|
79
|
+
ds.each do |name, body|
|
80
|
+
d << name
|
81
|
+
cli.puts name
|
82
|
+
ignore = false if ignore && name == cli[:ignore_until]
|
83
|
+
next if ignore || p.dataset(name).nil? == cli[:get_md]
|
84
|
+
|
85
|
+
downloaded += 1
|
86
|
+
unless cli[:dry]
|
87
|
+
save_entry(name, body, p)
|
88
|
+
p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
|
89
|
+
end
|
90
|
+
end
|
91
|
+
p.do_not_save = false
|
92
|
+
p.save! if cli[:save_every] != 1
|
93
|
+
[d, downloaded]
|
94
|
+
end
|
95
|
+
|
96
|
+
def save_entry(name, body, p)
|
97
|
+
cli.say ' Locating remote dataset'
|
98
|
+
body[:md][:metadata_only] = true if cli[:only_md]
|
99
|
+
rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
|
100
|
+
if cli[:get_md]
|
101
|
+
cli.say ' Updating dataset'
|
102
|
+
rd.update_metadata(p.dataset(name), body[:md])
|
103
|
+
else
|
104
|
+
cli.say ' Creating dataset'
|
105
|
+
rd.save_to(p, name, !cli[:query], body[:md])
|
106
|
+
cli.add_metadata(p.add_dataset(name))
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action/download/base'
|
4
|
+
|
5
|
+
##
|
6
|
+
# Helper module including download functions for the gtdb_get action
|
7
|
+
module MiGA::Cli::Action::Download::Gtdb
|
8
|
+
include MiGA::Cli::Action::Download::Base
|
9
|
+
|
10
|
+
def cli_task_flags(opt)
|
11
|
+
cli.opt_flag(
|
12
|
+
opt, 'reference',
|
13
|
+
'Download only reference genomes. By default: download all'
|
14
|
+
)
|
15
|
+
end
|
16
|
+
|
17
|
+
def cli_name_modifiers(opt)
|
18
|
+
opt.on(
|
19
|
+
'--no-version-name',
|
20
|
+
'Do not add sequence version to the dataset name'
|
21
|
+
) { |v| cli[:add_version] = v }
|
22
|
+
end
|
23
|
+
|
24
|
+
def sanitize_cli
|
25
|
+
cli.ensure_par(taxon: '-T')
|
26
|
+
cli[:save_every] = 1 if cli[:dry]
|
27
|
+
end
|
28
|
+
|
29
|
+
def remote_list
|
30
|
+
cli.say 'Downloading genome list'
|
31
|
+
ds = {}
|
32
|
+
extra = ['sp_reps_only=' + cli[:reference].to_s]
|
33
|
+
json = MiGA::RemoteDataset.download(
|
34
|
+
:gtdb, :taxon, cli[:taxon], :genomes, nil, extra
|
35
|
+
)
|
36
|
+
doc = MiGA::Json.parse(json, contents: true)
|
37
|
+
|
38
|
+
Hash[
|
39
|
+
doc.map do |acc|
|
40
|
+
[
|
41
|
+
remote_row_name(acc),
|
42
|
+
{
|
43
|
+
ids: [acc], db: :assembly, universe: :gtdb,
|
44
|
+
md: { type: :genome, gtdb_assembly: acc }
|
45
|
+
}
|
46
|
+
]
|
47
|
+
end
|
48
|
+
]
|
49
|
+
end
|
50
|
+
|
51
|
+
def remote_row_name(asm)
|
52
|
+
acc = "#{asm}"
|
53
|
+
acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
|
54
|
+
acc.miga_name
|
55
|
+
end
|
56
|
+
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'miga/remote_dataset'
|
3
|
+
require 'miga/cli/action/download/base'
|
4
4
|
require 'csv'
|
5
5
|
|
6
6
|
##
|
7
7
|
# Helper module including download functions for the ncbi_get action
|
8
|
-
module MiGA::Cli::Action::NcbiGet::Downloads
|
8
|
+
module MiGA::Cli::Action::Download::Ncbi
|
9
|
+
include MiGA::Cli::Action::Download::Base
|
10
|
+
|
9
11
|
def cli_task_flags(opt)
|
10
12
|
cli.opt_flag(
|
11
13
|
opt, 'reference',
|
@@ -39,48 +41,6 @@ module MiGA::Cli::Action::NcbiGet::Downloads
|
|
39
41
|
)
|
40
42
|
end
|
41
43
|
|
42
|
-
def cli_filters(opt)
|
43
|
-
opt.on(
|
44
|
-
'--blacklist PATH',
|
45
|
-
'A file with dataset names to blacklist'
|
46
|
-
) { |v| cli[:blacklist] = v }
|
47
|
-
cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
|
48
|
-
opt.on(
|
49
|
-
'--ignore-until STRING',
|
50
|
-
'Ignores all datasets until a name is found (useful for large reruns)'
|
51
|
-
) { |v| cli[:ignore_until] = v }
|
52
|
-
cli.opt_flag(
|
53
|
-
opt, 'get-metadata',
|
54
|
-
'Only download and update metadata for existing datasets', :get_md
|
55
|
-
)
|
56
|
-
end
|
57
|
-
|
58
|
-
def cli_save_actions(opt)
|
59
|
-
cli.opt_flag(
|
60
|
-
opt, 'only-metadata',
|
61
|
-
'Create datasets without input data but retrieve all metadata',
|
62
|
-
:only_md
|
63
|
-
)
|
64
|
-
opt.on(
|
65
|
-
'--save-every INT', Integer,
|
66
|
-
'Save project every this many downloaded datasets',
|
67
|
-
'If zero, it saves the project only once upon completion',
|
68
|
-
"By default: #{cli[:save_every]}"
|
69
|
-
) { |v| cli[:save_every] = v }
|
70
|
-
opt.on(
|
71
|
-
'-q', '--query',
|
72
|
-
'Register the datasets as queries, not reference datasets'
|
73
|
-
) { |v| cli[:query] = v }
|
74
|
-
opt.on(
|
75
|
-
'-u', '--unlink',
|
76
|
-
'Unlink all datasets in the project missing from the download list'
|
77
|
-
) { |v| cli[:unlink] = v }
|
78
|
-
opt.on(
|
79
|
-
'-R', '--remote-list PATH',
|
80
|
-
'Path to an output file with the list of all datasets listed remotely'
|
81
|
-
) { |v| cli[:remote_list] = v }
|
82
|
-
end
|
83
|
-
|
84
44
|
def sanitize_cli
|
85
45
|
cli.ensure_par(taxon: '-T')
|
86
46
|
tasks = %w[reference complete chromosome scaffold contig]
|
@@ -169,62 +129,4 @@ module MiGA::Cli::Action::NcbiGet::Downloads
|
|
169
129
|
url_param[:q] += ')'
|
170
130
|
url_base + URI.encode_www_form(url_param)
|
171
131
|
end
|
172
|
-
|
173
|
-
def discard_blacklisted(ds)
|
174
|
-
unless cli[:blacklist].nil?
|
175
|
-
cli.say "Discarding datasets in #{cli[:blacklist]}"
|
176
|
-
File.readlines(cli[:blacklist])
|
177
|
-
.select { |i| i !~ /^#/ }
|
178
|
-
.map(&:chomp)
|
179
|
-
.each { |i| ds.delete i }
|
180
|
-
end
|
181
|
-
ds
|
182
|
-
end
|
183
|
-
|
184
|
-
def impose_limit(ds)
|
185
|
-
max = cli[:max_datasets].to_i
|
186
|
-
if !max.zero? && max < ds.size
|
187
|
-
cli.say "Subsampling list from #{ds.size} to #{max} datasets"
|
188
|
-
sample = ds.keys.sample(max)
|
189
|
-
ds.select! { |k, _| sample.include? k }
|
190
|
-
end
|
191
|
-
ds
|
192
|
-
end
|
193
|
-
|
194
|
-
def download_entries(ds, p)
|
195
|
-
cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
|
196
|
-
p.do_not_save = true if cli[:save_every] != 1
|
197
|
-
ignore = !cli[:ignore_until].nil?
|
198
|
-
downloaded = 0
|
199
|
-
d = []
|
200
|
-
ds.each do |name, body|
|
201
|
-
d << name
|
202
|
-
cli.puts name
|
203
|
-
ignore = false if ignore && name == cli[:ignore_until]
|
204
|
-
next if ignore || p.dataset(name).nil? == cli[:get_md]
|
205
|
-
|
206
|
-
downloaded += 1
|
207
|
-
unless cli[:dry]
|
208
|
-
save_entry(name, body, p)
|
209
|
-
p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
|
210
|
-
end
|
211
|
-
end
|
212
|
-
p.do_not_save = false
|
213
|
-
p.save! if cli[:save_every] != 1
|
214
|
-
[d, downloaded]
|
215
|
-
end
|
216
|
-
|
217
|
-
def save_entry(name, body, p)
|
218
|
-
cli.say ' Locating remote dataset'
|
219
|
-
body[:md][:metadata_only] = true if cli[:only_md]
|
220
|
-
rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
|
221
|
-
if cli[:get_md]
|
222
|
-
cli.say ' Updating dataset'
|
223
|
-
rd.update_metadata(p.dataset(name), body[:md])
|
224
|
-
else
|
225
|
-
cli.say ' Creating dataset'
|
226
|
-
rd.save_to(p, name, !cli[:query], body[:md])
|
227
|
-
cli.add_metadata(p.add_dataset(name))
|
228
|
-
end
|
229
|
-
end
|
230
132
|
end
|
data/lib/miga/cli/action/get.rb
CHANGED
@@ -17,7 +17,8 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
17
17
|
) { |v| cli[:ids] = v }
|
18
18
|
opt.on(
|
19
19
|
'-U', '--universe STRING',
|
20
|
-
"Universe of the remote database. By default: #{cli[:universe]}"
|
20
|
+
"Universe of the remote database. By default: #{cli[:universe]}",
|
21
|
+
"Supported: #{MiGA::RemoteDataset.UNIVERSE.keys.join(', ')}"
|
21
22
|
) { |v| cli[:universe] = v.to_sym }
|
22
23
|
opt.on(
|
23
24
|
'--db STRING',
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action'
|
4
|
+
|
5
|
+
class MiGA::Cli::Action::GtdbGet < MiGA::Cli::Action
|
6
|
+
require 'miga/cli/action/download/gtdb'
|
7
|
+
include MiGA::Cli::Action::Download::Gtdb
|
8
|
+
|
9
|
+
def parse_cli
|
10
|
+
cli.defaults = {
|
11
|
+
query: false, unlink: false,
|
12
|
+
reference: false, add_version: true, dry: false,
|
13
|
+
get_md: false, only_md: false, save_every: 1
|
14
|
+
}
|
15
|
+
cli.parse do |opt|
|
16
|
+
cli.opt_object(opt, [:project])
|
17
|
+
opt.on(
|
18
|
+
'-T', '--taxon STRING',
|
19
|
+
'(Mandatory) Taxon name in GTDB format (e.g., g__Escherichia)'
|
20
|
+
) { |v| cli[:taxon] = v }
|
21
|
+
opt.on(
|
22
|
+
'--max INT', Integer,
|
23
|
+
'Maximum number of datasets to download (by default: unlimited)'
|
24
|
+
) { |v| cli[:max_datasets] = v }
|
25
|
+
opt.on(
|
26
|
+
'-m', '--metadata STRING',
|
27
|
+
'Metadata as key-value pairs separated by = and delimited by comma',
|
28
|
+
'Values are saved as strings except for booleans (true / false) or nil'
|
29
|
+
) { |v| cli[:metadata] = v }
|
30
|
+
cli_task_flags(opt)
|
31
|
+
cli_name_modifiers(opt)
|
32
|
+
cli_filters(opt)
|
33
|
+
cli_save_actions(opt)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def perform
|
38
|
+
sanitize_cli
|
39
|
+
p = cli.load_project
|
40
|
+
ds = remote_list
|
41
|
+
ds = discard_blacklisted(ds)
|
42
|
+
ds = impose_limit(ds)
|
43
|
+
d, downloaded = download_entries(ds, p)
|
44
|
+
|
45
|
+
# Finalize
|
46
|
+
cli.say "Datasets listed: #{d.size}"
|
47
|
+
act = cli[:dry] ? 'to download' : 'downloaded'
|
48
|
+
cli.say "Datasets #{act}: #{downloaded}"
|
49
|
+
unless cli[:remote_list].nil?
|
50
|
+
File.open(cli[:remote_list], 'w') do |fh|
|
51
|
+
d.each { |i| fh.puts i }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
return unless cli[:unlink]
|
55
|
+
|
56
|
+
unlink = p.dataset_names - d
|
57
|
+
unlink.each { |i| p.unlink_dataset(i).remove! }
|
58
|
+
cli.say "Datasets unlinked: #{unlink.size}"
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
@@ -3,8 +3,8 @@
|
|
3
3
|
require 'miga/cli/action'
|
4
4
|
|
5
5
|
class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
6
|
-
require 'miga/cli/action/ncbi_get/downloads'
|
7
|
-
include MiGA::Cli::Action::NcbiGet::Downloads
|
6
|
+
require 'miga/cli/action/download/ncbi'
|
7
|
+
include MiGA::Cli::Action::Download::Ncbi
|
8
8
|
|
9
9
|
def parse_cli
|
10
10
|
cli.defaults = {
|
data/lib/miga/cli/base.rb
CHANGED
@@ -19,6 +19,7 @@ module MiGA::Cli::Base
|
|
19
19
|
add: 'Create a dataset in a MiGA project',
|
20
20
|
get: 'Download a dataset from public databases into a MiGA project',
|
21
21
|
ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
|
22
|
+
gtdb_get: 'Download all genomes in a taxon from GTDB into a MiGA project',
|
22
23
|
rm: 'Remove a dataset from a MiGA project',
|
23
24
|
find: 'Find unregistered datasets based on result files',
|
24
25
|
ln: 'Link datasets (including results) from one project to another',
|
@@ -12,6 +12,8 @@ end
|
|
12
12
|
|
13
13
|
module MiGA::RemoteDataset::Base
|
14
14
|
@@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
15
|
+
@@_EBI_API = 'https://www.ebi.ac.uk/Tools'
|
16
|
+
@@_GTDB_API = 'https://api.gtdb.ecogenomic.org'
|
15
17
|
@@_NCBI_API_KEY = lambda { |url|
|
16
18
|
ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
|
17
19
|
}
|
@@ -43,9 +45,26 @@ module MiGA::RemoteDataset::Base
|
|
43
45
|
},
|
44
46
|
ebi: {
|
45
47
|
dbs: { embl: { stage: :assembly, format: :fasta } },
|
46
|
-
url:
|
48
|
+
url: "#{@@_EBI_API}/dbfetch/dbfetch/%1$s/%2$s/%3$s",
|
47
49
|
method: :rest
|
48
50
|
},
|
51
|
+
gtdb: {
|
52
|
+
dbs: {
|
53
|
+
# This is a dummy entry plugged directly to +ncbi_asm_rest+
|
54
|
+
assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
|
55
|
+
# The 'taxon' namespace actually returns a list of genomes (+format+)
|
56
|
+
taxon: {
|
57
|
+
stage: :metadata, format: :genomes, map_to: [:assembly],
|
58
|
+
extra: ['sp_reps_only=false']
|
59
|
+
},
|
60
|
+
# The 'genome' namespace actually returns the taxonomy (+format+)
|
61
|
+
genome: { stage: :metadata, format: 'taxon-history' }
|
62
|
+
},
|
63
|
+
url: "#{@@_GTDB_API}/%1$s/%2$s/%3$s?%4$s",
|
64
|
+
method: :rest,
|
65
|
+
map_to_universe: :ncbi,
|
66
|
+
headers: 'accept: application/json' # < TODO not currently supported
|
67
|
+
},
|
49
68
|
ncbi: {
|
50
69
|
dbs: {
|
51
70
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -49,7 +49,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
49
49
|
@@UNIVERSE.keys.include?(@universe) or
|
50
50
|
raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
|
51
51
|
@@UNIVERSE[@universe][:dbs].include?(@db) or
|
52
|
-
raise "Unknown Database: #{@db}. Try: #{@@UNIVERSE[@universe][:dbs]}"
|
52
|
+
raise "Unknown Database: #{@db}. Try: #{@@UNIVERSE[@universe][:dbs].keys}"
|
53
53
|
@_ncbi_asm_json_doc = nil
|
54
54
|
# FIXME: Part of the +map_to+ support:
|
55
55
|
# unless @@UNIVERSE[@universe][:dbs][@db][:map_to].nil?
|
@@ -104,6 +104,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
104
104
|
when :ebi, :ncbi, :web
|
105
105
|
# Get taxonomy
|
106
106
|
@metadata[:tax] = get_ncbi_taxonomy
|
107
|
+
when :gtdb
|
108
|
+
# Get taxonomy
|
109
|
+
@metadata[:tax] = get_gtdb_taxonomy
|
107
110
|
end
|
108
111
|
@metadata = get_type_status(metadata)
|
109
112
|
end
|
@@ -129,10 +132,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
129
132
|
end
|
130
133
|
|
131
134
|
##
|
132
|
-
# Get NCBI taxonomy as MiGA::Taxonomy
|
135
|
+
# Get NCBI taxonomy as MiGA::Taxonomy
|
133
136
|
def get_ncbi_taxonomy
|
134
|
-
tax_id = get_ncbi_taxid
|
135
|
-
return nil if tax_id.nil?
|
137
|
+
tax_id = get_ncbi_taxid or return
|
136
138
|
|
137
139
|
lineage = { ns: 'ncbi' }
|
138
140
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
@@ -147,12 +149,34 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
147
149
|
MiGA::Taxonomy.new(lineage)
|
148
150
|
end
|
149
151
|
|
152
|
+
##
|
153
|
+
# Get GTDB taxonomy as MiGA::Taxonomy
|
154
|
+
def get_gtdb_taxonomy
|
155
|
+
gtdb_genome = metadata[:gtdb_assembly] or return
|
156
|
+
|
157
|
+
doc = MiGA::Json.parse(
|
158
|
+
MiGA::RemoteDataset.download(
|
159
|
+
:gtdb, :genome, gtdb_genome, 'taxon-history', nil, ['']
|
160
|
+
),
|
161
|
+
contents: true
|
162
|
+
)
|
163
|
+
lineage = { ns: 'gtdb' }
|
164
|
+
lineage.merge!(doc.first) # Get only the latest available classification
|
165
|
+
release = lineage.delete(:release)
|
166
|
+
@metadata[:gtdb_release] = release
|
167
|
+
lineage.transform_values! { |v| v.gsub(/^\S__/, '') }
|
168
|
+
MiGA.DEBUG "Got lineage from #{release}: #{lineage}"
|
169
|
+
MiGA::Taxonomy.new(lineage)
|
170
|
+
end
|
171
|
+
|
150
172
|
##
|
151
173
|
# Get the JSON document describing an NCBI assembly entry.
|
152
174
|
def ncbi_asm_json_doc
|
153
175
|
return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
|
154
176
|
|
155
|
-
|
177
|
+
if db == :assembly && %i[ncbi gtdb].include?(universe)
|
178
|
+
metadata[:ncbi_asm] ||= ids.first
|
179
|
+
end
|
156
180
|
return nil unless metadata[:ncbi_asm]
|
157
181
|
|
158
182
|
ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.2, 10, 2].freeze
|
15
|
+
VERSION = [1.2, 11, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2022, 12,
|
23
|
+
VERSION_DATE = Date.new(2022, 12, 28)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.10.2
|
4
|
+
version: 1.2.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -141,6 +141,9 @@ files:
|
|
141
141
|
- lib/miga/cli/action/derep_wf.rb
|
142
142
|
- lib/miga/cli/action/doctor.rb
|
143
143
|
- lib/miga/cli/action/doctor/base.rb
|
144
|
+
- lib/miga/cli/action/download/base.rb
|
145
|
+
- lib/miga/cli/action/download/gtdb.rb
|
146
|
+
- lib/miga/cli/action/download/ncbi.rb
|
144
147
|
- lib/miga/cli/action/edit.rb
|
145
148
|
- lib/miga/cli/action/env.rb
|
146
149
|
- lib/miga/cli/action/files.rb
|
@@ -148,6 +151,7 @@ files:
|
|
148
151
|
- lib/miga/cli/action/generic.rb
|
149
152
|
- lib/miga/cli/action/get.rb
|
150
153
|
- lib/miga/cli/action/get_db.rb
|
154
|
+
- lib/miga/cli/action/gtdb_get.rb
|
151
155
|
- lib/miga/cli/action/index_wf.rb
|
152
156
|
- lib/miga/cli/action/init.rb
|
153
157
|
- lib/miga/cli/action/init/daemon_helper.rb
|
@@ -156,7 +160,6 @@ files:
|
|
156
160
|
- lib/miga/cli/action/ln.rb
|
157
161
|
- lib/miga/cli/action/ls.rb
|
158
162
|
- lib/miga/cli/action/ncbi_get.rb
|
159
|
-
- lib/miga/cli/action/ncbi_get/downloads.rb
|
160
163
|
- lib/miga/cli/action/new.rb
|
161
164
|
- lib/miga/cli/action/next_step.rb
|
162
165
|
- lib/miga/cli/action/option.rb
|