miga-base 1.2.16.1 → 1.2.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +2 -2
- data/lib/miga/cli/action/download/seqcode.rb +69 -0
- data/lib/miga/cli/action/seqcode_get.rb +37 -0
- data/lib/miga/cli/action/wf.rb +36 -14
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/common/net.rb +9 -3
- data/lib/miga/remote_dataset/base.rb +13 -0
- data/lib/miga/remote_dataset.rb +4 -1
- data/lib/miga/version.rb +2 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a33ac43db8c09ad7cad760c1bc82102ca1629beba12bea7c15d5391569210d95
|
4
|
+
data.tar.gz: 5104348e8471729c6ce59612a88e624f44d382480e313e00329e8766ead0e8a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd9c726cc1da17dc2146772491a580d471739876f036368fb5a2c2fab475604f0ae858b3e02cebcb7dd22c029d01ade2010a68acb86e752a85e7723dabcebec0
|
7
|
+
data.tar.gz: 898c7b240b5d1c8e8f911f2111d9e326752cdb25b36c646aa21402c140c52187382fc9bf4cf621b5a0b3cfc715f0d596beefaaece3827822b4ad515d6e420aac
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
|
1
|
+
The Artistic License 2.0
|
2
2
|
|
3
|
-
|
3
|
+
Copyright (c) 2016-2023 Luis M Rodriguez-R (Universität Innsbruck)
|
4
4
|
|
5
5
|
Everyone is permitted to copy and distribute verbatim copies
|
6
6
|
of this license document, but changing it is not allowed.
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action/download/base'
|
4
|
+
|
5
|
+
##
|
6
|
+
# Helper module including download functions for the seqcode_get action
|
7
|
+
module MiGA::Cli::Action::Download::Seqcode
|
8
|
+
include MiGA::Cli::Action::Download::Base
|
9
|
+
|
10
|
+
def cli_task_flags(_opt)
|
11
|
+
end
|
12
|
+
|
13
|
+
def cli_name_modifiers(opt)
|
14
|
+
opt.on(
|
15
|
+
'--no-version-name',
|
16
|
+
'Do not add sequence version to the dataset name'
|
17
|
+
) { |v| cli[:add_version] = v }
|
18
|
+
end
|
19
|
+
|
20
|
+
def sanitize_cli
|
21
|
+
cli[:save_every] = 1 if cli[:dry]
|
22
|
+
end
|
23
|
+
|
24
|
+
def remote_list
|
25
|
+
cli.say 'Downloading genome list'
|
26
|
+
current_page = 1
|
27
|
+
total_pages = 1
|
28
|
+
ds = {}
|
29
|
+
|
30
|
+
while current_page <= total_pages
|
31
|
+
json = MiGA::RemoteDataset.download(
|
32
|
+
:seqcode, :'type-genomes', nil, :json, nil,
|
33
|
+
["page=#{current_page}"]
|
34
|
+
)
|
35
|
+
doc = MiGA::Json.parse(json, contents: true)
|
36
|
+
current_page = doc[:current_page] + 1
|
37
|
+
total_pages = doc[:total_pages]
|
38
|
+
|
39
|
+
doc[:values].each do |name|
|
40
|
+
next unless name[:type_material]
|
41
|
+
acc = name[:type_material].values.first
|
42
|
+
db = name[:type_material].keys.first
|
43
|
+
next unless %i[assembly nuccore].include?(db) # No INSDC genome, ignore
|
44
|
+
|
45
|
+
classif = name[:classification] || {}
|
46
|
+
tax = MiGA::Taxonomy.new(Hash[classif.map { |i| [i[:rank], i[:name]] }])
|
47
|
+
tax << { 'ns' => 'seqcode', name[:rank] => name[:name] }
|
48
|
+
d = {
|
49
|
+
ids: [acc], db: db, universe: :seqcode,
|
50
|
+
md: {
|
51
|
+
type: :genome, tax: tax, is_type: true,
|
52
|
+
type_rel: 'SeqCode type genome',
|
53
|
+
seqcode_url: "https://seqco.de/i:#{name[:id]}"
|
54
|
+
}
|
55
|
+
}
|
56
|
+
d[:md][:get_ncbi_taxonomy] = true if cli[:get_ncbi_taxonomy]
|
57
|
+
ds[remote_row_name(tax, db, acc)] = d
|
58
|
+
end
|
59
|
+
end
|
60
|
+
ds
|
61
|
+
end
|
62
|
+
|
63
|
+
##
# Builds the dataset name for a type genome from its taxonomy +tax+, the
# source database +db+ (+:assembly+ or +:nuccore+), and the accession +asm+.
#
# The name is "<lowest taxon name>_<asm|gb>_<accession>" passed through
# +String#miga_name+. The trailing sequence version (e.g., +.1+) is removed
# from the accession unless +cli[:add_version]+ is set.
#
# +asm+ is never modified in place: +asm.to_s+ returns the very same object
# when +asm+ is already a String, and the caller also stores that object as
# the dataset ID, so an in-place substitution would silently strip the
# version from the ID as well (and fail on a frozen accession).
def remote_row_name(tax, db, asm)
  acc = asm.to_s
  acc = acc.sub(/\.\d+\Z/, '') unless cli[:add_version]
  db_short = { assembly: 'asm', nuccore: 'gb' }[db]
  "#{tax.lowest[1]}_#{db_short}_#{acc}".miga_name
end
|
69
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'miga/cli/action'
|
4
|
+
|
5
|
+
class MiGA::Cli::Action::SeqcodeGet < MiGA::Cli::Action
|
6
|
+
require 'miga/cli/action/download/seqcode'
|
7
|
+
include MiGA::Cli::Action::Download::Seqcode
|
8
|
+
|
9
|
+
def parse_cli
|
10
|
+
cli.defaults = {
|
11
|
+
query: false, unlink: false,
|
12
|
+
reference: false, add_version: true, dry: false,
|
13
|
+
get_md: false, only_md: false, save_every: 1
|
14
|
+
}
|
15
|
+
cli.parse do |opt|
|
16
|
+
cli.opt_object(opt, [:project])
|
17
|
+
cli_base_flags(opt)
|
18
|
+
opt.on(
|
19
|
+
'--ncbi-taxonomy',
|
20
|
+
'Retrieve NCBI taxonomy instead of SeqCode taxonomy'
|
21
|
+
) { |v| cli[:get_ncbi_taxonomy] = v }
|
22
|
+
cli_task_flags(opt)
|
23
|
+
cli_name_modifiers(opt)
|
24
|
+
cli_filters(opt)
|
25
|
+
cli_save_actions(opt)
|
26
|
+
opt.on(
|
27
|
+
'--ncbi-api-key STRING',
|
28
|
+
'NCBI API key'
|
29
|
+
) { |v| ENV['NCBI_API_KEY'] = v }
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def perform
|
34
|
+
generic_perform
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -47,10 +47,15 @@ module MiGA::Cli::Action::Wf
|
|
47
47
|
'--gtdb-ref',
|
48
48
|
'Only download reference anchor genomes in GTDB (requires -G)'
|
49
49
|
) { |v| cli[:gtdb_ref] = v }
|
50
|
+
opt.on(
|
51
|
+
'-S', '--seqcode-type',
|
52
|
+
'Download all type genomes from the SeqCode Registry'
|
53
|
+
) { |v| cli[:seqcode_type] = v }
|
50
54
|
opt.on(
|
51
55
|
'--max-download INT', Integer,
|
52
|
-
'Maximum number of genomes to download (by default: unlimited)'
|
53
|
-
|
56
|
+
'Maximum number of genomes to download (by default: unlimited)',
|
57
|
+
'It applies independently to -T, -G and --S'
|
58
|
+
) { |v| cli[:max_download] = v }
|
54
59
|
end
|
55
60
|
if params[:qual]
|
56
61
|
opt.on(
|
@@ -131,24 +136,39 @@ module MiGA::Cli::Action::Wf
|
|
131
136
|
paired = cli[:input_type].to_s.include?('_paired')
|
132
137
|
cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
|
133
138
|
|
134
|
-
# Create empty project
|
139
|
+
# Create empty project and populate with datasets
|
140
|
+
p = initialize_empty_project(p_metadata)
|
141
|
+
download_datasets
|
142
|
+
import_datasets(stage)
|
143
|
+
|
144
|
+
# Define datasets metadata
|
145
|
+
p.load
|
146
|
+
d_metadata[:type] = cli[:dataset_type]
|
147
|
+
p.each_dataset { |d| transfer_metadata(d, d_metadata) }
|
148
|
+
p
|
149
|
+
end
|
150
|
+
|
151
|
+
def initialize_empty_project(metadata)
|
135
152
|
call_cli(
|
136
153
|
['new', '-P', cli[:outdir], '-t', cli[:project_type]]
|
137
154
|
) unless MiGA::Project.exist? cli[:outdir]
|
138
155
|
|
139
156
|
# Define project metadata
|
140
157
|
p = cli.load_project(:outdir, '-o')
|
141
|
-
|
142
|
-
transfer_metadata(p,
|
158
|
+
metadata[:type] = cli[:project_type]
|
159
|
+
transfer_metadata(p, metadata)
|
143
160
|
%i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
|
144
161
|
p.set_option(i, cli[i])
|
145
162
|
end
|
163
|
+
p
|
164
|
+
end
|
146
165
|
|
166
|
+
def download_datasets
|
147
167
|
# Download datasets from NCBI
|
148
168
|
unless cli[:ncbi_taxon].nil?
|
149
169
|
what = cli[:ncbi_draft] ? '--all' : '--complete'
|
150
170
|
cmd = ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
|
151
|
-
cmd += ['--max', cli[:
|
171
|
+
cmd += ['--max', cli[:max_download]] if cli[:max_download]
|
152
172
|
call_cli(cmd)
|
153
173
|
end
|
154
174
|
|
@@ -156,11 +176,19 @@ module MiGA::Cli::Action::Wf
|
|
156
176
|
unless cli[:gtdb_taxon].nil?
|
157
177
|
cmd = ['gtdb_get', '-P', cli[:outdir], '-T', cli[:gtdb_taxon]]
|
158
178
|
cmd << '--reference' if cli[:gtdb_ref]
|
159
|
-
cmd += ['--max', cli[:
|
179
|
+
cmd += ['--max', cli[:max_download]] if cli[:max_download]
|
160
180
|
call_cli(cmd)
|
161
181
|
end
|
162
182
|
|
163
|
-
#
|
183
|
+
# Download datasets from SeqCode Registry
|
184
|
+
if cli[:seqcode_type]
|
185
|
+
cmd = ['seqcode_get', '-P', cli[:outdir]]
|
186
|
+
cmd += ['--max', cli[:max_download]] if cli[:max_download]
|
187
|
+
call_cli(cmd)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
def import_datasets(stage)
|
164
192
|
call_cli(
|
165
193
|
[
|
166
194
|
'add',
|
@@ -171,12 +199,6 @@ module MiGA::Cli::Action::Wf
|
|
171
199
|
'-R', cli[:regexp]
|
172
200
|
] + cli.files
|
173
201
|
) unless cli.files.empty?
|
174
|
-
|
175
|
-
# Define datasets metadata
|
176
|
-
p.load
|
177
|
-
d_metadata[:type] = cli[:dataset_type]
|
178
|
-
p.each_dataset { |d| transfer_metadata(d, d_metadata) }
|
179
|
-
p
|
180
202
|
end
|
181
203
|
|
182
204
|
def summarize(which = %w[cds assembly essential_genes ssu])
|
data/lib/miga/cli/base.rb
CHANGED
@@ -20,6 +20,7 @@ module MiGA::Cli::Base
|
|
20
20
|
get: 'Download a dataset from public databases into a MiGA project',
|
21
21
|
ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
|
22
22
|
gtdb_get: 'Download all genomes in a taxon from GTDB into a MiGA project',
|
23
|
+
seqcode_get: 'Download all type genomes from SeqCode into a MiGA project',
|
23
24
|
rm: 'Remove a dataset from a MiGA project',
|
24
25
|
find: 'Find unregistered datasets based on result files',
|
25
26
|
ln: 'Link datasets (including results) from one project to another',
|
data/lib/miga/common/net.rb
CHANGED
@@ -15,16 +15,22 @@ module MiGA::Common::Net
|
|
15
15
|
##
# Returns the URL (String) of the known host called +name+ (String or
# Symbol). Recognized names are +:miga_online_ftp+, +:miga_db+, and
# +:miga_dist+, all of them located in +main_server+.
#
# Raises a RuntimeError reporting +name+ if it is not one of the above.
def known_hosts(name)
  case name.to_sym
  when :miga_online_ftp
    "ftp://#{main_server}//" # <- // to simplify chdir in connection
  when :miga_db
    "ftp://#{main_server}/db"
  when :miga_dist
    "ftp://#{main_server}/dist"
  else
    # Interpolate the requested +name+: this method has no +host+ parameter
    # or local, so referencing it here would raise NameError instead of the
    # intended message
    raise "Unrecognized server name: #{name}"
  end
end
|
27
27
|
|
28
|
+
##
|
29
|
+
# Returns the address of the main MiGA server
|
30
|
+
def main_server
|
31
|
+
'gatech.microbial-genomes.org'
|
32
|
+
end
|
33
|
+
|
28
34
|
##
|
29
35
|
# Connect to an FTP +host+ (String) or a known host name (Symbol, see
|
30
36
|
# +.known_hosts+)
|
@@ -14,6 +14,7 @@ module MiGA::RemoteDataset::Base
|
|
14
14
|
@@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
15
15
|
@@_EBI_API = 'https://www.ebi.ac.uk/Tools'
|
16
16
|
@@_GTDB_API = 'https://api.gtdb.ecogenomic.org'
|
17
|
+
@@_SEQCODE_API = 'https://disc-genomics.uibk.ac.at/seqcode'
|
17
18
|
@@_NCBI_API_KEY = lambda { |url|
|
18
19
|
ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
|
19
20
|
}
|
@@ -65,6 +66,18 @@ module MiGA::RemoteDataset::Base
|
|
65
66
|
map_to_universe: :ncbi,
|
66
67
|
headers: 'accept: application/json' # < TODO not currently supported
|
67
68
|
},
|
69
|
+
seqcode: {
|
70
|
+
dbs: {
|
71
|
+
# These are dummy entries plugged directly to +ncbi_*_rest+
|
72
|
+
assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
|
73
|
+
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
74
|
+
# This is the list of type genomes
|
75
|
+
:'type-genomes' => { stage: :metadata, format: :json }
|
76
|
+
},
|
77
|
+
url: "#{@@_SEQCODE_API}/%1$s.json?%4$s",
|
78
|
+
method: :rest,
|
79
|
+
map_to_universe: :ncbi
|
80
|
+
},
|
68
81
|
ncbi: {
|
69
82
|
dbs: {
|
70
83
|
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -133,6 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
133
133
|
when :gtdb
|
134
134
|
# Get taxonomy
|
135
135
|
@metadata[:tax] = get_gtdb_taxonomy
|
136
|
+
when :seqcode
|
137
|
+
# Do nothing, taxonomy already defined
|
136
138
|
end
|
137
139
|
|
138
140
|
if metadata[:get_ncbi_taxonomy]
|
@@ -206,7 +208,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
206
208
|
def ncbi_asm_json_doc
|
207
209
|
return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
|
208
210
|
|
209
|
-
if db == :assembly && %i[ncbi gtdb].include?(universe)
|
211
|
+
if db == :assembly && %i[ncbi gtdb seqcode].include?(universe)
|
210
212
|
metadata[:ncbi_asm] ||= ids.first
|
211
213
|
end
|
212
214
|
return nil unless metadata[:ncbi_asm]
|
@@ -244,6 +246,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
244
246
|
ln
|
245
247
|
end
|
246
248
|
|
249
|
+
alias :get_ncbi_taxid_from_seqcode :get_ncbi_taxid_from_ncbi
|
247
250
|
alias :get_ncbi_taxid_from_gtdb :get_ncbi_taxid_from_ncbi
|
248
251
|
|
249
252
|
def get_ncbi_taxid_from_ebi
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.2,
|
15
|
+
VERSION = [1.2, 17, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2023, 2,
|
23
|
+
VERSION_DATE = Date.new(2023, 2, 14)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -147,6 +147,7 @@ files:
|
|
147
147
|
- lib/miga/cli/action/download/base.rb
|
148
148
|
- lib/miga/cli/action/download/gtdb.rb
|
149
149
|
- lib/miga/cli/action/download/ncbi.rb
|
150
|
+
- lib/miga/cli/action/download/seqcode.rb
|
150
151
|
- lib/miga/cli/action/edit.rb
|
151
152
|
- lib/miga/cli/action/env.rb
|
152
153
|
- lib/miga/cli/action/files.rb
|
@@ -170,6 +171,7 @@ files:
|
|
170
171
|
- lib/miga/cli/action/quality_wf.rb
|
171
172
|
- lib/miga/cli/action/rm.rb
|
172
173
|
- lib/miga/cli/action/run.rb
|
174
|
+
- lib/miga/cli/action/seqcode_get.rb
|
173
175
|
- lib/miga/cli/action/stats.rb
|
174
176
|
- lib/miga/cli/action/summary.rb
|
175
177
|
- lib/miga/cli/action/tax_dist.rb
|