miga-base 1.2.16.1 → 1.2.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a66176a9b04ef20b3c415e195d4acdb38c18f681b3f7ee52899d79fe0672b570
4
- data.tar.gz: d6b98ea8416dec7abd29d1255f512d5ca352efd58dd002616377b88079beb09b
3
+ metadata.gz: a33ac43db8c09ad7cad760c1bc82102ca1629beba12bea7c15d5391569210d95
4
+ data.tar.gz: 5104348e8471729c6ce59612a88e624f44d382480e313e00329e8766ead0e8a5
5
5
  SHA512:
6
- metadata.gz: dea24c16ab06cb21d0f8130cccbfe8d9edddbf96f7de9329a10218e086bcd90c25d1f672f61b94ae115e8cdc4fbf24d3febf3009d3c11f0f0e3129c134100d39
7
- data.tar.gz: 7a40aa1e7fc58efd768aec30e9e21110d2b8ab7ab938b8ccc8f6ff5737ef9b44624e926f955d05239099289407f990dcd0f13da36ae065064dbf301bbe355091
6
+ metadata.gz: fd9c726cc1da17dc2146772491a580d471739876f036368fb5a2c2fab475604f0ae858b3e02cebcb7dd22c029d01ade2010a68acb86e752a85e7723dabcebec0
7
+ data.tar.gz: 898c7b240b5d1c8e8f911f2111d9e326752cdb25b36c646aa21402c140c52187382fc9bf4cf621b5a0b3cfc715f0d596beefaaece3827822b4ad515d6e420aac
data/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
- The Artistic License 2.0
1
+ The Artistic License 2.0
2
2
 
3
- Copyright (c) 2016 Luis M Rodriguez-R
3
+ Copyright (c) 2016-2023 Luis M Rodriguez-R (Universität Innsbruck)
4
4
 
5
5
  Everyone is permitted to copy and distribute verbatim copies
6
6
  of this license document, but changing it is not allowed.
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action/download/base'
4
+
5
+ ##
6
+ # Helper module including download functions for the seqcode_get action
7
+ module MiGA::Cli::Action::Download::Seqcode
8
+ include MiGA::Cli::Action::Download::Base
9
+
10
+ def cli_task_flags(_opt)
11
+ end
12
+
13
+ def cli_name_modifiers(opt)
14
+ opt.on(
15
+ '--no-version-name',
16
+ 'Do not add sequence version to the dataset name'
17
+ ) { |v| cli[:add_version] = v }
18
+ end
19
+
20
+ def sanitize_cli
21
+ cli[:save_every] = 1 if cli[:dry]
22
+ end
23
+
24
+ def remote_list
25
+ cli.say 'Downloading genome list'
26
+ current_page = 1
27
+ total_pages = 1
28
+ ds = {}
29
+
30
+ while current_page <= total_pages
31
+ json = MiGA::RemoteDataset.download(
32
+ :seqcode, :'type-genomes', nil, :json, nil,
33
+ ["page=#{current_page}"]
34
+ )
35
+ doc = MiGA::Json.parse(json, contents: true)
36
+ current_page = doc[:current_page] + 1
37
+ total_pages = doc[:total_pages]
38
+
39
+ doc[:values].each do |name|
40
+ next unless name[:type_material]
41
+ acc = name[:type_material].values.first
42
+ db = name[:type_material].keys.first
43
+ next unless %i[assembly nuccore].include?(db) # No INSDC genome, ignore
44
+
45
+ classif = name[:classification] || {}
46
+ tax = MiGA::Taxonomy.new(Hash[classif.map { |i| [i[:rank], i[:name]] }])
47
+ tax << { 'ns' => 'seqcode', name[:rank] => name[:name] }
48
+ d = {
49
+ ids: [acc], db: db, universe: :seqcode,
50
+ md: {
51
+ type: :genome, tax: tax, is_type: true,
52
+ type_rel: 'SeqCode type genome',
53
+ seqcode_url: "https://seqco.de/i:#{name[:id]}"
54
+ }
55
+ }
56
+ d[:md][:get_ncbi_taxonomy] = true if cli[:get_ncbi_taxonomy]
57
+ ds[remote_row_name(tax, db, acc)] = d
58
+ end
59
+ end
60
+ ds
61
+ end
62
+
63
+ def remote_row_name(tax, db, asm)
64
+ acc = asm.to_s
65
+ acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
66
+ db_short = { assembly: 'asm', nuccore: 'gb' }[db]
67
+ "#{tax.lowest[1]}_#{db_short}_#{acc}".miga_name
68
+ end
69
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ class MiGA::Cli::Action::SeqcodeGet < MiGA::Cli::Action
6
+ require 'miga/cli/action/download/seqcode'
7
+ include MiGA::Cli::Action::Download::Seqcode
8
+
9
+ def parse_cli
10
+ cli.defaults = {
11
+ query: false, unlink: false,
12
+ reference: false, add_version: true, dry: false,
13
+ get_md: false, only_md: false, save_every: 1
14
+ }
15
+ cli.parse do |opt|
16
+ cli.opt_object(opt, [:project])
17
+ cli_base_flags(opt)
18
+ opt.on(
19
+ '--ncbi-taxonomy',
20
+ 'Retrieve NCBI taxonomy instead of SeqCode taxonomy'
21
+ ) { |v| cli[:get_ncbi_taxonomy] = v }
22
+ cli_task_flags(opt)
23
+ cli_name_modifiers(opt)
24
+ cli_filters(opt)
25
+ cli_save_actions(opt)
26
+ opt.on(
27
+ '--ncbi-api-key STRING',
28
+ 'NCBI API key'
29
+ ) { |v| ENV['NCBI_API_KEY'] = v }
30
+ end
31
+ end
32
+
33
+ def perform
34
+ generic_perform
35
+ end
36
+
37
+ end
@@ -47,10 +47,15 @@ module MiGA::Cli::Action::Wf
47
47
  '--gtdb-ref',
48
48
  'Only download reference anchor genomes in GTDB (requires -G)'
49
49
  ) { |v| cli[:gtdb_ref] = v }
50
+ opt.on(
51
+ '-S', '--seqcode-type',
52
+ 'Download all type genomes from the SeqCode Registry'
53
+ ) { |v| cli[:seqcode_type] = v }
50
54
  opt.on(
51
55
  '--max-download INT', Integer,
52
- 'Maximum number of genomes to download (by default: unlimited)'
53
- ) { |v| cli[:ncbi_max] = v }
56
+ 'Maximum number of genomes to download (by default: unlimited)',
57
+ 'It applies independently to -T, -G and --S'
58
+ ) { |v| cli[:max_download] = v }
54
59
  end
55
60
  if params[:qual]
56
61
  opt.on(
@@ -131,24 +136,39 @@ module MiGA::Cli::Action::Wf
131
136
  paired = cli[:input_type].to_s.include?('_paired')
132
137
  cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
133
138
 
134
- # Create empty project
139
+ # Create empty project and populate with datasets
140
+ p = initialize_empty_project(p_metadata)
141
+ download_datasets
142
+ import_datasets(stage)
143
+
144
+ # Define datasets metadata
145
+ p.load
146
+ d_metadata[:type] = cli[:dataset_type]
147
+ p.each_dataset { |d| transfer_metadata(d, d_metadata) }
148
+ p
149
+ end
150
+
151
+ def initialize_empty_project(metadata)
135
152
  call_cli(
136
153
  ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
137
154
  ) unless MiGA::Project.exist? cli[:outdir]
138
155
 
139
156
  # Define project metadata
140
157
  p = cli.load_project(:outdir, '-o')
141
- p_metadata[:type] = cli[:project_type]
142
- transfer_metadata(p, p_metadata)
158
+ metadata[:type] = cli[:project_type]
159
+ transfer_metadata(p, metadata)
143
160
  %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
144
161
  p.set_option(i, cli[i])
145
162
  end
163
+ p
164
+ end
146
165
 
166
+ def download_datasets
147
167
  # Download datasets from NCBI
148
168
  unless cli[:ncbi_taxon].nil?
149
169
  what = cli[:ncbi_draft] ? '--all' : '--complete'
150
170
  cmd = ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
151
- cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
171
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
152
172
  call_cli(cmd)
153
173
  end
154
174
 
@@ -156,11 +176,19 @@ module MiGA::Cli::Action::Wf
156
176
  unless cli[:gtdb_taxon].nil?
157
177
  cmd = ['gtdb_get', '-P', cli[:outdir], '-T', cli[:gtdb_taxon]]
158
178
  cmd << '--reference' if cli[:gtdb_ref]
159
- cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
179
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
160
180
  call_cli(cmd)
161
181
  end
162
182
 
163
- # Add datasets
183
+ # Download datasets from SeqCode Registry
184
+ if cli[:seqcode_type]
185
+ cmd = ['seqcode_get', '-P', cli[:outdir]]
186
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
187
+ call_cli(cmd)
188
+ end
189
+ end
190
+
191
+ def import_datasets(stage)
164
192
  call_cli(
165
193
  [
166
194
  'add',
@@ -171,12 +199,6 @@ module MiGA::Cli::Action::Wf
171
199
  '-R', cli[:regexp]
172
200
  ] + cli.files
173
201
  ) unless cli.files.empty?
174
-
175
- # Define datasets metadata
176
- p.load
177
- d_metadata[:type] = cli[:dataset_type]
178
- p.each_dataset { |d| transfer_metadata(d, d_metadata) }
179
- p
180
202
  end
181
203
 
182
204
  def summarize(which = %w[cds assembly essential_genes ssu])
data/lib/miga/cli/base.rb CHANGED
@@ -20,6 +20,7 @@ module MiGA::Cli::Base
20
20
  get: 'Download a dataset from public databases into a MiGA project',
21
21
  ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
22
22
  gtdb_get: 'Download all genomes in a taxon from GTDB into a MiGA project',
23
+ seqcode_get: 'Download all type genomes from SeqCode into a MiGA project',
23
24
  rm: 'Remove a dataset from a MiGA project',
24
25
  find: 'Find unregistered datasets based on result files',
25
26
  ln: 'Link datasets (including results) from one project to another',
@@ -15,16 +15,22 @@ module MiGA::Common::Net
15
15
  def known_hosts(name)
16
16
  case name.to_sym
17
17
  when :miga_online_ftp
18
- 'ftp://microbial-genomes.org//' # <- // to simplify chdir in connection
18
+ "ftp://#{main_server}//" # <- // to simplify chdir in connection
19
19
  when :miga_db
20
- 'ftp://microbial-genomes.org/db'
20
+ "ftp://#{main_server}/db"
21
21
  when :miga_dist
22
- 'ftp://microbial-genomes.org/dist'
22
+ "ftp://#{main_server}/dist"
23
23
  else
24
24
  raise "Unrecognized server name: #{host}"
25
25
  end
26
26
  end
27
27
 
28
+ ##
29
+ # Returns the address of the main MiGA server
30
+ def main_server
31
+ 'gatech.microbial-genomes.org'
32
+ end
33
+
28
34
  ##
29
35
  # Connect to an FTP +host+ (String) or a known host name (Symbol, see
30
36
  # +.known_hosts+)
@@ -14,6 +14,7 @@ module MiGA::RemoteDataset::Base
14
14
  @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
15
15
  @@_EBI_API = 'https://www.ebi.ac.uk/Tools'
16
16
  @@_GTDB_API = 'https://api.gtdb.ecogenomic.org'
17
+ @@_SEQCODE_API = 'https://disc-genomics.uibk.ac.at/seqcode'
17
18
  @@_NCBI_API_KEY = lambda { |url|
18
19
  ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
19
20
  }
@@ -65,6 +66,18 @@ module MiGA::RemoteDataset::Base
65
66
  map_to_universe: :ncbi,
66
67
  headers: 'accept: application/json' # < TODO not currently supported
67
68
  },
69
+ seqcode: {
70
+ dbs: {
71
+ # These are dummy entries plugged directly to +ncbi_*_rest+
72
+ assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
73
+ nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
74
+ # This is the list of type genomes
75
+ :'type-genomes' => { stage: :metadata, format: :json }
76
+ },
77
+ url: "#{@@_SEQCODE_API}/%1$s.json?%4$s",
78
+ method: :rest,
79
+ map_to_universe: :ncbi
80
+ },
68
81
  ncbi: {
69
82
  dbs: {
70
83
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
@@ -133,6 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
133
133
  when :gtdb
134
134
  # Get taxonomy
135
135
  @metadata[:tax] = get_gtdb_taxonomy
136
+ when :seqcode
137
+ # Do nothing, taxonomy already defined
136
138
  end
137
139
 
138
140
  if metadata[:get_ncbi_taxonomy]
@@ -206,7 +208,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
206
208
  def ncbi_asm_json_doc
207
209
  return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
208
210
 
209
- if db == :assembly && %i[ncbi gtdb].include?(universe)
211
+ if db == :assembly && %i[ncbi gtdb seqcode].include?(universe)
210
212
  metadata[:ncbi_asm] ||= ids.first
211
213
  end
212
214
  return nil unless metadata[:ncbi_asm]
@@ -244,6 +246,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
244
246
  ln
245
247
  end
246
248
 
249
+ alias :get_ncbi_taxid_from_seqcode :get_ncbi_taxid_from_ncbi
247
250
  alias :get_ncbi_taxid_from_gtdb :get_ncbi_taxid_from_ncbi
248
251
 
249
252
  def get_ncbi_taxid_from_ebi
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.2, 16, 1].freeze
15
+ VERSION = [1.2, 17, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem relese.
23
- VERSION_DATE = Date.new(2023, 2, 13)
23
+ VERSION_DATE = Date.new(2023, 2, 14)
24
24
 
25
25
  ##
26
26
  # References of MiGA
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.16.1
4
+ version: 1.2.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-13 00:00:00.000000000 Z
11
+ date: 2023-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -147,6 +147,7 @@ files:
147
147
  - lib/miga/cli/action/download/base.rb
148
148
  - lib/miga/cli/action/download/gtdb.rb
149
149
  - lib/miga/cli/action/download/ncbi.rb
150
+ - lib/miga/cli/action/download/seqcode.rb
150
151
  - lib/miga/cli/action/edit.rb
151
152
  - lib/miga/cli/action/env.rb
152
153
  - lib/miga/cli/action/files.rb
@@ -170,6 +171,7 @@ files:
170
171
  - lib/miga/cli/action/quality_wf.rb
171
172
  - lib/miga/cli/action/rm.rb
172
173
  - lib/miga/cli/action/run.rb
174
+ - lib/miga/cli/action/seqcode_get.rb
173
175
  - lib/miga/cli/action/stats.rb
174
176
  - lib/miga/cli/action/summary.rb
175
177
  - lib/miga/cli/action/tax_dist.rb