miga-base 1.2.16.1 → 1.2.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a66176a9b04ef20b3c415e195d4acdb38c18f681b3f7ee52899d79fe0672b570
4
- data.tar.gz: d6b98ea8416dec7abd29d1255f512d5ca352efd58dd002616377b88079beb09b
3
+ metadata.gz: a33ac43db8c09ad7cad760c1bc82102ca1629beba12bea7c15d5391569210d95
4
+ data.tar.gz: 5104348e8471729c6ce59612a88e624f44d382480e313e00329e8766ead0e8a5
5
5
  SHA512:
6
- metadata.gz: dea24c16ab06cb21d0f8130cccbfe8d9edddbf96f7de9329a10218e086bcd90c25d1f672f61b94ae115e8cdc4fbf24d3febf3009d3c11f0f0e3129c134100d39
7
- data.tar.gz: 7a40aa1e7fc58efd768aec30e9e21110d2b8ab7ab938b8ccc8f6ff5737ef9b44624e926f955d05239099289407f990dcd0f13da36ae065064dbf301bbe355091
6
+ metadata.gz: fd9c726cc1da17dc2146772491a580d471739876f036368fb5a2c2fab475604f0ae858b3e02cebcb7dd22c029d01ade2010a68acb86e752a85e7723dabcebec0
7
+ data.tar.gz: 898c7b240b5d1c8e8f911f2111d9e326752cdb25b36c646aa21402c140c52187382fc9bf4cf621b5a0b3cfc715f0d596beefaaece3827822b4ad515d6e420aac
data/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
- The Artistic License 2.0
1
+ The Artistic License 2.0
2
2
 
3
- Copyright (c) 2016 Luis M Rodriguez-R
3
+ Copyright (c) 2016-2023 Luis M Rodriguez-R (Universität Innsbruck)
4
4
 
5
5
  Everyone is permitted to copy and distribute verbatim copies
6
6
  of this license document, but changing it is not allowed.
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action/download/base'
4
+
5
+ ##
6
+ # Helper module including download functions for the seqcode_get action
7
+ module MiGA::Cli::Action::Download::Seqcode
8
+ include MiGA::Cli::Action::Download::Base
9
+
10
+ def cli_task_flags(_opt)
11
+ end
12
+
13
+ def cli_name_modifiers(opt)
14
+ opt.on(
15
+ '--no-version-name',
16
+ 'Do not add sequence version to the dataset name'
17
+ ) { |v| cli[:add_version] = v }
18
+ end
19
+
20
+ def sanitize_cli
21
+ cli[:save_every] = 1 if cli[:dry]
22
+ end
23
+
24
+ def remote_list
25
+ cli.say 'Downloading genome list'
26
+ current_page = 1
27
+ total_pages = 1
28
+ ds = {}
29
+
30
+ while current_page <= total_pages
31
+ json = MiGA::RemoteDataset.download(
32
+ :seqcode, :'type-genomes', nil, :json, nil,
33
+ ["page=#{current_page}"]
34
+ )
35
+ doc = MiGA::Json.parse(json, contents: true)
36
+ current_page = doc[:current_page] + 1
37
+ total_pages = doc[:total_pages]
38
+
39
+ doc[:values].each do |name|
40
+ next unless name[:type_material]
41
+ acc = name[:type_material].values.first
42
+ db = name[:type_material].keys.first
43
+ next unless %i[assembly nuccore].include?(db) # No INSDC genome, ignore
44
+
45
+ classif = name[:classification] || {}
46
+ tax = MiGA::Taxonomy.new(Hash[classif.map { |i| [i[:rank], i[:name]] }])
47
+ tax << { 'ns' => 'seqcode', name[:rank] => name[:name] }
48
+ d = {
49
+ ids: [acc], db: db, universe: :seqcode,
50
+ md: {
51
+ type: :genome, tax: tax, is_type: true,
52
+ type_rel: 'SeqCode type genome',
53
+ seqcode_url: "https://seqco.de/i:#{name[:id]}"
54
+ }
55
+ }
56
+ d[:md][:get_ncbi_taxonomy] = true if cli[:get_ncbi_taxonomy]
57
+ ds[remote_row_name(tax, db, acc)] = d
58
+ end
59
+ end
60
+ ds
61
+ end
62
+
63
+ def remote_row_name(tax, db, asm)
64
+ acc = asm.to_s
65
+ acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
66
+ db_short = { assembly: 'asm', nuccore: 'gb' }[db]
67
+ "#{tax.lowest[1]}_#{db_short}_#{acc}".miga_name
68
+ end
69
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ class MiGA::Cli::Action::SeqcodeGet < MiGA::Cli::Action
6
+ require 'miga/cli/action/download/seqcode'
7
+ include MiGA::Cli::Action::Download::Seqcode
8
+
9
+ def parse_cli
10
+ cli.defaults = {
11
+ query: false, unlink: false,
12
+ reference: false, add_version: true, dry: false,
13
+ get_md: false, only_md: false, save_every: 1
14
+ }
15
+ cli.parse do |opt|
16
+ cli.opt_object(opt, [:project])
17
+ cli_base_flags(opt)
18
+ opt.on(
19
+ '--ncbi-taxonomy',
20
+ 'Retrieve NCBI taxonomy instead of SeqCode taxonomy'
21
+ ) { |v| cli[:get_ncbi_taxonomy] = v }
22
+ cli_task_flags(opt)
23
+ cli_name_modifiers(opt)
24
+ cli_filters(opt)
25
+ cli_save_actions(opt)
26
+ opt.on(
27
+ '--ncbi-api-key STRING',
28
+ 'NCBI API key'
29
+ ) { |v| ENV['NCBI_API_KEY'] = v }
30
+ end
31
+ end
32
+
33
+ def perform
34
+ generic_perform
35
+ end
36
+
37
+ end
@@ -47,10 +47,15 @@ module MiGA::Cli::Action::Wf
47
47
  '--gtdb-ref',
48
48
  'Only download reference anchor genomes in GTDB (requires -G)'
49
49
  ) { |v| cli[:gtdb_ref] = v }
50
+ opt.on(
51
+ '-S', '--seqcode-type',
52
+ 'Download all type genomes from the SeqCode Registry'
53
+ ) { |v| cli[:seqcode_type] = v }
50
54
  opt.on(
51
55
  '--max-download INT', Integer,
52
- 'Maximum number of genomes to download (by default: unlimited)'
53
- ) { |v| cli[:ncbi_max] = v }
56
+ 'Maximum number of genomes to download (by default: unlimited)',
57
+ 'It applies independently to -T, -G and --S'
58
+ ) { |v| cli[:max_download] = v }
54
59
  end
55
60
  if params[:qual]
56
61
  opt.on(
@@ -131,24 +136,39 @@ module MiGA::Cli::Action::Wf
131
136
  paired = cli[:input_type].to_s.include?('_paired')
132
137
  cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
133
138
 
134
- # Create empty project
139
+ # Create empty project and populate with datasets
140
+ p = initialize_empty_project(p_metadata)
141
+ download_datasets
142
+ import_datasets(stage)
143
+
144
+ # Define datasets metadata
145
+ p.load
146
+ d_metadata[:type] = cli[:dataset_type]
147
+ p.each_dataset { |d| transfer_metadata(d, d_metadata) }
148
+ p
149
+ end
150
+
151
+ def initialize_empty_project(metadata)
135
152
  call_cli(
136
153
  ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
137
154
  ) unless MiGA::Project.exist? cli[:outdir]
138
155
 
139
156
  # Define project metadata
140
157
  p = cli.load_project(:outdir, '-o')
141
- p_metadata[:type] = cli[:project_type]
142
- transfer_metadata(p, p_metadata)
158
+ metadata[:type] = cli[:project_type]
159
+ transfer_metadata(p, metadata)
143
160
  %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
144
161
  p.set_option(i, cli[i])
145
162
  end
163
+ p
164
+ end
146
165
 
166
+ def download_datasets
147
167
  # Download datasets from NCBI
148
168
  unless cli[:ncbi_taxon].nil?
149
169
  what = cli[:ncbi_draft] ? '--all' : '--complete'
150
170
  cmd = ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
151
- cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
171
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
152
172
  call_cli(cmd)
153
173
  end
154
174
 
@@ -156,11 +176,19 @@ module MiGA::Cli::Action::Wf
156
176
  unless cli[:gtdb_taxon].nil?
157
177
  cmd = ['gtdb_get', '-P', cli[:outdir], '-T', cli[:gtdb_taxon]]
158
178
  cmd << '--reference' if cli[:gtdb_ref]
159
- cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
179
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
160
180
  call_cli(cmd)
161
181
  end
162
182
 
163
- # Add datasets
183
+ # Download datasets from SeqCode Registry
184
+ if cli[:seqcode_type]
185
+ cmd = ['seqcode_get', '-P', cli[:outdir]]
186
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
187
+ call_cli(cmd)
188
+ end
189
+ end
190
+
191
+ def import_datasets(stage)
164
192
  call_cli(
165
193
  [
166
194
  'add',
@@ -171,12 +199,6 @@ module MiGA::Cli::Action::Wf
171
199
  '-R', cli[:regexp]
172
200
  ] + cli.files
173
201
  ) unless cli.files.empty?
174
-
175
- # Define datasets metadata
176
- p.load
177
- d_metadata[:type] = cli[:dataset_type]
178
- p.each_dataset { |d| transfer_metadata(d, d_metadata) }
179
- p
180
202
  end
181
203
 
182
204
  def summarize(which = %w[cds assembly essential_genes ssu])
data/lib/miga/cli/base.rb CHANGED
@@ -20,6 +20,7 @@ module MiGA::Cli::Base
20
20
  get: 'Download a dataset from public databases into a MiGA project',
21
21
  ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
22
22
  gtdb_get: 'Download all genomes in a taxon from GTDB into a MiGA project',
23
+ seqcode_get: 'Download all type genomes from SeqCode into a MiGA project',
23
24
  rm: 'Remove a dataset from a MiGA project',
24
25
  find: 'Find unregistered datasets based on result files',
25
26
  ln: 'Link datasets (including results) from one project to another',
@@ -15,16 +15,22 @@ module MiGA::Common::Net
15
15
  def known_hosts(name)
16
16
  case name.to_sym
17
17
  when :miga_online_ftp
18
- 'ftp://microbial-genomes.org//' # <- // to simplify chdir in connection
18
+ "ftp://#{main_server}//" # <- // to simplify chdir in connection
19
19
  when :miga_db
20
- 'ftp://microbial-genomes.org/db'
20
+ "ftp://#{main_server}/db"
21
21
  when :miga_dist
22
- 'ftp://microbial-genomes.org/dist'
22
+ "ftp://#{main_server}/dist"
23
23
  else
24
24
  raise "Unrecognized server name: #{host}"
25
25
  end
26
26
  end
27
27
 
28
+ ##
29
+ # Returns the address of the main MiGA server
30
+ def main_server
31
+ 'gatech.microbial-genomes.org'
32
+ end
33
+
28
34
  ##
29
35
  # Connect to an FTP +host+ (String) or a known host name (Symbol, see
30
36
  # +.known_hosts+)
@@ -14,6 +14,7 @@ module MiGA::RemoteDataset::Base
14
14
  @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
15
15
  @@_EBI_API = 'https://www.ebi.ac.uk/Tools'
16
16
  @@_GTDB_API = 'https://api.gtdb.ecogenomic.org'
17
+ @@_SEQCODE_API = 'https://disc-genomics.uibk.ac.at/seqcode'
17
18
  @@_NCBI_API_KEY = lambda { |url|
18
19
  ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
19
20
  }
@@ -65,6 +66,18 @@ module MiGA::RemoteDataset::Base
65
66
  map_to_universe: :ncbi,
66
67
  headers: 'accept: application/json' # < TODO not currently supported
67
68
  },
69
+ seqcode: {
70
+ dbs: {
71
+ # These are dummy entries plugged directly to +ncbi_*_rest+
72
+ assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
73
+ nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
74
+ # This is the list of type genomes
75
+ :'type-genomes' => { stage: :metadata, format: :json }
76
+ },
77
+ url: "#{@@_SEQCODE_API}/%1$s.json?%4$s",
78
+ method: :rest,
79
+ map_to_universe: :ncbi
80
+ },
68
81
  ncbi: {
69
82
  dbs: {
70
83
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
@@ -133,6 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
133
133
  when :gtdb
134
134
  # Get taxonomy
135
135
  @metadata[:tax] = get_gtdb_taxonomy
136
+ when :seqcode
137
+ # Do nothing, taxonomy already defined
136
138
  end
137
139
 
138
140
  if metadata[:get_ncbi_taxonomy]
@@ -206,7 +208,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
206
208
  def ncbi_asm_json_doc
207
209
  return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
208
210
 
209
- if db == :assembly && %i[ncbi gtdb].include?(universe)
211
+ if db == :assembly && %i[ncbi gtdb seqcode].include?(universe)
210
212
  metadata[:ncbi_asm] ||= ids.first
211
213
  end
212
214
  return nil unless metadata[:ncbi_asm]
@@ -244,6 +246,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
244
246
  ln
245
247
  end
246
248
 
249
+ alias :get_ncbi_taxid_from_seqcode :get_ncbi_taxid_from_ncbi
247
250
  alias :get_ncbi_taxid_from_gtdb :get_ncbi_taxid_from_ncbi
248
251
 
249
252
  def get_ncbi_taxid_from_ebi
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.2, 16, 1].freeze
15
+ VERSION = [1.2, 17, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem relese.
23
- VERSION_DATE = Date.new(2023, 2, 13)
23
+ VERSION_DATE = Date.new(2023, 2, 14)
24
24
 
25
25
  ##
26
26
  # References of MiGA
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.16.1
4
+ version: 1.2.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-13 00:00:00.000000000 Z
11
+ date: 2023-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -147,6 +147,7 @@ files:
147
147
  - lib/miga/cli/action/download/base.rb
148
148
  - lib/miga/cli/action/download/gtdb.rb
149
149
  - lib/miga/cli/action/download/ncbi.rb
150
+ - lib/miga/cli/action/download/seqcode.rb
150
151
  - lib/miga/cli/action/edit.rb
151
152
  - lib/miga/cli/action/env.rb
152
153
  - lib/miga/cli/action/files.rb
@@ -170,6 +171,7 @@ files:
170
171
  - lib/miga/cli/action/quality_wf.rb
171
172
  - lib/miga/cli/action/rm.rb
172
173
  - lib/miga/cli/action/run.rb
174
+ - lib/miga/cli/action/seqcode_get.rb
173
175
  - lib/miga/cli/action/stats.rb
174
176
  - lib/miga/cli/action/summary.rb
175
177
  - lib/miga/cli/action/tax_dist.rb