miga-base 1.2.16.2 → 1.2.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 77f8bfc9a80f71c72e391d1a87f5ecadf2fd072106a9b00b59f5435e838dc1e7
4
- data.tar.gz: 91d2ea45896ced1ac520ff9e8ee64328bd4c2507875b996f1d36598fd5cb16f7
3
+ metadata.gz: a33ac43db8c09ad7cad760c1bc82102ca1629beba12bea7c15d5391569210d95
4
+ data.tar.gz: 5104348e8471729c6ce59612a88e624f44d382480e313e00329e8766ead0e8a5
5
5
  SHA512:
6
- metadata.gz: 0a637803294e81239018c73840aae43f3313d7d9fd1c9ce95ea2b47063adb0c349193479af7072aa369aea66030b40e52533f494199dc29601f11aa8393c3217
7
- data.tar.gz: 81246c2bd29b4dd1de96183ccd366b6b77f9067f3a0d138c19aba3e1b09e22f1e54d564762670e4418d388dd380ca40f8c9dc1853e65460e0484b93cac03c59a
6
+ metadata.gz: fd9c726cc1da17dc2146772491a580d471739876f036368fb5a2c2fab475604f0ae858b3e02cebcb7dd22c029d01ade2010a68acb86e752a85e7723dabcebec0
7
+ data.tar.gz: 898c7b240b5d1c8e8f911f2111d9e326752cdb25b36c646aa21402c140c52187382fc9bf4cf621b5a0b3cfc715f0d596beefaaece3827822b4ad515d6e420aac
data/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
- The Artistic License 2.0
1
+ The Artistic License 2.0
2
2
 
3
- Copyright (c) 2016 Luis M Rodriguez-R
3
+ Copyright (c) 2016-2023 Luis M Rodriguez-R (Universität Innsbruck)
4
4
 
5
5
  Everyone is permitted to copy and distribute verbatim copies
6
6
  of this license document, but changing it is not allowed.
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action/download/base'
4
+
5
+ ##
6
+ # Helper module including download functions for the seqcode_get action
7
+ module MiGA::Cli::Action::Download::Seqcode
8
+ include MiGA::Cli::Action::Download::Base
9
+
10
+ def cli_task_flags(_opt)
11
+ end
12
+
13
+ def cli_name_modifiers(opt)
14
+ opt.on(
15
+ '--no-version-name',
16
+ 'Do not add sequence version to the dataset name'
17
+ ) { |v| cli[:add_version] = v }
18
+ end
19
+
20
+ def sanitize_cli
21
+ cli[:save_every] = 1 if cli[:dry]
22
+ end
23
+
24
+ def remote_list
25
+ cli.say 'Downloading genome list'
26
+ current_page = 1
27
+ total_pages = 1
28
+ ds = {}
29
+
30
+ while current_page <= total_pages
31
+ json = MiGA::RemoteDataset.download(
32
+ :seqcode, :'type-genomes', nil, :json, nil,
33
+ ["page=#{current_page}"]
34
+ )
35
+ doc = MiGA::Json.parse(json, contents: true)
36
+ current_page = doc[:current_page] + 1
37
+ total_pages = doc[:total_pages]
38
+
39
+ doc[:values].each do |name|
40
+ next unless name[:type_material]
41
+ acc = name[:type_material].values.first
42
+ db = name[:type_material].keys.first
43
+ next unless %i[assembly nuccore].include?(db) # No INSDC genome, ignore
44
+
45
+ classif = name[:classification] || {}
46
+ tax = MiGA::Taxonomy.new(Hash[classif.map { |i| [i[:rank], i[:name]] }])
47
+ tax << { 'ns' => 'seqcode', name[:rank] => name[:name] }
48
+ d = {
49
+ ids: [acc], db: db, universe: :seqcode,
50
+ md: {
51
+ type: :genome, tax: tax, is_type: true,
52
+ type_rel: 'SeqCode type genome',
53
+ seqcode_url: "https://seqco.de/i:#{name[:id]}"
54
+ }
55
+ }
56
+ d[:md][:get_ncbi_taxonomy] = true if cli[:get_ncbi_taxonomy]
57
+ ds[remote_row_name(tax, db, acc)] = d
58
+ end
59
+ end
60
+ ds
61
+ end
62
+
63
+ def remote_row_name(tax, db, asm)
64
+ acc = asm.to_s
65
+ acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
66
+ db_short = { assembly: 'asm', nuccore: 'gb' }[db]
67
+ "#{tax.lowest[1]}_#{db_short}_#{acc}".miga_name
68
+ end
69
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ class MiGA::Cli::Action::SeqcodeGet < MiGA::Cli::Action
6
+ require 'miga/cli/action/download/seqcode'
7
+ include MiGA::Cli::Action::Download::Seqcode
8
+
9
+ def parse_cli
10
+ cli.defaults = {
11
+ query: false, unlink: false,
12
+ reference: false, add_version: true, dry: false,
13
+ get_md: false, only_md: false, save_every: 1
14
+ }
15
+ cli.parse do |opt|
16
+ cli.opt_object(opt, [:project])
17
+ cli_base_flags(opt)
18
+ opt.on(
19
+ '--ncbi-taxonomy',
20
+ 'Retrieve NCBI taxonomy instead of SeqCode taxonomy'
21
+ ) { |v| cli[:get_ncbi_taxonomy] = v }
22
+ cli_task_flags(opt)
23
+ cli_name_modifiers(opt)
24
+ cli_filters(opt)
25
+ cli_save_actions(opt)
26
+ opt.on(
27
+ '--ncbi-api-key STRING',
28
+ 'NCBI API key'
29
+ ) { |v| ENV['NCBI_API_KEY'] = v }
30
+ end
31
+ end
32
+
33
+ def perform
34
+ generic_perform
35
+ end
36
+
37
+ end
@@ -47,10 +47,15 @@ module MiGA::Cli::Action::Wf
47
47
  '--gtdb-ref',
48
48
  'Only download reference anchor genomes in GTDB (requires -G)'
49
49
  ) { |v| cli[:gtdb_ref] = v }
50
+ opt.on(
51
+ '-S', '--seqcode-type',
52
+ 'Download all type genomes from the SeqCode Registry'
53
+ ) { |v| cli[:seqcode_type] = v }
50
54
  opt.on(
51
55
  '--max-download INT', Integer,
52
- 'Maximum number of genomes to download (by default: unlimited)'
53
- ) { |v| cli[:ncbi_max] = v }
56
+ 'Maximum number of genomes to download (by default: unlimited)',
57
+ 'It applies independently to -T, -G and --S'
58
+ ) { |v| cli[:max_download] = v }
54
59
  end
55
60
  if params[:qual]
56
61
  opt.on(
@@ -131,24 +136,39 @@ module MiGA::Cli::Action::Wf
131
136
  paired = cli[:input_type].to_s.include?('_paired')
132
137
  cli[:regexp] ||= MiGA::Cli.FILE_REGEXP(paired)
133
138
 
134
- # Create empty project
139
+ # Create empty project and populate with datasets
140
+ p = initialize_empty_project(p_metadata)
141
+ download_datasets
142
+ import_datasets(stage)
143
+
144
+ # Define datasets metadata
145
+ p.load
146
+ d_metadata[:type] = cli[:dataset_type]
147
+ p.each_dataset { |d| transfer_metadata(d, d_metadata) }
148
+ p
149
+ end
150
+
151
+ def initialize_empty_project(metadata)
135
152
  call_cli(
136
153
  ['new', '-P', cli[:outdir], '-t', cli[:project_type]]
137
154
  ) unless MiGA::Project.exist? cli[:outdir]
138
155
 
139
156
  # Define project metadata
140
157
  p = cli.load_project(:outdir, '-o')
141
- p_metadata[:type] = cli[:project_type]
142
- transfer_metadata(p, p_metadata)
158
+ metadata[:type] = cli[:project_type]
159
+ transfer_metadata(p, metadata)
143
160
  %i[haai_p aai_p ani_p ess_coll min_qual].each do |i|
144
161
  p.set_option(i, cli[i])
145
162
  end
163
+ p
164
+ end
146
165
 
166
+ def download_datasets
147
167
  # Download datasets from NCBI
148
168
  unless cli[:ncbi_taxon].nil?
149
169
  what = cli[:ncbi_draft] ? '--all' : '--complete'
150
170
  cmd = ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
151
- cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
171
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
152
172
  call_cli(cmd)
153
173
  end
154
174
 
@@ -156,11 +176,19 @@ module MiGA::Cli::Action::Wf
156
176
  unless cli[:gtdb_taxon].nil?
157
177
  cmd = ['gtdb_get', '-P', cli[:outdir], '-T', cli[:gtdb_taxon]]
158
178
  cmd << '--reference' if cli[:gtdb_ref]
159
- cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
179
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
160
180
  call_cli(cmd)
161
181
  end
162
182
 
163
- # Add datasets
183
+ # Download datasets from SeqCode Registry
184
+ if cli[:seqcode_type]
185
+ cmd = ['seqcode_get', '-P', cli[:outdir]]
186
+ cmd += ['--max', cli[:max_download]] if cli[:max_download]
187
+ call_cli(cmd)
188
+ end
189
+ end
190
+
191
+ def import_datasets(stage)
164
192
  call_cli(
165
193
  [
166
194
  'add',
@@ -171,12 +199,6 @@ module MiGA::Cli::Action::Wf
171
199
  '-R', cli[:regexp]
172
200
  ] + cli.files
173
201
  ) unless cli.files.empty?
174
-
175
- # Define datasets metadata
176
- p.load
177
- d_metadata[:type] = cli[:dataset_type]
178
- p.each_dataset { |d| transfer_metadata(d, d_metadata) }
179
- p
180
202
  end
181
203
 
182
204
  def summarize(which = %w[cds assembly essential_genes ssu])
data/lib/miga/cli/base.rb CHANGED
@@ -20,6 +20,7 @@ module MiGA::Cli::Base
20
20
  get: 'Download a dataset from public databases into a MiGA project',
21
21
  ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
22
22
  gtdb_get: 'Download all genomes in a taxon from GTDB into a MiGA project',
23
+ seqcode_get: 'Download all type genomes from SeqCode into a MiGA project',
23
24
  rm: 'Remove a dataset from a MiGA project',
24
25
  find: 'Find unregistered datasets based on result files',
25
26
  ln: 'Link datasets (including results) from one project to another',
@@ -14,6 +14,7 @@ module MiGA::RemoteDataset::Base
14
14
  @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
15
15
  @@_EBI_API = 'https://www.ebi.ac.uk/Tools'
16
16
  @@_GTDB_API = 'https://api.gtdb.ecogenomic.org'
17
+ @@_SEQCODE_API = 'https://disc-genomics.uibk.ac.at/seqcode'
17
18
  @@_NCBI_API_KEY = lambda { |url|
18
19
  ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
19
20
  }
@@ -65,6 +66,18 @@ module MiGA::RemoteDataset::Base
65
66
  map_to_universe: :ncbi,
66
67
  headers: 'accept: application/json' # < TODO not currently supported
67
68
  },
69
+ seqcode: {
70
+ dbs: {
71
+ # These are dummy entries plugged directly to +ncbi_*_rest+
72
+ assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
73
+ nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
74
+ # This is the list of type genomes
75
+ :'type-genomes' => { stage: :metadata, format: :json }
76
+ },
77
+ url: "#{@@_SEQCODE_API}/%1$s.json?%4$s",
78
+ method: :rest,
79
+ map_to_universe: :ncbi
80
+ },
68
81
  ncbi: {
69
82
  dbs: {
70
83
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
@@ -133,6 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
133
133
  when :gtdb
134
134
  # Get taxonomy
135
135
  @metadata[:tax] = get_gtdb_taxonomy
136
+ when :seqcode
137
+ # Do nothing, taxonomy already defined
136
138
  end
137
139
 
138
140
  if metadata[:get_ncbi_taxonomy]
@@ -206,7 +208,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
206
208
  def ncbi_asm_json_doc
207
209
  return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
208
210
 
209
- if db == :assembly && %i[ncbi gtdb].include?(universe)
211
+ if db == :assembly && %i[ncbi gtdb seqcode].include?(universe)
210
212
  metadata[:ncbi_asm] ||= ids.first
211
213
  end
212
214
  return nil unless metadata[:ncbi_asm]
@@ -244,6 +246,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
244
246
  ln
245
247
  end
246
248
 
249
+ alias :get_ncbi_taxid_from_seqcode :get_ncbi_taxid_from_ncbi
247
250
  alias :get_ncbi_taxid_from_gtdb :get_ncbi_taxid_from_ncbi
248
251
 
249
252
  def get_ncbi_taxid_from_ebi
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.2, 16, 2].freeze
15
+ VERSION = [1.2, 17, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2023, 2, 13)
23
+ VERSION_DATE = Date.new(2023, 2, 14)
24
24
 
25
25
  ##
26
26
  # References of MiGA
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.16.2
4
+ version: 1.2.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-13 00:00:00.000000000 Z
11
+ date: 2023-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -147,6 +147,7 @@ files:
147
147
  - lib/miga/cli/action/download/base.rb
148
148
  - lib/miga/cli/action/download/gtdb.rb
149
149
  - lib/miga/cli/action/download/ncbi.rb
150
+ - lib/miga/cli/action/download/seqcode.rb
150
151
  - lib/miga/cli/action/edit.rb
151
152
  - lib/miga/cli/action/env.rb
152
153
  - lib/miga/cli/action/files.rb
@@ -170,6 +171,7 @@ files:
170
171
  - lib/miga/cli/action/quality_wf.rb
171
172
  - lib/miga/cli/action/rm.rb
172
173
  - lib/miga/cli/action/run.rb
174
+ - lib/miga/cli/action/seqcode_get.rb
173
175
  - lib/miga/cli/action/stats.rb
174
176
  - lib/miga/cli/action/summary.rb
175
177
  - lib/miga/cli/action/tax_dist.rb