miga-base 1.2.10.2 → 1.2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc04109959a53156953d2a20365a0af4555010b119bfcf632558d2a6c22b70e4
4
- data.tar.gz: b7aa96e71d322c590289f72d8735153f23741335c657dab8343f68835da21952
3
+ metadata.gz: 44cd40923e839969a12a0111a071a405d8cab376a0fa6e7102a21cafd33774a9
4
+ data.tar.gz: 5d1d0af6324b8c125a178b316a54e90eff2472aa7731407fc4d315a8b3fe9a36
5
5
  SHA512:
6
- metadata.gz: 5c2485fd256fd2cb2861ddf5510213aac302ec5fdb4e8429e752ea2cf2c184ba0ddb44f84f9f4077cef72be345ef0b204dcf019f7b5403c9f03974ba3ccd785d
7
- data.tar.gz: 84974ec7a124769a7fffa8bdcd703720e0f9bfb9b842d92ac8996b4cad4dbb3667a806fc164adf8e73414db15b8a083376a6ec89cf3aaf40a8773d7b9aa59f43
6
+ metadata.gz: 8f70cb028127bd30f5293005afaf73cf10c02a003cef1e027d85b3c6a2f61377e95be5962c03f6dd199f63da252160b21b7142c135a305aacb7f46b6551c90cf
7
+ data.tar.gz: 50a8676d741894e58a1b4c9b5f53ee665ff938e2814bf4570e204f26d38de7952aab1ee36c01a458021af3bba166432572d79f84f1b5fb430684bf9034506e97
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/remote_dataset'
4
+ module MiGA::Cli::Action::Download
5
+ end
6
+
7
+ ##
8
+ # Helper module including download functions for the *_get actions
9
+ module MiGA::Cli::Action::Download::Base
10
+ def cli_filters(opt)
11
+ opt.on(
12
+ '--blacklist PATH',
13
+ 'A file with dataset names to blacklist'
14
+ ) { |v| cli[:blacklist] = v }
15
+ cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
16
+ opt.on(
17
+ '--ignore-until STRING',
18
+ 'Ignores all datasets until a name is found (useful for large reruns)'
19
+ ) { |v| cli[:ignore_until] = v }
20
+ cli.opt_flag(
21
+ opt, 'get-metadata',
22
+ 'Only download and update metadata for existing datasets', :get_md
23
+ )
24
+ end
25
+
26
+ def cli_save_actions(opt)
27
+ cli.opt_flag(
28
+ opt, '--only-metadata',
29
+ 'Create datasets without input data but retrieve all metadata',
30
+ :only_md
31
+ )
32
+ opt.on(
33
+ '--save-every INT', Integer,
34
+ 'Save project every this many downloaded datasets',
35
+ 'If zero, it saves the project only once upon completion',
36
+ "By default: #{cli[:save_every]}"
37
+ ) { |v| cli[:save_every] = v }
38
+ opt.on(
39
+ '-q', '--query',
40
+ 'Register the datasets as queries, not reference datasets'
41
+ ) { |v| cli[:query] = v }
42
+ opt.on(
43
+ '-u', '--unlink',
44
+ 'Unlink all datasets in the project missing from the download list'
45
+ ) { |v| cli[:unlink] = v }
46
+ opt.on(
47
+ '-R', '--remote-list PATH',
48
+ 'Path to an output file with the list of all datasets listed remotely'
49
+ ) { |v| cli[:remote_list] = v }
50
+ end
51
+
52
+ def discard_blacklisted(ds)
53
+ unless cli[:blacklist].nil?
54
+ cli.say "Discarding datasets in #{cli[:blacklist]}"
55
+ File.readlines(cli[:blacklist])
56
+ .select { |i| i !~ /^#/ }
57
+ .map(&:chomp)
58
+ .each { |i| ds.delete i }
59
+ end
60
+ ds
61
+ end
62
+
63
+ def impose_limit(ds)
64
+ max = cli[:max_datasets].to_i
65
+ if !max.zero? && max < ds.size
66
+ cli.say "Subsampling list from #{ds.size} to #{max} datasets"
67
+ sample = ds.keys.sample(max)
68
+ ds.select! { |k, _| sample.include? k }
69
+ end
70
+ ds
71
+ end
72
+
73
+ def download_entries(ds, p)
74
+ cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
75
+ p.do_not_save = true if cli[:save_every] != 1
76
+ ignore = !cli[:ignore_until].nil?
77
+ downloaded = 0
78
+ d = []
79
+ ds.each do |name, body|
80
+ d << name
81
+ cli.puts name
82
+ ignore = false if ignore && name == cli[:ignore_until]
83
+ next if ignore || p.dataset(name).nil? == cli[:get_md]
84
+
85
+ downloaded += 1
86
+ unless cli[:dry]
87
+ save_entry(name, body, p)
88
+ p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
89
+ end
90
+ end
91
+ p.do_not_save = false
92
+ p.save! if cli[:save_every] != 1
93
+ [d, downloaded]
94
+ end
95
+
96
+ def save_entry(name, body, p)
97
+ cli.say ' Locating remote dataset'
98
+ body[:md][:metadata_only] = true if cli[:only_md]
99
+ rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
100
+ if cli[:get_md]
101
+ cli.say ' Updating dataset'
102
+ rd.update_metadata(p.dataset(name), body[:md])
103
+ else
104
+ cli.say ' Creating dataset'
105
+ rd.save_to(p, name, !cli[:query], body[:md])
106
+ cli.add_metadata(p.add_dataset(name))
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action/download/base'
4
+
5
+ ##
6
+ # Helper module including download functions for the gtdb_get action
7
+ module MiGA::Cli::Action::Download::Gtdb
8
+ include MiGA::Cli::Action::Download::Base
9
+
10
+ def cli_task_flags(opt)
11
+ cli.opt_flag(
12
+ opt, 'reference',
13
+ 'Download only reference genomes. By default: download all'
14
+ )
15
+ end
16
+
17
+ def cli_name_modifiers(opt)
18
+ opt.on(
19
+ '--no-version-name',
20
+ 'Do not add sequence version to the dataset name'
21
+ ) { |v| cli[:add_version] = v }
22
+ end
23
+
24
+ def sanitize_cli
25
+ cli.ensure_par(taxon: '-T')
26
+ cli[:save_every] = 1 if cli[:dry]
27
+ end
28
+
29
+ def remote_list
30
+ cli.say 'Downloading genome list'
31
+ extra = ['sp_reps_only=' + cli[:reference].to_s]
32
+ json = MiGA::RemoteDataset.download(
33
+ :gtdb, :taxon, cli[:taxon], :genomes, nil, extra
34
+ )
35
+ doc = MiGA::Json.parse(json, contents: true)
36
+
37
+ Hash[
38
+ doc.map do |acc|
39
+ [
40
+ remote_row_name(acc),
41
+ {
42
+ ids: [acc], db: :assembly, universe: :gtdb,
43
+ md: { type: :genome, gtdb_assembly: acc }
44
+ }
45
+ ]
46
+ end
47
+ ]
48
+ end
49
+
50
+ def remote_row_name(asm)
51
+ acc = asm.to_s
52
+ acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
53
+ acc.miga_name
54
+ end
55
+ end
@@ -1,11 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'miga/remote_dataset'
3
+ require 'miga/cli/action/download/base'
4
4
  require 'csv'
5
5
 
6
6
  ##
7
7
  # Helper module including download functions for the ncbi_get action
8
- module MiGA::Cli::Action::NcbiGet::Downloads
8
+ module MiGA::Cli::Action::Download::Ncbi
9
+ include MiGA::Cli::Action::Download::Base
10
+
9
11
  def cli_task_flags(opt)
10
12
  cli.opt_flag(
11
13
  opt, 'reference',
@@ -39,48 +41,6 @@ module MiGA::Cli::Action::NcbiGet::Downloads
39
41
  )
40
42
  end
41
43
 
42
- def cli_filters(opt)
43
- opt.on(
44
- '--blacklist PATH',
45
- 'A file with dataset names to blacklist'
46
- ) { |v| cli[:blacklist] = v }
47
- cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
48
- opt.on(
49
- '--ignore-until STRING',
50
- 'Ignores all datasets until a name is found (useful for large reruns)'
51
- ) { |v| cli[:ignore_until] = v }
52
- cli.opt_flag(
53
- opt, 'get-metadata',
54
- 'Only download and update metadata for existing datasets', :get_md
55
- )
56
- end
57
-
58
- def cli_save_actions(opt)
59
- cli.opt_flag(
60
- opt, 'only-metadata',
61
- 'Create datasets without input data but retrieve all metadata',
62
- :only_md
63
- )
64
- opt.on(
65
- '--save-every INT', Integer,
66
- 'Save project every this many downloaded datasets',
67
- 'If zero, it saves the project only once upon completion',
68
- "By default: #{cli[:save_every]}"
69
- ) { |v| cli[:save_every] = v }
70
- opt.on(
71
- '-q', '--query',
72
- 'Register the datasets as queries, not reference datasets'
73
- ) { |v| cli[:query] = v }
74
- opt.on(
75
- '-u', '--unlink',
76
- 'Unlink all datasets in the project missing from the download list'
77
- ) { |v| cli[:unlink] = v }
78
- opt.on(
79
- '-R', '--remote-list PATH',
80
- 'Path to an output file with the list of all datasets listed remotely'
81
- ) { |v| cli[:remote_list] = v }
82
- end
83
-
84
44
  def sanitize_cli
85
45
  cli.ensure_par(taxon: '-T')
86
46
  tasks = %w[reference complete chromosome scaffold contig]
@@ -169,62 +129,4 @@ module MiGA::Cli::Action::NcbiGet::Downloads
169
129
  url_param[:q] += ')'
170
130
  url_base + URI.encode_www_form(url_param)
171
131
  end
172
-
173
- def discard_blacklisted(ds)
174
- unless cli[:blacklist].nil?
175
- cli.say "Discarding datasets in #{cli[:blacklist]}"
176
- File.readlines(cli[:blacklist])
177
- .select { |i| i !~ /^#/ }
178
- .map(&:chomp)
179
- .each { |i| ds.delete i }
180
- end
181
- ds
182
- end
183
-
184
- def impose_limit(ds)
185
- max = cli[:max_datasets].to_i
186
- if !max.zero? && max < ds.size
187
- cli.say "Subsampling list from #{ds.size} to #{max} datasets"
188
- sample = ds.keys.sample(max)
189
- ds.select! { |k, _| sample.include? k }
190
- end
191
- ds
192
- end
193
-
194
- def download_entries(ds, p)
195
- cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
196
- p.do_not_save = true if cli[:save_every] != 1
197
- ignore = !cli[:ignore_until].nil?
198
- downloaded = 0
199
- d = []
200
- ds.each do |name, body|
201
- d << name
202
- cli.puts name
203
- ignore = false if ignore && name == cli[:ignore_until]
204
- next if ignore || p.dataset(name).nil? == cli[:get_md]
205
-
206
- downloaded += 1
207
- unless cli[:dry]
208
- save_entry(name, body, p)
209
- p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
210
- end
211
- end
212
- p.do_not_save = false
213
- p.save! if cli[:save_every] != 1
214
- [d, downloaded]
215
- end
216
-
217
- def save_entry(name, body, p)
218
- cli.say ' Locating remote dataset'
219
- body[:md][:metadata_only] = true if cli[:only_md]
220
- rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
221
- if cli[:get_md]
222
- cli.say ' Updating dataset'
223
- rd.update_metadata(p.dataset(name), body[:md])
224
- else
225
- cli.say ' Creating dataset'
226
- rd.save_to(p, name, !cli[:query], body[:md])
227
- cli.add_metadata(p.add_dataset(name))
228
- end
229
- end
230
132
  end
@@ -17,7 +17,8 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
17
17
  ) { |v| cli[:ids] = v }
18
18
  opt.on(
19
19
  '-U', '--universe STRING',
20
- "Universe of the remote database. By default: #{cli[:universe]}"
20
+ "Universe of the remote database. By default: #{cli[:universe]}",
21
+ "Supported: #{MiGA::RemoteDataset.UNIVERSE.keys.join(', ')}"
21
22
  ) { |v| cli[:universe] = v.to_sym }
22
23
  opt.on(
23
24
  '--db STRING',
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ class MiGA::Cli::Action::GtdbGet < MiGA::Cli::Action
6
+ require 'miga/cli/action/download/gtdb'
7
+ include MiGA::Cli::Action::Download::Gtdb
8
+
9
+ def parse_cli
10
+ cli.defaults = {
11
+ query: false, unlink: false,
12
+ reference: false, add_version: true, dry: false,
13
+ get_md: false, only_md: false, save_every: 1
14
+ }
15
+ cli.parse do |opt|
16
+ cli.opt_object(opt, [:project])
17
+ opt.on(
18
+ '-T', '--taxon STRING',
19
+ '(Mandatory) Taxon name in GTDB format (e.g., g__Escherichia)'
20
+ ) { |v| cli[:taxon] = v }
21
+ opt.on(
22
+ '--max INT', Integer,
23
+ 'Maximum number of datasets to download (by default: unlimited)'
24
+ ) { |v| cli[:max_datasets] = v }
25
+ opt.on(
26
+ '-m', '--metadata STRING',
27
+ 'Metadata as key-value pairs separated by = and delimited by comma',
28
+ 'Values are saved as strings except for booleans (true / false) or nil'
29
+ ) { |v| cli[:metadata] = v }
30
+ cli_task_flags(opt)
31
+ cli_name_modifiers(opt)
32
+ cli_filters(opt)
33
+ cli_save_actions(opt)
34
+ end
35
+ end
36
+
37
+ def perform
38
+ sanitize_cli
39
+ p = cli.load_project
40
+ ds = remote_list
41
+ ds = discard_blacklisted(ds)
42
+ ds = impose_limit(ds)
43
+ d, downloaded = download_entries(ds, p)
44
+
45
+ # Finalize
46
+ cli.say "Datasets listed: #{d.size}"
47
+ act = cli[:dry] ? 'to download' : 'downloaded'
48
+ cli.say "Datasets #{act}: #{downloaded}"
49
+ unless cli[:remote_list].nil?
50
+ File.open(cli[:remote_list], 'w') do |fh|
51
+ d.each { |i| fh.puts i }
52
+ end
53
+ end
54
+ return unless cli[:unlink]
55
+
56
+ unlink = p.dataset_names - d
57
+ unlink.each { |i| p.unlink_dataset(i).remove! }
58
+ cli.say "Datasets unlinked: #{unlink.size}"
59
+ end
60
+
61
+ end
@@ -3,8 +3,8 @@
3
3
  require 'miga/cli/action'
4
4
 
5
5
  class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
6
- require 'miga/cli/action/ncbi_get/downloads'
7
- include MiGA::Cli::Action::NcbiGet::Downloads
6
+ require 'miga/cli/action/download/ncbi'
7
+ include MiGA::Cli::Action::Download::Ncbi
8
8
 
9
9
  def parse_cli
10
10
  cli.defaults = {
data/lib/miga/cli/base.rb CHANGED
@@ -19,6 +19,7 @@ module MiGA::Cli::Base
19
19
  add: 'Create a dataset in a MiGA project',
20
20
  get: 'Download a dataset from public databases into a MiGA project',
21
21
  ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
22
+ gtdb_get: 'Download all genomes in a taxon from GTDB into a MiGA project',
22
23
  rm: 'Remove a dataset from a MiGA project',
23
24
  find: 'Find unregistered datasets based on result files',
24
25
  ln: 'Link datasets (including results) from one project to another',
@@ -384,7 +384,10 @@ module MiGA::Dataset::Result
384
384
  ##
385
385
  # Add result type +:stats+ at +base+ (no +_opts+ supported)
386
386
  def add_result_stats(base, _opts)
387
- MiGA::Result.new("#{base}.json")
387
+ add_files_to_ds_result(
388
+ MiGA::Result.new("#{base}.json"), name,
389
+ trna_list: '.trna.txt'
390
+ )
388
391
  end
389
392
 
390
393
  ##
@@ -12,6 +12,8 @@ end
12
12
 
13
13
  module MiGA::RemoteDataset::Base
14
14
  @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
15
+ @@_EBI_API = 'https://www.ebi.ac.uk/Tools'
16
+ @@_GTDB_API = 'https://api.gtdb.ecogenomic.org'
15
17
  @@_NCBI_API_KEY = lambda { |url|
16
18
  ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
17
19
  }
@@ -43,9 +45,26 @@ module MiGA::RemoteDataset::Base
43
45
  },
44
46
  ebi: {
45
47
  dbs: { embl: { stage: :assembly, format: :fasta } },
46
- url: 'https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s',
48
+ url: "#{@@_EBI_API}/dbfetch/dbfetch/%1$s/%2$s/%3$s",
47
49
  method: :rest
48
50
  },
51
+ gtdb: {
52
+ dbs: {
53
+ # This is a dummy entry plugged directly to +ncbi_asm_rest+
54
+ assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
55
+ # The 'taxon' namespace actually returns a list of genomes (+format+)
56
+ taxon: {
57
+ stage: :metadata, format: :genomes, map_to: [:assembly],
58
+ extra: ['sp_reps_only=false']
59
+ },
60
+ # The 'genome' namespace actually returns the taxonomy (+format+)
61
+ genome: { stage: :metadata, format: 'taxon-history' }
62
+ },
63
+ url: "#{@@_GTDB_API}/%1$s/%2$s/%3$s?%4$s",
64
+ method: :rest,
65
+ map_to_universe: :ncbi,
66
+ headers: 'accept: application/json' # < TODO not currently supported
67
+ },
49
68
  ncbi: {
50
69
  dbs: {
51
70
  nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
@@ -49,7 +49,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
49
49
  @@UNIVERSE.keys.include?(@universe) or
50
50
  raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
51
51
  @@UNIVERSE[@universe][:dbs].include?(@db) or
52
- raise "Unknown Database: #{@db}. Try: #{@@UNIVERSE[@universe][:dbs]}"
52
+ raise "Unknown Database: #{@db}. Try: #{@@UNIVERSE[@universe][:dbs].keys}"
53
53
  @_ncbi_asm_json_doc = nil
54
54
  # FIXME: Part of the +map_to+ support:
55
55
  # unless @@UNIVERSE[@universe][:dbs][@db][:map_to].nil?
@@ -104,6 +104,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
104
104
  when :ebi, :ncbi, :web
105
105
  # Get taxonomy
106
106
  @metadata[:tax] = get_ncbi_taxonomy
107
+ when :gtdb
108
+ # Get taxonomy
109
+ @metadata[:tax] = get_gtdb_taxonomy
107
110
  end
108
111
  @metadata = get_type_status(metadata)
109
112
  end
@@ -129,10 +132,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
129
132
  end
130
133
 
131
134
  ##
132
- # Get NCBI taxonomy as MiGA::Taxonomy.
135
+ # Get NCBI taxonomy as MiGA::Taxonomy
133
136
  def get_ncbi_taxonomy
134
- tax_id = get_ncbi_taxid
135
- return nil if tax_id.nil?
137
+ tax_id = get_ncbi_taxid or return
136
138
 
137
139
  lineage = { ns: 'ncbi' }
138
140
  doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
@@ -147,12 +149,34 @@ class MiGA::RemoteDataset < MiGA::MiGA
147
149
  MiGA::Taxonomy.new(lineage)
148
150
  end
149
151
 
152
+ ##
153
+ # Get GTDB taxonomy as MiGA::Taxonomy
154
+ def get_gtdb_taxonomy
155
+ gtdb_genome = metadata[:gtdb_assembly] or return
156
+
157
+ doc = MiGA::Json.parse(
158
+ MiGA::RemoteDataset.download(
159
+ :gtdb, :genome, gtdb_genome, 'taxon-history', nil, ['']
160
+ ),
161
+ contents: true
162
+ )
163
+ lineage = { ns: 'gtdb' }
164
+ lineage.merge!(doc.first) # Get only the latest available classification
165
+ release = lineage.delete(:release)
166
+ @metadata[:gtdb_release] = release
167
+ lineage.transform_values! { |v| v.gsub(/^\S__/, '') }
168
+ MiGA.DEBUG "Got lineage from #{release}: #{lineage}"
169
+ MiGA::Taxonomy.new(lineage)
170
+ end
171
+
150
172
  ##
151
173
  # Get the JSON document describing an NCBI assembly entry.
152
174
  def ncbi_asm_json_doc
153
175
  return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
154
176
 
155
- metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
177
+ if db == :assembly && %i[ncbi gtdb].include?(universe)
178
+ metadata[:ncbi_asm] ||= ids.first
179
+ end
156
180
  return nil unless metadata[:ncbi_asm]
157
181
 
158
182
  ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
data/lib/miga/taxonomy.rb CHANGED
@@ -63,6 +63,8 @@ class MiGA::Taxonomy < MiGA::MiGA
63
63
  @ranks[rank.to_sym]
64
64
  end
65
65
 
66
+ alias :fetch :[]
67
+
66
68
  ##
67
69
  # Get the alternative taxonomies.
68
70
  # - If +which+ is nil (default), returns all alternative taxonomies as Array
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.2, 10, 2].freeze
15
+ VERSION = [1.2, 12, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem relese.
23
- VERSION_DATE = Date.new(2022, 12, 14)
23
+ VERSION_DATE = Date.new(2022, 12, 30)
24
24
 
25
25
  ##
26
26
  # References of MiGA
data/scripts/stats.bash CHANGED
@@ -9,7 +9,32 @@ DIR="$PROJECT/data/90.stats"
9
9
  cd "$DIR"
10
10
 
11
11
  # Initialize
12
- miga date > "$DATASET.start"
12
+ miga date > "${DATASET}.start"
13
+
14
+ # tRNAscan-SE
15
+ fa="../05.assembly/${DATASET}.LargeContigs.fna"
16
+ if [[ -s "$fa" ]] ; then
17
+ d="$(miga ls -P "$PROJECT" -D "$DATASET" -m tax:d | awk '{print $2}')"
18
+ if [[ "$d" == "Bacteria" || "$d" == "Archaea" || "$d" == "Eukaryota" ]] ; then
19
+ dom_opt="-$(echo "$d" | perl -pe 's/(\S).*/$1/')"
20
+ out="${DATASET}.trna.txt"
21
+ # `echo O` is to avoid a hang from a pre-existing output file.
22
+ # This is better than pre-checking (and removing), because it avoids
23
+ # the (unlikely) scenario of a file racing (e.g., a file created right
24
+ # before tRNAscan-SE starts, or a `rm` failure).
25
+ #
26
+ # The trailing `|| true` is to treat failure as non-fatal
27
+ echo O | tRNAscan-SE $dom_opt -o "$out" -q "$fa" || true
28
+ if [[ -s "$out" ]] ; then
29
+ cnt=$(tail -n +4 "$out" | wc -l | awk '{print $1}')
30
+ aa="$(tail -n +4 "$out" | grep -v 'pseudo$' | awk '{print $5}' \
31
+ | grep -v 'Undet' | perl -pe 's/^f?([A-Za-z]+)[0-9]?/$1/' \
32
+ | sort | uniq | wc -l | awk '{print $1}')"
33
+ miga edit -P "$PROJECT" -D "$DATASET" \
34
+ -m "trna_count=Int($cnt),trna_aa=Int($aa)"
35
+ fi
36
+ fi
37
+ fi
13
38
 
14
39
  # Calculate statistics
15
40
  for i in raw_reads trimmed_fasta assembly cds essential_genes ssu distances taxonomy ; do
@@ -20,3 +20,4 @@ Fastp (reads) fastp https://github.com/OpenGene/fastp
20
20
  Temurin (rdp) java https://adoptium.net/ Any Java VM would work
21
21
  MyTaxa (mytaxa) MyTaxa http://enve-omics.ce.gatech.edu/mytaxa
22
22
  Krona (mytaxa) ktImportText https://github.com/marbl/Krona/wiki
23
+ tRNAscan-SE tRNAscan-SE http://trna.ucsc.edu/tRNAscan-SE/ Required version: 2+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.10.2
4
+ version: 1.2.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-14 00:00:00.000000000 Z
11
+ date: 2022-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -141,6 +141,9 @@ files:
141
141
  - lib/miga/cli/action/derep_wf.rb
142
142
  - lib/miga/cli/action/doctor.rb
143
143
  - lib/miga/cli/action/doctor/base.rb
144
+ - lib/miga/cli/action/download/base.rb
145
+ - lib/miga/cli/action/download/gtdb.rb
146
+ - lib/miga/cli/action/download/ncbi.rb
144
147
  - lib/miga/cli/action/edit.rb
145
148
  - lib/miga/cli/action/env.rb
146
149
  - lib/miga/cli/action/files.rb
@@ -148,6 +151,7 @@ files:
148
151
  - lib/miga/cli/action/generic.rb
149
152
  - lib/miga/cli/action/get.rb
150
153
  - lib/miga/cli/action/get_db.rb
154
+ - lib/miga/cli/action/gtdb_get.rb
151
155
  - lib/miga/cli/action/index_wf.rb
152
156
  - lib/miga/cli/action/init.rb
153
157
  - lib/miga/cli/action/init/daemon_helper.rb
@@ -156,7 +160,6 @@ files:
156
160
  - lib/miga/cli/action/ln.rb
157
161
  - lib/miga/cli/action/ls.rb
158
162
  - lib/miga/cli/action/ncbi_get.rb
159
- - lib/miga/cli/action/ncbi_get/downloads.rb
160
163
  - lib/miga/cli/action/new.rb
161
164
  - lib/miga/cli/action/next_step.rb
162
165
  - lib/miga/cli/action/option.rb