miga-base 1.0.5.2 → 1.1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6abc7091229bf09e03d261acce2ca2803bf12b2bed846e10ebbf0a41068bc53
4
- data.tar.gz: 11941d0e8e3e86b214372a8138d94899e04b8257ca82389aa2302a79a8773337
3
+ metadata.gz: 4076b3b3a4a4143ac9100ce4d58fada7615f68ad3e6174445510655f62904867
4
+ data.tar.gz: '0975a5feb4c9eb71a474be87dd14b58297ef1aa7bd8612c20f1ce65febbdf980'
5
5
  SHA512:
6
- metadata.gz: 70f9de1fb0c4db798bb0c934c450a3d300de434bf1c0519be1ec611179094542146f0e36d027eef3371eb68e1e376ef66bc42d90e6fafdfe440d78b9bf7bb6fa
7
- data.tar.gz: feda82ca950ae8d28382ff8addc1482a6907e77b303100eeeea2daf44b59efc1919d7d13db197cfc1f7a8d49868d4e9232afcaa8d5ffdebd79a52b507b832ecb
6
+ metadata.gz: ebcb7fe28d415ca9709433975585518eb1ecd8e8270c584b6579da222e4d3733cc20d810787c3f764f6a6136e1a6f09b7cb6b1c00114c3ea9c0885370654f3a7
7
+ data.tar.gz: '082bd856ed21487e5de709e2067f1d3453f824e0ece7a77716c6fbe70d88a16c4d295196d5c6133e5667142e25f55f7e48e4a785afd160a14e8195a9b7efa6c2'
data/bin/miga-env ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env bash
2
+
3
+ MIGA_MOD="${MIGA_HOME:-"$HOME"}/.miga_modules"
4
+ [[ -s "$MIGA_MOD" ]] && . "$MIGA_MOD"
5
+ "$(dirname "$0")/miga" env
6
+
@@ -46,7 +46,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
46
46
  dist: ['distances', 'Check distance summary tables'],
47
47
  files: ['files', 'Check for outdated files'],
48
48
  cds: ['cds', 'Check for gzipped genes and proteins'],
49
- ess: ['essential-genes', 'Check for unarchived essential genes'],
49
+ ess: ['essential-genes', 'Check for outdated essential genes'],
50
50
  mts: ['mytaxa-scan', 'Check for unarchived MyTaxa scan'],
51
51
  start: ['start', 'Check for lingering .start files'],
52
52
  tax: ['taxonomy', 'Check for taxonomy consistency (not yet implemented)']
@@ -252,16 +252,16 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
252
252
  ##
253
253
  # Perform essential-genes operation with MiGA::Cli +cli+
254
254
  def check_ess(cli)
255
- cli.say 'Looking for unarchived essential genes'
255
+ cli.say 'Looking for outdated essential genes'
256
256
  cli.load_project.each_dataset do |d|
257
257
  res = d.result(:essential_genes)
258
258
  next if res.nil?
259
259
 
260
260
  dir = res.file_path(:collection)
261
- if dir.nil?
261
+ if dir.nil? || outdated_fastaai_ess(res)
262
262
  cli.say " > Removing #{d.name}:essential_genes"
263
263
  res.remove!
264
- sr = d.result(:stats) and sr.remove!
264
+ d.result(:stats)&.remove!
265
265
  next
266
266
  end
267
267
  next if Dir["#{dir}/*.faa"].empty?
@@ -272,6 +272,14 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
272
272
  end
273
273
  end
274
274
 
275
+ ##
276
+ # Check if the essential genes result +res+ has an outdated FastAAI index
277
+ def outdated_fastaai_ess(res)
278
+ idx1 = res.file_path(:fastaai_index)
279
+ idx2 = res.file_path(:fastaai_index_2)
280
+ idx2.nil? && !idx1.nil?
281
+ end
282
+
275
283
  ##
276
284
  # Perform mytaxa-scan operation with MiGA::Cli +cli+
277
285
  def check_mts(cli)
@@ -15,7 +15,7 @@ class MiGA::Cli::Action::Env < MiGA::Cli::Action
15
15
  . "$MIGA_HOME/.miga_rc"
16
16
  # Ensure MiGA & submodules are first in PATH
17
17
  export PATH="$MIGA/bin:$PATH"
18
- for util in enveomics/Scripts FastAAI/FastAAI multitrim ; do
18
+ for util in enveomics/Scripts FastAAI/FastAAI FastAAI multitrim ; do
19
19
  export PATH="$MIGA/utils/$util:$PATH"
20
20
  done
21
21
  BASH
@@ -181,7 +181,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
181
181
  req_libraries = {
182
182
  r: %w[ape cluster vegan],
183
183
  ruby: %w[sqlite3 daemons json],
184
- python: %w[numpy]
184
+ python: %w[numpy sqlite3]
185
185
  }
186
186
 
187
187
  req_libraries.each do |language, libraries|
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/remote_dataset'
4
+ require 'csv'
5
+
6
+ ##
7
+ # Helper module including download functions for the ncbi_get action
8
+ module MiGA::Cli::Action::NcbiGet::Downloads
9
+ def cli_task_flags(opt)
10
+ cli.opt_flag(
11
+ opt, 'reference',
12
+ 'Download all reference genomes (ignore any other status)'
13
+ )
14
+ cli.opt_flag(opt, 'complete', 'Download complete genomes')
15
+ cli.opt_flag(opt, 'chromosome', 'Download complete chromosomes')
16
+ cli.opt_flag(opt, 'scaffold', 'Download genomes in scaffolds')
17
+ cli.opt_flag(opt, 'contig', 'Download genomes in contigs')
18
+ opt.on(
19
+ '--all',
20
+ 'Download all genomes (in any status)'
21
+ ) do
22
+ cli[:complete] = true
23
+ cli[:chromosome] = true
24
+ cli[:scaffold] = true
25
+ cli[:contig] = true
26
+ end
27
+ end
28
+
29
+ def cli_name_modifiers(opt)
30
+ opt.on(
31
+ '--no-version-name',
32
+ 'Do not add sequence version to the dataset name',
33
+ 'Only affects --complete and --chromosome'
34
+ ) { |v| cli[:add_version] = v }
35
+ cli.opt_flag(
36
+ opt, 'legacy-name',
37
+ 'Use dataset names based on chromosome entries instead of assembly',
38
+ :legacy_name
39
+ )
40
+ end
41
+
42
+ def cli_filters(opt)
43
+ opt.on(
44
+ '--blacklist PATH',
45
+ 'A file with dataset names to blacklist'
46
+ ) { |v| cli[:blacklist] = v }
47
+ cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
48
+ opt.on(
49
+ '--ignore-until STRING',
50
+ 'Ignores all datasets until a name is found (useful for large reruns)'
51
+ ) { |v| cli[:ignore_until] = v }
52
+ cli.opt_flag(
53
+ opt, 'get-metadata',
54
+ 'Only download and update metadata for existing datasets', :get_md
55
+ )
56
+ end
57
+
58
+ def cli_save_actions(opt)
59
+ cli.opt_flag(
60
+ opt, 'only-metadata',
61
+ 'Create datasets without input data but retrieve all metadata',
62
+ :only_md
63
+ )
64
+ opt.on(
65
+ '--save-every INT', Integer,
66
+ 'Save project every this many downloaded datasets',
67
+ 'If zero, it saves the project only once upon completion',
68
+ "By default: #{cli[:save_every]}"
69
+ ) { |v| cli[:save_every] = v }
70
+ opt.on(
71
+ '-q', '--query',
72
+ 'Register the datasets as queries, not reference datasets'
73
+ ) { |v| cli[:query] = v }
74
+ opt.on(
75
+ '-u', '--unlink',
76
+ 'Unlink all datasets in the project missing from the download list'
77
+ ) { |v| cli[:unlink] = v }
78
+ opt.on(
79
+ '-R', '--remote-list PATH',
80
+ 'Path to an output file with the list of all datasets listed remotely'
81
+ ) { |v| cli[:remote_list] = v }
82
+ end
83
+
84
+ def sanitize_cli
85
+ cli.ensure_par(taxon: '-T')
86
+ tasks = %w[reference complete chromosome scaffold contig]
87
+ unless tasks.any? { |i| cli[i.to_sym] }
88
+ raise 'No action requested: pick at least one type of genome'
89
+ end
90
+
91
+ cli[:save_every] = 1 if cli[:dry]
92
+ end
93
+
94
+ def remote_list
95
+ cli.say 'Downloading genome list'
96
+ ds = {}
97
+ url = remote_list_url
98
+ doc = MiGA::RemoteDataset.download_url(url)
99
+ CSV.parse(doc, headers: true).each do |r|
100
+ asm = r['assembly']
101
+ next if asm.nil? || asm.empty? || asm == '-'
102
+ next unless r['ftp_path_genbank']
103
+
104
+ rep = remote_row_replicons(r)
105
+ n = remote_row_name(r, rep, asm)
106
+
107
+ # Register for download
108
+ fna_url = '%s/%s_genomic.fna.gz' %
109
+ [r['ftp_path_genbank'], File.basename(r['ftp_path_genbank'])]
110
+ ds[n] = {
111
+ ids: [fna_url], db: :assembly_gz, universe: :web,
112
+ md: {
113
+ type: :genome, ncbi_asm: asm, strain: r['strain']
114
+ }
115
+ }
116
+ ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
117
+ unless r['release_date'].nil?
118
+ ds[n][:md][:release_date] = Time.parse(r['release_date']).to_s
119
+ end
120
+ end
121
+ ds
122
+ end
123
+
124
+ def remote_row_replicons(r)
125
+ return if r['replicons'].nil?
126
+
127
+ r['replicons']
128
+ .split('; ')
129
+ .map { |i| i.gsub(/.*:/, '') }
130
+ .map { |i| i.gsub(%r{/.*}, '') }
131
+ end
132
+
133
+ def remote_row_name(r, rep, asm)
134
+ return r['#organism'].miga_name if cli[:legacy_name] && cli[:reference]
135
+
136
+ if cli[:legacy_name] && ['Complete', ' Chromosome'].include?(r['level'])
137
+ acc = rep.nil? ? '' : rep.first
138
+ else
139
+ acc = asm
140
+ end
141
+ acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
142
+ "#{r['#organism']}_#{acc}".miga_name
143
+ end
144
+
145
+ def remote_list_url
146
+ url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
147
+ url_param = {
148
+ q: '[display()].' \
149
+ 'from(GenomeAssemblies).' \
150
+ 'usingschema(/schema/GenomeAssemblies).' \
151
+ 'matching(tab==["Prokaryotes"] and q=="' \
152
+ "#{cli[:taxon]&.tr('"', "'")}\"",
153
+ fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
154
+ 'level|level,ftp_path_genbank|ftp_path_genbank,' \
155
+ 'release_date|release_date,strain|strain',
156
+ nolimit: 'on'
157
+ }
158
+ if cli[:reference]
159
+ url_param[:q] += ' and refseq_category==["representative"]'
160
+ else
161
+ status = {
162
+ complete: 'Complete',
163
+ chromosome: ' Chromosome', # <- The leading space is *VERY* important!
164
+ scaffold: 'Scaffold',
165
+ contig: 'Contig'
166
+ }.map { |k, v| '"' + v + '"' if cli[k] }.compact.join(',')
167
+ url_param[:q] += ' and level==[' + status + ']'
168
+ end
169
+ url_param[:q] += ')'
170
+ url_base + URI.encode_www_form(url_param)
171
+ end
172
+
173
+ def discard_blacklisted(ds)
174
+ unless cli[:blacklist].nil?
175
+ cli.say "Discarding datasets in #{cli[:blacklist]}"
176
+ File.readlines(cli[:blacklist])
177
+ .select { |i| i !~ /^#/ }
178
+ .map(&:chomp)
179
+ .each { |i| ds.delete i }
180
+ end
181
+ ds
182
+ end
183
+
184
+ def impose_limit(ds)
185
+ max = cli[:max_datasets].to_i
186
+ if !max.zero? && max < ds.size
187
+ cli.say "Subsampling list from #{ds.size} to #{max} datasets"
188
+ sample = ds.keys.sample(max)
189
+ ds.select! { |k, _| sample.include? k }
190
+ end
191
+ ds
192
+ end
193
+
194
+ def download_entries(ds, p)
195
+ cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
196
+ p.do_not_save = true if cli[:save_every] != 1
197
+ ignore = !cli[:ignore_until].nil?
198
+ downloaded = 0
199
+ d = []
200
+ ds.each do |name, body|
201
+ d << name
202
+ cli.puts name
203
+ ignore = false if ignore && name == cli[:ignore_until]
204
+ next if ignore || p.dataset(name).nil? == cli[:get_md]
205
+
206
+ downloaded += 1
207
+ unless cli[:dry]
208
+ save_entry(name, body, p)
209
+ p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
210
+ end
211
+ end
212
+ p.do_not_save = false
213
+ p.save! if cli[:save_every] != 1
214
+ [d, downloaded]
215
+ end
216
+
217
+ def save_entry(name, body, p)
218
+ cli.say ' Locating remote dataset'
219
+ body[:md][:metadata_only] = true if cli[:only_md]
220
+ rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
221
+ if cli[:get_md]
222
+ cli.say ' Updating dataset'
223
+ rd.update_metadata(p.dataset(name), body[:md])
224
+ else
225
+ cli.say ' Creating dataset'
226
+ rd.save_to(p, name, !cli[:query], body[:md])
227
+ cli.add_metadata(p.add_dataset(name))
228
+ end
229
+ end
230
+ end
@@ -1,11 +1,11 @@
1
- # @package MiGA
2
- # @license Artistic-2.0
1
+ # frozen_string_literal: true
3
2
 
4
3
  require 'miga/cli/action'
5
- require 'miga/remote_dataset'
6
- require 'csv'
7
4
 
8
5
  class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
6
+ require 'miga/cli/action/ncbi_get/downloads'
7
+ include MiGA::Cli::Action::NcbiGet::Downloads
8
+
9
9
  def parse_cli
10
10
  cli.defaults = {
11
11
  query: false, unlink: false,
@@ -20,6 +20,10 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
20
20
  '-T', '--taxon STRING',
21
21
  '(Mandatory) Taxon name (e.g., a species binomial)'
22
22
  ) { |v| cli[:taxon] = v }
23
+ opt.on(
24
+ '--max INT', Integer,
25
+ 'Maximum number of datasets to download (by default: unlimited)'
26
+ ) { |v| cli[:max_datasets] = v }
23
27
  opt.on(
24
28
  '-m', '--metadata STRING',
25
29
  'Metadata as key-value pairs separated by = and delimited by comma',
@@ -41,6 +45,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
41
45
  p = cli.load_project
42
46
  ds = remote_list
43
47
  ds = discard_blacklisted(ds)
48
+ ds = impose_limit(ds)
44
49
  d, downloaded = download_entries(ds, p)
45
50
 
46
51
  # Finalize
@@ -59,217 +64,4 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
59
64
  cli.say "Datasets unlinked: #{unlink.size}"
60
65
  end
61
66
 
62
- private
63
-
64
- def cli_task_flags(opt)
65
- cli.opt_flag(
66
- opt, 'reference',
67
- 'Download all reference genomes (ignore any other status)'
68
- )
69
- cli.opt_flag(opt, 'complete', 'Download complete genomes')
70
- cli.opt_flag(opt, 'chromosome', 'Download complete chromosomes')
71
- cli.opt_flag(opt, 'scaffold', 'Download genomes in scaffolds')
72
- cli.opt_flag(opt, 'contig', 'Download genomes in contigs')
73
- opt.on(
74
- '--all',
75
- 'Download all genomes (in any status)'
76
- ) do
77
- cli[:complete] = true
78
- cli[:chromosome] = true
79
- cli[:scaffold] = true
80
- cli[:contig] = true
81
- end
82
- end
83
-
84
- def cli_name_modifiers(opt)
85
- opt.on(
86
- '--no-version-name',
87
- 'Do not add sequence version to the dataset name',
88
- 'Only affects --complete and --chromosome'
89
- ) { |v| cli[:add_version] = v }
90
- cli.opt_flag(
91
- opt, 'legacy-name',
92
- 'Use dataset names based on chromosome entries instead of assembly',
93
- :legacy_name
94
- )
95
- end
96
-
97
- def cli_filters(opt)
98
- opt.on(
99
- '--blacklist PATH',
100
- 'A file with dataset names to blacklist'
101
- ) { |v| cli[:blacklist] = v }
102
- cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
103
- opt.on(
104
- '--ignore-until STRING',
105
- 'Ignores all datasets until a name is found (useful for large reruns)'
106
- ) { |v| cli[:ignore_until] = v }
107
- cli.opt_flag(
108
- opt, 'get-metadata',
109
- 'Only download and update metadata for existing datasets', :get_md
110
- )
111
- end
112
-
113
- def cli_save_actions(opt)
114
- cli.opt_flag(
115
- opt, 'only-metadata',
116
- 'Create datasets without input data but retrieve all metadata',
117
- :only_md
118
- )
119
- opt.on(
120
- '--save-every INT', Integer,
121
- 'Save project every this many downloaded datasets',
122
- 'If zero, it saves the project only once upon completion',
123
- "By default: #{cli[:save_every]}"
124
- ) { |v| cli[:save_every] = v }
125
- opt.on(
126
- '-q', '--query',
127
- 'Register the datasets as queries, not reference datasets'
128
- ) { |v| cli[:query] = v }
129
- opt.on(
130
- '-u', '--unlink',
131
- 'Unlink all datasets in the project missing from the download list'
132
- ) { |v| cli[:unlink] = v }
133
- opt.on(
134
- '-R', '--remote-list PATH',
135
- 'Path to an output file with the list of all datasets listed remotely'
136
- ) { |v| cli[:remote_list] = v }
137
- end
138
-
139
- def sanitize_cli
140
- cli.ensure_par(taxon: '-T')
141
- tasks = %w[reference complete chromosome scaffold contig]
142
- unless tasks.any? { |i| cli[i.to_sym] }
143
- raise 'No action requested: pick at least one type of genome'
144
- end
145
-
146
- cli[:save_every] = 1 if cli[:dry]
147
- end
148
-
149
- def remote_list
150
- cli.say 'Downloading genome list'
151
- ds = {}
152
- url = remote_list_url
153
- doc = RemoteDataset.download_url(url)
154
- CSV.parse(doc, headers: true).each do |r|
155
- asm = r['assembly']
156
- next if asm.nil? || asm.empty? || asm == '-'
157
- next unless r['ftp_path_genbank']
158
-
159
- rep = remote_row_replicons(r)
160
- n = remote_row_name(r, rep, asm)
161
-
162
- # Register for download
163
- fna_url = '%s/%s_genomic.fna.gz' %
164
- [r['ftp_path_genbank'], File.basename(r['ftp_path_genbank'])]
165
- ds[n] = {
166
- ids: [fna_url], db: :assembly_gz, universe: :web,
167
- md: {
168
- type: :genome, ncbi_asm: asm, strain: r['strain']
169
- }
170
- }
171
- ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
172
- unless r['release_date'].nil?
173
- ds[n][:md][:release_date] = Time.parse(r['release_date']).to_s
174
- end
175
- end
176
- ds
177
- end
178
-
179
- def remote_row_replicons(r)
180
- return if r['replicons'].nil?
181
-
182
- r['replicons']
183
- .split('; ')
184
- .map { |i| i.gsub(/.*:/, '') }
185
- .map { |i| i.gsub(%r{/.*}, '') }
186
- end
187
-
188
- def remote_row_name(r, rep, asm)
189
- return r['#organism'].miga_name if cli[:legacy_name] && cli[:reference]
190
-
191
- if cli[:legacy_name] && ['Complete', ' Chromosome'].include?(r['level'])
192
- acc = rep.nil? ? '' : rep.first
193
- else
194
- acc = asm
195
- end
196
- acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
197
- "#{r['#organism']}_#{acc}".miga_name
198
- end
199
-
200
- def remote_list_url
201
- url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
202
- url_param = {
203
- q: '[display()].' \
204
- 'from(GenomeAssemblies).' \
205
- 'usingschema(/schema/GenomeAssemblies).' \
206
- 'matching(tab==["Prokaryotes"] and q=="' \
207
- "#{cli[:taxon]&.tr('"', "'")}\"",
208
- fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
209
- 'level|level,ftp_path_genbank|ftp_path_genbank,' \
210
- 'release_date|release_date,strain|strain',
211
- nolimit: 'on'
212
- }
213
- if cli[:reference]
214
- url_param[:q] += ' and refseq_category==["representative"]'
215
- else
216
- status = {
217
- complete: 'Complete',
218
- chromosome: ' Chromosome', # <- The leading space is *VERY* important!
219
- scaffold: 'Scaffold',
220
- contig: 'Contig'
221
- }.map { |k, v| '"' + v + '"' if cli[k] }.compact.join(',')
222
- url_param[:q] += ' and level==[' + status + ']'
223
- end
224
- url_param[:q] += ')'
225
- url_base + URI.encode_www_form(url_param)
226
- end
227
-
228
- def discard_blacklisted(ds)
229
- unless cli[:blacklist].nil?
230
- cli.say "Discarding datasets in #{cli[:blacklist]}"
231
- File.readlines(cli[:blacklist])
232
- .select { |i| i !~ /^#/ }
233
- .map(&:chomp)
234
- .each { |i| ds.delete i }
235
- end
236
- ds
237
- end
238
-
239
- def download_entries(ds, p)
240
- cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
241
- p.do_not_save = true if cli[:save_every] != 1
242
- ignore = !cli[:ignore_until].nil?
243
- downloaded = 0
244
- d = []
245
- ds.each do |name, body|
246
- d << name
247
- cli.puts name
248
- ignore = false if ignore && name == cli[:ignore_until]
249
- next if ignore || p.dataset(name).nil? == cli[:get_md]
250
-
251
- downloaded += 1
252
- unless cli[:dry]
253
- save_entry(name, body, p)
254
- p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
255
- end
256
- end
257
- p.do_not_save = false
258
- p.save! if cli[:save_every] != 1
259
- [d, downloaded]
260
- end
261
-
262
- def save_entry(name, body, p)
263
- cli.say ' Locating remote dataset'
264
- body[:md][:metadata_only] = true if cli[:only_md]
265
- rd = RemoteDataset.new(body[:ids], body[:db], body[:universe])
266
- if cli[:get_md]
267
- cli.say ' Updating dataset'
268
- rd.update_metadata(p.dataset(name), body[:md])
269
- else
270
- cli.say ' Creating dataset'
271
- rd.save_to(p, name, !cli[:query], body[:md])
272
- cli.add_metadata(p.add_dataset(name))
273
- end
274
- end
275
67
  end
@@ -38,6 +38,10 @@ module MiGA::Cli::Action::Wf
38
38
  '--no-draft',
39
39
  'Only download complete genomes, not drafts'
40
40
  ) { |v| cli[:ncbi_draft] = v }
41
+ opt.on(
42
+ '--max-download INT', Integer,
43
+ 'Maximum number of genomes to download (by default: unlimited)'
44
+ ) { |v| cli[:ncbi_max] = v }
41
45
  end
42
46
  if params[:qual]
43
47
  opt.on(
@@ -125,9 +129,9 @@ module MiGA::Cli::Action::Wf
125
129
  # Download datasets
126
130
  unless cli[:ncbi_taxon].nil?
127
131
  what = cli[:ncbi_draft] ? '--all' : '--complete'
128
- call_cli(
129
- ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
130
- )
132
+ cmd = ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
133
+ cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
134
+ call_cli(cmd)
131
135
  end
132
136
 
133
137
  # Add datasets
data/lib/miga/common.rb CHANGED
@@ -53,11 +53,11 @@ class MiGA::MiGA
53
53
  # Reports the advance of a task at +step+ (String), the +n+ out of +total+.
54
54
  # The advance is reported in powers of 1,024 if +bin+ is true, or powers of
55
55
  # 1,000 otherwise.
56
- # The report goes to $stderr iff --verborse
56
+ # The report goes to $stderr iff --verbose
57
57
  def advance(step, n = 0, total = nil, bin = true)
58
58
  # Initialize advance timing
59
59
  @_advance_time ||= { last: nil, n: 0, avg: nil }
60
- if n <= 1 || @_advance_time[:n] > n
60
+ if @_advance_time[:n] > n
61
61
  @_advance_time[:last] = nil
62
62
  @_advance_time[:n] = 0
63
63
  @_advance_time[:avg] = nil
@@ -65,16 +65,17 @@ class MiGA::MiGA
65
65
 
66
66
  # Estimate timing
67
67
  adv_n = n - @_advance_time[:n]
68
- unless total.nil? || @_advance_time[:last].nil? || adv_n <= 0
69
- if adv_n.to_f/n > 0.001
70
- this_time = (Time.now - @_advance_time[:last]).to_f
71
- this_avg = this_time / adv_n
72
- @_advance_time[:avg] ||= this_avg
73
- @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg
74
- end
68
+ if total.nil? || @_advance_time[:last].nil? || adv_n.negative?
69
+ @_advance_time[:last] = Time.now
70
+ @_advance_time[:n] = n
71
+ elsif adv_n > 0.001 * total
72
+ this_time = (Time.now - @_advance_time[:last]).to_f
73
+ this_avg = this_time / adv_n
74
+ @_advance_time[:avg] ||= this_avg
75
+ @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg
76
+ @_advance_time[:last] = Time.now
77
+ @_advance_time[:n] = n
75
78
  end
76
- @_advance_time[:last] = Time.now
77
- @_advance_time[:n] = n
78
79
 
79
80
  # Report
80
81
  adv =
@@ -281,7 +281,8 @@ module MiGA::Dataset::Result
281
281
  collection: '.ess',
282
282
  report: '.ess/log',
283
283
  alignments: '.ess/proteins.aln',
284
- fastaai_index: '.faix.db.gz'
284
+ fastaai_index: '.faix.db.gz',
285
+ fastaai_index_2: '.faix'
285
286
  )
286
287
  end
287
288
 
data/lib/miga/version.rb CHANGED
@@ -12,15 +12,15 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.0, 5, 2].freeze
15
+ VERSION = [1.1, 0, 0].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
19
19
  VERSION_NAME = 'prima'
20
20
 
21
21
  ##
22
- # Date of the current gem release.
23
- VERSION_DATE = Date.new(2021, 8, 26)
22
+ # Date of the current gem release.
23
+ VERSION_DATE = Date.new(2021, 10, 28)
24
24
 
25
25
  ##
26
26
  # References of MiGA