miga-base 1.0.5.2 → 1.1.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: f6abc7091229bf09e03d261acce2ca2803bf12b2bed846e10ebbf0a41068bc53
- data.tar.gz: 11941d0e8e3e86b214372a8138d94899e04b8257ca82389aa2302a79a8773337
+ metadata.gz: 4076b3b3a4a4143ac9100ce4d58fada7615f68ad3e6174445510655f62904867
+ data.tar.gz: '0975a5feb4c9eb71a474be87dd14b58297ef1aa7bd8612c20f1ce65febbdf980'
  SHA512:
- metadata.gz: 70f9de1fb0c4db798bb0c934c450a3d300de434bf1c0519be1ec611179094542146f0e36d027eef3371eb68e1e376ef66bc42d90e6fafdfe440d78b9bf7bb6fa
- data.tar.gz: feda82ca950ae8d28382ff8addc1482a6907e77b303100eeeea2daf44b59efc1919d7d13db197cfc1f7a8d49868d4e9232afcaa8d5ffdebd79a52b507b832ecb
+ metadata.gz: ebcb7fe28d415ca9709433975585518eb1ecd8e8270c584b6579da222e4d3733cc20d810787c3f764f6a6136e1a6f09b7cb6b1c00114c3ea9c0885370654f3a7
+ data.tar.gz: '082bd856ed21487e5de709e2067f1d3453f824e0ece7a77716c6fbe70d88a16c4d295196d5c6133e5667142e25f55f7e48e4a785afd160a14e8195a9b7efa6c2'
data/bin/miga-env ADDED
@@ -0,0 +1,6 @@
+ #!/usr/bin/env bash
+
+ MIGA_MOD="${MIGA_HOME:-"$HOME"}/.miga_modules"
+ [[ -s "$MIGA_MOD" ]] && . "$MIGA_MOD"
+ "$(dirname "$0")/miga" env
+
@@ -46,7 +46,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
  dist: ['distances', 'Check distance summary tables'],
  files: ['files', 'Check for outdated files'],
  cds: ['cds', 'Check for gzipped genes and proteins'],
- ess: ['essential-genes', 'Check for unarchived essential genes'],
+ ess: ['essential-genes', 'Check for outdated essential genes'],
  mts: ['mytaxa-scan', 'Check for unarchived MyTaxa scan'],
  start: ['start', 'Check for lingering .start files'],
  tax: ['taxonomy', 'Check for taxonomy consistency (not yet implemented)']
@@ -252,16 +252,16 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
  ##
  # Perform essential-genes operation with MiGA::Cli +cli+
  def check_ess(cli)
- cli.say 'Looking for unarchived essential genes'
+ cli.say 'Looking for outdated essential genes'
  cli.load_project.each_dataset do |d|
  res = d.result(:essential_genes)
  next if res.nil?

  dir = res.file_path(:collection)
- if dir.nil?
+ if dir.nil? || outdated_fastaai_ess(res)
  cli.say " > Removing #{d.name}:essential_genes"
  res.remove!
- sr = d.result(:stats) and sr.remove!
+ d.result(:stats)&.remove!
  next
  end
  next if Dir["#{dir}/*.faa"].empty?
@@ -272,6 +272,14 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
  end
  end

+ ##
+ # Check if the essential genes result +res+ has an outdated FastAAI index
+ def outdated_fastaai_ess(res)
+ idx1 = res.file_path(:fastaai_index)
+ idx2 = res.file_path(:fastaai_index_2)
+ idx2.nil? && !idx1.nil?
+ end
+
  ##
  # Perform mytaxa-scan operation with MiGA::Cli +cli+
  def check_mts(cli)
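
The new outdated_fastaai_ess helper flags an essential-genes result as outdated when only the legacy FastAAI index (:fastaai_index, the .faix.db.gz file) is present and the new :fastaai_index_2 (.faix) is missing, so the doctor removes the result and lets it be recomputed. A minimal standalone sketch of that decision, using a hypothetical stand-in for the result object rather than MiGA's own classes:

# Stand-in for a MiGA result: file_path returns nil when the file is absent.
Result = Struct.new(:paths) do
  def file_path(key)
    paths[key]
  end
end

# Same test as the doctor's new helper: outdated when the legacy index exists
# but the new one does not.
def outdated_fastaai_ess(res)
  idx1 = res.file_path(:fastaai_index)   # legacy .faix.db.gz index
  idx2 = res.file_path(:fastaai_index_2) # new .faix index
  idx2.nil? && !idx1.nil?
end

old_style = Result.new({ fastaai_index: 'ds1.faix.db.gz' })
new_style = Result.new({ fastaai_index_2: 'ds1.faix' })
puts outdated_fastaai_ess(old_style) # => true  (result removed and recomputed)
puts outdated_fastaai_ess(new_style) # => false
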
@@ -15,7 +15,7 @@ class MiGA::Cli::Action::Env < MiGA::Cli::Action
  . "$MIGA_HOME/.miga_rc"
  # Ensure MiGA & submodules are first in PATH
  export PATH="$MIGA/bin:$PATH"
- for util in enveomics/Scripts FastAAI/FastAAI multitrim ; do
+ for util in enveomics/Scripts FastAAI/FastAAI FastAAI multitrim ; do
  export PATH="$MIGA/utils/$util:$PATH"
  done
  BASH
@@ -181,7 +181,7 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
  req_libraries = {
  r: %w[ape cluster vegan],
  ruby: %w[sqlite3 daemons json],
- python: %w[numpy]
+ python: %w[numpy sqlite3]
  }

  req_libraries.each do |language, libraries|
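
The required Python libraries now include sqlite3 alongside numpy. A hedged sketch of how such a requirement could be probed from Ruby (an illustration only, not necessarily the check miga init performs), assuming python3 is available on PATH:

# Hypothetical probe: try importing each required Python module and report
# the ones that fail; output of the python3 calls is silenced.
required = %w[numpy sqlite3]
missing = required.reject do |lib|
  system('python3', '-c', "import #{lib}", out: File::NULL, err: File::NULL)
end
abort "Missing Python libraries: #{missing.join(', ')}" unless missing.empty?
puts 'All required Python libraries found'
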
@@ -0,0 +1,230 @@
+ # frozen_string_literal: true
+
+ require 'miga/remote_dataset'
+ require 'csv'
+
+ ##
+ # Helper module including download functions for the ncbi_get action
+ module MiGA::Cli::Action::NcbiGet::Downloads
+ def cli_task_flags(opt)
+ cli.opt_flag(
+ opt, 'reference',
+ 'Download all reference genomes (ignore any other status)'
+ )
+ cli.opt_flag(opt, 'complete', 'Download complete genomes')
+ cli.opt_flag(opt, 'chromosome', 'Download complete chromosomes')
+ cli.opt_flag(opt, 'scaffold', 'Download genomes in scaffolds')
+ cli.opt_flag(opt, 'contig', 'Download genomes in contigs')
+ opt.on(
+ '--all',
+ 'Download all genomes (in any status)'
+ ) do
+ cli[:complete] = true
+ cli[:chromosome] = true
+ cli[:scaffold] = true
+ cli[:contig] = true
+ end
+ end
+
+ def cli_name_modifiers(opt)
+ opt.on(
+ '--no-version-name',
+ 'Do not add sequence version to the dataset name',
+ 'Only affects --complete and --chromosome'
+ ) { |v| cli[:add_version] = v }
+ cli.opt_flag(
+ opt, 'legacy-name',
+ 'Use dataset names based on chromosome entries instead of assembly',
+ :legacy_name
+ )
+ end
+
+ def cli_filters(opt)
+ opt.on(
+ '--blacklist PATH',
+ 'A file with dataset names to blacklist'
+ ) { |v| cli[:blacklist] = v }
+ cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
+ opt.on(
+ '--ignore-until STRING',
+ 'Ignores all datasets until a name is found (useful for large reruns)'
+ ) { |v| cli[:ignore_until] = v }
+ cli.opt_flag(
+ opt, 'get-metadata',
+ 'Only download and update metadata for existing datasets', :get_md
+ )
+ end
+
+ def cli_save_actions(opt)
+ cli.opt_flag(
+ opt, 'only-metadata',
+ 'Create datasets without input data but retrieve all metadata',
+ :only_md
+ )
+ opt.on(
+ '--save-every INT', Integer,
+ 'Save project every this many downloaded datasets',
+ 'If zero, it saves the project only once upon completion',
+ "By default: #{cli[:save_every]}"
+ ) { |v| cli[:save_every] = v }
+ opt.on(
+ '-q', '--query',
+ 'Register the datasets as queries, not reference datasets'
+ ) { |v| cli[:query] = v }
+ opt.on(
+ '-u', '--unlink',
+ 'Unlink all datasets in the project missing from the download list'
+ ) { |v| cli[:unlink] = v }
+ opt.on(
+ '-R', '--remote-list PATH',
+ 'Path to an output file with the list of all datasets listed remotely'
+ ) { |v| cli[:remote_list] = v }
+ end
+
+ def sanitize_cli
+ cli.ensure_par(taxon: '-T')
+ tasks = %w[reference complete chromosome scaffold contig]
+ unless tasks.any? { |i| cli[i.to_sym] }
+ raise 'No action requested: pick at least one type of genome'
+ end
+
+ cli[:save_every] = 1 if cli[:dry]
+ end
+
+ def remote_list
+ cli.say 'Downloading genome list'
+ ds = {}
+ url = remote_list_url
+ doc = MiGA::RemoteDataset.download_url(url)
+ CSV.parse(doc, headers: true).each do |r|
+ asm = r['assembly']
+ next if asm.nil? || asm.empty? || asm == '-'
+ next unless r['ftp_path_genbank']
+
+ rep = remote_row_replicons(r)
+ n = remote_row_name(r, rep, asm)
+
+ # Register for download
+ fna_url = '%s/%s_genomic.fna.gz' %
+ [r['ftp_path_genbank'], File.basename(r['ftp_path_genbank'])]
+ ds[n] = {
+ ids: [fna_url], db: :assembly_gz, universe: :web,
+ md: {
+ type: :genome, ncbi_asm: asm, strain: r['strain']
+ }
+ }
+ ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
+ unless r['release_date'].nil?
+ ds[n][:md][:release_date] = Time.parse(r['release_date']).to_s
+ end
+ end
+ ds
+ end
+
+ def remote_row_replicons(r)
+ return if r['replicons'].nil?
+
+ r['replicons']
+ .split('; ')
+ .map { |i| i.gsub(/.*:/, '') }
+ .map { |i| i.gsub(%r{/.*}, '') }
+ end
+
+ def remote_row_name(r, rep, asm)
+ return r['#organism'].miga_name if cli[:legacy_name] && cli[:reference]
+
+ if cli[:legacy_name] && ['Complete', ' Chromosome'].include?(r['level'])
+ acc = rep.nil? ? '' : rep.first
+ else
+ acc = asm
+ end
+ acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
+ "#{r['#organism']}_#{acc}".miga_name
+ end
+
+ def remote_list_url
+ url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
+ url_param = {
+ q: '[display()].' \
+ 'from(GenomeAssemblies).' \
+ 'usingschema(/schema/GenomeAssemblies).' \
+ 'matching(tab==["Prokaryotes"] and q=="' \
+ "#{cli[:taxon]&.tr('"', "'")}\"",
+ fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
+ 'level|level,ftp_path_genbank|ftp_path_genbank,' \
+ 'release_date|release_date,strain|strain',
+ nolimit: 'on'
+ }
+ if cli[:reference]
+ url_param[:q] += ' and refseq_category==["representative"]'
+ else
+ status = {
+ complete: 'Complete',
+ chromosome: ' Chromosome', # <- The leading space is *VERY* important!
+ scaffold: 'Scaffold',
+ contig: 'Contig'
+ }.map { |k, v| '"' + v + '"' if cli[k] }.compact.join(',')
+ url_param[:q] += ' and level==[' + status + ']'
+ end
+ url_param[:q] += ')'
+ url_base + URI.encode_www_form(url_param)
+ end
+
+ def discard_blacklisted(ds)
+ unless cli[:blacklist].nil?
+ cli.say "Discarding datasets in #{cli[:blacklist]}"
+ File.readlines(cli[:blacklist])
+ .select { |i| i !~ /^#/ }
+ .map(&:chomp)
+ .each { |i| ds.delete i }
+ end
+ ds
+ end
+
+ def impose_limit(ds)
+ max = cli[:max_datasets].to_i
+ if !max.zero? && max < ds.size
+ cli.say "Subsampling list from #{ds.size} to #{max} datasets"
+ sample = ds.keys.sample(max)
+ ds.select! { |k, _| sample.include? k }
+ end
+ ds
+ end
+
+ def download_entries(ds, p)
+ cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
+ p.do_not_save = true if cli[:save_every] != 1
+ ignore = !cli[:ignore_until].nil?
+ downloaded = 0
+ d = []
+ ds.each do |name, body|
+ d << name
+ cli.puts name
+ ignore = false if ignore && name == cli[:ignore_until]
+ next if ignore || p.dataset(name).nil? == cli[:get_md]
+
+ downloaded += 1
+ unless cli[:dry]
+ save_entry(name, body, p)
+ p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
+ end
+ end
+ p.do_not_save = false
+ p.save! if cli[:save_every] != 1
+ [d, downloaded]
+ end
+
+ def save_entry(name, body, p)
+ cli.say ' Locating remote dataset'
+ body[:md][:metadata_only] = true if cli[:only_md]
+ rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
+ if cli[:get_md]
+ cli.say ' Updating dataset'
+ rd.update_metadata(p.dataset(name), body[:md])
+ else
+ cli.say ' Creating dataset'
+ rd.save_to(p, name, !cli[:query], body[:md])
+ cli.add_metadata(p.add_dataset(name))
+ end
+ end
+ end
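
The new Downloads helper module collects the option definitions and download logic previously defined inside the NcbiGet action, and adds impose_limit, which honors the new --max option by randomly subsampling the candidate dataset list. A standalone illustration of that subsampling with hypothetical dataset names (MiGA's cli store is replaced by a plain argument):

# Keep at most `max` randomly chosen entries; a max of zero means unlimited.
def impose_limit(ds, max)
  return ds if max.zero? || max >= ds.size

  sample = ds.keys.sample(max)
  ds.select { |k, _| sample.include?(k) }
end

ds = { 'g1' => {}, 'g2' => {}, 'g3' => {}, 'g4' => {}, 'g5' => {} }
puts impose_limit(ds, 2).keys.inspect # e.g. ["g4", "g1"]
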
@@ -1,11 +1,11 @@
- # @package MiGA
- # @license Artistic-2.0
+ # frozen_string_literal: true

  require 'miga/cli/action'
- require 'miga/remote_dataset'
- require 'csv'

  class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
+ require 'miga/cli/action/ncbi_get/downloads'
+ include MiGA::Cli::Action::NcbiGet::Downloads
+
  def parse_cli
  cli.defaults = {
  query: false, unlink: false,
@@ -20,6 +20,10 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
  '-T', '--taxon STRING',
  '(Mandatory) Taxon name (e.g., a species binomial)'
  ) { |v| cli[:taxon] = v }
+ opt.on(
+ '--max INT', Integer,
+ 'Maximum number of datasets to download (by default: unlimited)'
+ ) { |v| cli[:max_datasets] = v }
  opt.on(
  '-m', '--metadata STRING',
  'Metadata as key-value pairs separated by = and delimited by comma',
@@ -41,6 +45,7 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
  p = cli.load_project
  ds = remote_list
  ds = discard_blacklisted(ds)
+ ds = impose_limit(ds)
  d, downloaded = download_entries(ds, p)

  # Finalize
@@ -59,217 +64,4 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
  cli.say "Datasets unlinked: #{unlink.size}"
  end

- private
-
- def cli_task_flags(opt)
- cli.opt_flag(
- opt, 'reference',
- 'Download all reference genomes (ignore any other status)'
- )
- cli.opt_flag(opt, 'complete', 'Download complete genomes')
- cli.opt_flag(opt, 'chromosome', 'Download complete chromosomes')
- cli.opt_flag(opt, 'scaffold', 'Download genomes in scaffolds')
- cli.opt_flag(opt, 'contig', 'Download genomes in contigs')
- opt.on(
- '--all',
- 'Download all genomes (in any status)'
- ) do
- cli[:complete] = true
- cli[:chromosome] = true
- cli[:scaffold] = true
- cli[:contig] = true
- end
- end
-
- def cli_name_modifiers(opt)
- opt.on(
- '--no-version-name',
- 'Do not add sequence version to the dataset name',
- 'Only affects --complete and --chromosome'
- ) { |v| cli[:add_version] = v }
- cli.opt_flag(
- opt, 'legacy-name',
- 'Use dataset names based on chromosome entries instead of assembly',
- :legacy_name
- )
- end
-
- def cli_filters(opt)
- opt.on(
- '--blacklist PATH',
- 'A file with dataset names to blacklist'
- ) { |v| cli[:blacklist] = v }
- cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
- opt.on(
- '--ignore-until STRING',
- 'Ignores all datasets until a name is found (useful for large reruns)'
- ) { |v| cli[:ignore_until] = v }
- cli.opt_flag(
- opt, 'get-metadata',
- 'Only download and update metadata for existing datasets', :get_md
- )
- end
-
- def cli_save_actions(opt)
- cli.opt_flag(
- opt, 'only-metadata',
- 'Create datasets without input data but retrieve all metadata',
- :only_md
- )
- opt.on(
- '--save-every INT', Integer,
- 'Save project every this many downloaded datasets',
- 'If zero, it saves the project only once upon completion',
- "By default: #{cli[:save_every]}"
- ) { |v| cli[:save_every] = v }
- opt.on(
- '-q', '--query',
- 'Register the datasets as queries, not reference datasets'
- ) { |v| cli[:query] = v }
- opt.on(
- '-u', '--unlink',
- 'Unlink all datasets in the project missing from the download list'
- ) { |v| cli[:unlink] = v }
- opt.on(
- '-R', '--remote-list PATH',
- 'Path to an output file with the list of all datasets listed remotely'
- ) { |v| cli[:remote_list] = v }
- end
-
- def sanitize_cli
- cli.ensure_par(taxon: '-T')
- tasks = %w[reference complete chromosome scaffold contig]
- unless tasks.any? { |i| cli[i.to_sym] }
- raise 'No action requested: pick at least one type of genome'
- end
-
- cli[:save_every] = 1 if cli[:dry]
- end
-
- def remote_list
- cli.say 'Downloading genome list'
- ds = {}
- url = remote_list_url
- doc = RemoteDataset.download_url(url)
- CSV.parse(doc, headers: true).each do |r|
- asm = r['assembly']
- next if asm.nil? || asm.empty? || asm == '-'
- next unless r['ftp_path_genbank']
-
- rep = remote_row_replicons(r)
- n = remote_row_name(r, rep, asm)
-
- # Register for download
- fna_url = '%s/%s_genomic.fna.gz' %
- [r['ftp_path_genbank'], File.basename(r['ftp_path_genbank'])]
- ds[n] = {
- ids: [fna_url], db: :assembly_gz, universe: :web,
- md: {
- type: :genome, ncbi_asm: asm, strain: r['strain']
- }
- }
- ds[n][:md][:ncbi_nuccore] = rep.join(',') unless rep.nil?
- unless r['release_date'].nil?
- ds[n][:md][:release_date] = Time.parse(r['release_date']).to_s
- end
- end
- ds
- end
-
- def remote_row_replicons(r)
- return if r['replicons'].nil?
-
- r['replicons']
- .split('; ')
- .map { |i| i.gsub(/.*:/, '') }
- .map { |i| i.gsub(%r{/.*}, '') }
- end
-
- def remote_row_name(r, rep, asm)
- return r['#organism'].miga_name if cli[:legacy_name] && cli[:reference]
-
- if cli[:legacy_name] && ['Complete', ' Chromosome'].include?(r['level'])
- acc = rep.nil? ? '' : rep.first
- else
- acc = asm
- end
- acc.gsub!(/\.\d+\Z/, '') unless cli[:add_version]
- "#{r['#organism']}_#{acc}".miga_name
- end
-
- def remote_list_url
- url_base = 'https://www.ncbi.nlm.nih.gov/genomes/solr2txt.cgi?'
- url_param = {
- q: '[display()].' \
- 'from(GenomeAssemblies).' \
- 'usingschema(/schema/GenomeAssemblies).' \
- 'matching(tab==["Prokaryotes"] and q=="' \
- "#{cli[:taxon]&.tr('"', "'")}\"",
- fields: 'organism|organism,assembly|assembly,replicons|replicons,' \
- 'level|level,ftp_path_genbank|ftp_path_genbank,' \
- 'release_date|release_date,strain|strain',
- nolimit: 'on'
- }
- if cli[:reference]
- url_param[:q] += ' and refseq_category==["representative"]'
- else
- status = {
- complete: 'Complete',
- chromosome: ' Chromosome', # <- The leading space is *VERY* important!
- scaffold: 'Scaffold',
- contig: 'Contig'
- }.map { |k, v| '"' + v + '"' if cli[k] }.compact.join(',')
- url_param[:q] += ' and level==[' + status + ']'
- end
- url_param[:q] += ')'
- url_base + URI.encode_www_form(url_param)
- end
-
- def discard_blacklisted(ds)
- unless cli[:blacklist].nil?
- cli.say "Discarding datasets in #{cli[:blacklist]}"
- File.readlines(cli[:blacklist])
- .select { |i| i !~ /^#/ }
- .map(&:chomp)
- .each { |i| ds.delete i }
- end
- ds
- end
-
- def download_entries(ds, p)
- cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')
- p.do_not_save = true if cli[:save_every] != 1
- ignore = !cli[:ignore_until].nil?
- downloaded = 0
- d = []
- ds.each do |name, body|
- d << name
- cli.puts name
- ignore = false if ignore && name == cli[:ignore_until]
- next if ignore || p.dataset(name).nil? == cli[:get_md]
-
- downloaded += 1
- unless cli[:dry]
- save_entry(name, body, p)
- p.save! if cli[:save_every] > 1 && (downloaded % cli[:save_every]).zero?
- end
- end
- p.do_not_save = false
- p.save! if cli[:save_every] != 1
- [d, downloaded]
- end
-
- def save_entry(name, body, p)
- cli.say ' Locating remote dataset'
- body[:md][:metadata_only] = true if cli[:only_md]
- rd = RemoteDataset.new(body[:ids], body[:db], body[:universe])
- if cli[:get_md]
- cli.say ' Updating dataset'
- rd.update_metadata(p.dataset(name), body[:md])
- else
- cli.say ' Creating dataset'
- rd.save_to(p, name, !cli[:query], body[:md])
- cli.add_metadata(p.add_dataset(name))
- end
- end
  end
@@ -38,6 +38,10 @@ module MiGA::Cli::Action::Wf
  '--no-draft',
  'Only download complete genomes, not drafts'
  ) { |v| cli[:ncbi_draft] = v }
+ opt.on(
+ '--max-download INT', Integer,
+ 'Maximum number of genomes to download (by default: unlimited)'
+ ) { |v| cli[:ncbi_max] = v }
  end
  if params[:qual]
  opt.on(
@@ -125,9 +129,9 @@ module MiGA::Cli::Action::Wf
  # Download datasets
  unless cli[:ncbi_taxon].nil?
  what = cli[:ncbi_draft] ? '--all' : '--complete'
- call_cli(
- ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
- )
+ cmd = ['ncbi_get', '-P', cli[:outdir], '-T', cli[:ncbi_taxon], what]
+ cmd += ['--max', cli[:ncbi_max]] if cli[:ncbi_max]
+ call_cli(cmd)
  end

  # Add datasets
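
In the workflow actions, the new --max-download value is stored as ncbi_max and forwarded to ncbi_get as its --max option. A simplified sketch of that hand-off, with hypothetical values standing in for the cli store and the call_cli helper:

# Hypothetical values in place of cli[:outdir], cli[:ncbi_taxon], etc.
outdir     = 'my_project'
ncbi_taxon = 'Escherichia coli'
ncbi_draft = false
ncbi_max   = 10 # value of --max-download; nil when the flag is absent

what = ncbi_draft ? '--all' : '--complete'
cmd = ['ncbi_get', '-P', outdir, '-T', ncbi_taxon, what]
cmd += ['--max', ncbi_max] if ncbi_max
puts cmd.join(' ')
# => ncbi_get -P my_project -T Escherichia coli --complete --max 10
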
data/lib/miga/common.rb CHANGED
@@ -53,11 +53,11 @@ class MiGA::MiGA
  # Reports the advance of a task at +step+ (String), the +n+ out of +total+.
  # The advance is reported in powers of 1,024 if +bin+ is true, or powers of
  # 1,000 otherwise.
- # The report goes to $stderr iff --verborse
+ # The report goes to $stderr iff --verbose
  def advance(step, n = 0, total = nil, bin = true)
  # Initialize advance timing
  @_advance_time ||= { last: nil, n: 0, avg: nil }
- if n <= 1 || @_advance_time[:n] > n
+ if @_advance_time[:n] > n
  @_advance_time[:last] = nil
  @_advance_time[:n] = 0
  @_advance_time[:avg] = nil
@@ -65,16 +65,17 @@ class MiGA::MiGA

  # Estimate timing
  adv_n = n - @_advance_time[:n]
- unless total.nil? || @_advance_time[:last].nil? || adv_n <= 0
- if adv_n.to_f/n > 0.001
- this_time = (Time.now - @_advance_time[:last]).to_f
- this_avg = this_time / adv_n
- @_advance_time[:avg] ||= this_avg
- @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg
- end
+ if total.nil? || @_advance_time[:last].nil? || adv_n.negative?
+ @_advance_time[:last] = Time.now
+ @_advance_time[:n] = n
+ elsif adv_n > 0.001 * total
+ this_time = (Time.now - @_advance_time[:last]).to_f
+ this_avg = this_time / adv_n
+ @_advance_time[:avg] ||= this_avg
+ @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg
+ @_advance_time[:last] = Time.now
+ @_advance_time[:n] = n
  end
- @_advance_time[:last] = Time.now
- @_advance_time[:n] = n

  # Report
  adv =
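
The reworked advance timing keeps an exponentially weighted moving average of the per-item time (new estimate = 0.9 * previous + 0.1 * latest) and only refreshes it when the progress since the last update exceeds 0.1% of the total, so tiny increments no longer distort the estimate. A standalone sketch of that smoothing with hypothetical counts (no MiGA internals):

total = 10_000
avg = nil
last_n = 0
last_t = Time.now

[500, 1_200, 1_210, 2_500].each do |n|
  adv_n = n - last_n
  # Skip tiny increments (here 1_210 - 1_200 = 10 is not > 0.1% of total)
  next unless adv_n > 0.001 * total

  this_avg = (Time.now - last_t) / adv_n        # seconds per item in this batch
  avg = avg.nil? ? this_avg : 0.9 * avg + 0.1 * this_avg
  last_n = n
  last_t = Time.now
  eta = (total - n) * avg
  puts format('%.1f%% done, ETA ~%.2fs', 100.0 * n / total, eta)
end
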
@@ -281,7 +281,8 @@ module MiGA::Dataset::Result
  collection: '.ess',
  report: '.ess/log',
  alignments: '.ess/proteins.aln',
- fastaai_index: '.faix.db.gz'
+ fastaai_index: '.faix.db.gz',
+ fastaai_index_2: '.faix'
  )
  end

data/lib/miga/version.rb CHANGED
@@ -12,15 +12,15 @@ module MiGA
  # - String indicating release status:
  # - rc* release candidate, not released as gem
  # - [0-9]+ stable release, released as gem
- VERSION = [1.0, 5, 2].freeze
+ VERSION = [1.1, 0, 0].freeze

  ##
  # Nickname for the current major.minor version.
  VERSION_NAME = 'prima'

  ##
- # Date of the current gem release.
- VERSION_DATE = Date.new(2021, 8, 26)
+ # Date of the current gem relese.
+ VERSION_DATE = Date.new(2021, 10, 28)

  ##
  # References of MiGA
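
The VERSION array follows the [major.minor, patch, release-status] convention described in the surrounding comments, so the bump from [1.0, 5, 2] to [1.1, 0, 0] matches the gem versions in this diff's title. A one-line check of that reading (an assumption about the convention, not a quote of the gem's own version helper):

VERSION = [1.1, 0, 0].freeze
puts VERSION.join('.') # => "1.1.0.0"
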