RubyGems - miga-base - Versions diffs - 0.3.1.6 → 0.3.1.7 - Mend

miga-base 0.3.1.6 → 0.3.1.7

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (38)

checksums.yaml +4 -4
data/actions/ncbi_get.rb +57 -42
data/lib/miga/result/base.rb +7 -0
data/lib/miga/result/dates.rb +42 -0
data/lib/miga/result.rb +4 -0
data/lib/miga/version.rb +1 -1
data/scripts/essential_genes.bash +5 -4
data/utils/enveomics/Makefile +1 -1
data/utils/enveomics/Manifest/Tasks/aasubs.json +75 -75
data/utils/enveomics/Manifest/Tasks/blasttab.json +194 -185
data/utils/enveomics/Manifest/Tasks/distances.json +130 -130
data/utils/enveomics/Manifest/Tasks/fasta.json +51 -3
data/utils/enveomics/Manifest/Tasks/fastq.json +161 -126
data/utils/enveomics/Manifest/Tasks/graphics.json +111 -111
data/utils/enveomics/Manifest/Tasks/mapping.json +30 -0
data/utils/enveomics/Manifest/Tasks/ogs.json +308 -265
data/utils/enveomics/Manifest/Tasks/other.json +451 -449
data/utils/enveomics/Manifest/Tasks/remote.json +1 -1
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +18 -10
data/utils/enveomics/Manifest/Tasks/tables.json +250 -250
data/utils/enveomics/Manifest/Tasks/trees.json +52 -52
data/utils/enveomics/Manifest/Tasks/variants.json +4 -4
data/utils/enveomics/Manifest/categories.json +12 -4
data/utils/enveomics/Manifest/examples.json +1 -1
data/utils/enveomics/Scripts/BedGraph.tad.rb +71 -0
data/utils/enveomics/Scripts/BlastTab.recplot2.R +23 -22
data/utils/enveomics/Scripts/FastA.split.rb +79 -0
data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +272 -258
data/utils/enveomics/Scripts/aai.rb +13 -6
data/utils/enveomics/Scripts/ani.rb +2 -2
data/utils/enveomics/Scripts/clust.rand.rb +102 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +12 -14
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +2 -2
data/utils/enveomics/Scripts/rbm.rb +23 -14
data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
data/utils/enveomics/enveomics.R/R/barplot.R +2 -2
metadata +9 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 4ba91c88b5e9a25633e5633e344fc013b2ab0d6e
-  data.tar.gz: 24e7c2426c9ad4c86da246c3b4c76a94ed378aa9
+  metadata.gz: b53d716162f9aedbc64f1e54e02ffc293b16a7e7
+  data.tar.gz: a5c46555329c2da1ba1fd165d423e513a27562ef
 SHA512:
-  metadata.gz: bba625b0f7777a8aed26b0e3e55f16ada263c01e371f29ba7c89aa80f23afd5db9ae58c41abd1d0e37c6aaa17b924d00d487c6a09e222cd8d27b9e1394cd59d9
-  data.tar.gz: e906281a4ccf5b8f8505d6e7d5a342d44f16505c64602f1af4d65e3e09f456b21487c9c8273f84ec2c9cb2a009de13efb3b1fe20d65d879a2510abbbac7825b8
+  metadata.gz: c94add412b17de6a932ee247e90ef5682afdf5b61cf09a3b6b9baa64d401da09d29915f6b7a6f39a9e8e6e67ba6e7afb5ed2a982e488805e22f16974cedc9ad7
+  data.tar.gz: 2b8e6fcbdc0b4f1b72e43bb02d47e3fb4618773ef4e86c9644b77926429b6f117cc2d965437704c2540fffd26a8576bdfc299fdd927f7fe2c0f2f33a4c961727

data/actions/ncbi_get.rb CHANGED Viewed

@@ -7,45 +7,50 @@ require 'miga/remote_dataset'
 o = {q:true, query:false, unlink:false,
       reference: false, ignore_plasmids: false,
-      complete:false, chromosome:false,
-      scaffold:false, contig:false,}
+      complete: false, chromosome: false,
+      scaffold: false, contig: false, add_version: true, dry: false}
 OptionParser.new do |opt|
   opt_banner(opt)
   opt_object(opt, o, [:project])
-  opt.on("-T", "--taxon STRING",
-        "(Mandatory unless --reference) Name of the taxon (e.g., a species binomial)."
+  opt.on('-T', '--taxon STRING',
+        '(Mandatory unless --reference) Taxon name (e.g., a species binomial).'
         ){ |v| o[:taxon]=v }
-  opt.on("--reference",
-        "Download all reference genomes (ignores -T)."){ |v| o[:reference]=v }
-  opt.on("--ref-no-plasmids",
-        "If passed, ignores plasmids (only for --reference)."
+  opt.on('--reference',
+        'Download all reference genomes (ignores -T).'){ |v| o[:reference]=v }
+  opt.on('--ref-no-plasmids',
+        'If passed, ignores plasmids (only for --reference).'
         ){ |v| o[:ignore_plasmids]=v }
-  opt.on("--complete", "Download complete genomes."){ |v| o[:complete]=v }
-  opt.on("--chromosome", "Download complete chromosomes."){ |v| o[:chromosome]=v }
-  opt.on("--scaffold", "Download genomes in scaffolds."){ |v| o[:scaffold]=v }
-  opt.on("--contig", "Download genomes in contigs."){ |v| o[:contig]=v }
-  opt.on("--all", "Download all genomes (in any status).") do
+  opt.on('--complete', 'Download complete genomes.'){ |v| o[:complete]=v }
+  opt.on('--chromosome',
+        'Download complete chromosomes.'){ |v| o[:chromosome]=v }
+  opt.on('--scaffold', 'Download genomes in scaffolds.'){ |v| o[:scaffold]=v }
+  opt.on('--contig', 'Download genomes in contigs.'){ |v| o[:contig]=v }
+  opt.on('--all', 'Download all genomes (in any status).') do
     o[:complete] = true
     o[:chromosome] = true
     o[:scaffold] = true
     o[:contig] = true
   end
-  opt.on("-q", "--query",
-        "If set, the datasets are registered as queries, not reference datasets."
+  opt.on('--no-version-name',
+        'Do not add sequence version to the dataset name.',
+        'Only affects --complete and --chromosome.'){ |v| o[:add_version]=v }
+  opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
+  opt.on('-q', '--query',
+        'Register the datasets as queries, not reference datasets.'
         ){ |v| o[:query]=v }
-  opt.on("-u", "--unlink",
-        "If set, unlinks all datasets in the project missing from the download list."
+  opt.on('-u', '--unlink',
+        'Unlink all datasets in the project missing from the download list.'
         ){ |v| o[:unlink]=v }
-  opt.on("-R", "--remote-list PATH",
-        "Path to an output file with the list of all datasets listed remotely."
+  opt.on('-R', '--remote-list PATH',
+        'Path to an output file with the list of all datasets listed remotely.'
         ){ |v| o[:remote_list]=v }
   opt_common(opt, o)
 end.parse!
-opt_require(o, project: "-P")
-opt_require(o, taxon: "-T") unless o[:reference]
+opt_require(o, project: '-P')
+opt_require(o, taxon: '-T') unless o[:reference]
 unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
-  raise "No action requested. Pick at least one type of genome"
+  raise 'No action requested. Pick at least one type of genome.'
 end
 ##=> Main <=
@@ -57,12 +62,12 @@ ds = {}
 downloaded = 0
 def get_list(taxon, status)
-  url_base = "https://www.ncbi.nlm.nih.gov/genomes/Genome2BE/genome2srv.cgi?"
+  url_base = 'https://www.ncbi.nlm.nih.gov/genomes/Genome2BE/genome2srv.cgi?'
   url_param = if status==:reference
-    { action: "refgenomes", download: "on" }
+    { action: 'refgenomes', download: 'on' }
   else
-    { action: "download", report: "proks", group: "-- All Prokaryotes --",
-          subgroup: "-- All Prokaryotes --", orgn: "#{taxon}[orgn]",
+    { action: 'download', report: 'proks', group: '-- All Prokaryotes --',
+          subgroup: '-- All Prokaryotes --', orgn: "#{taxon}[orgn]",
           status: status }
   end
   url = url_base + URI.encode_www_form(url_param)
@@ -75,14 +80,16 @@ end
 # Download IDs with reference status
 if o[:reference]
-  $stderr.puts "Downloading reference genomes" unless o[:q]
+  $stderr.puts 'Downloading reference genomes' unless o[:q]
   lineno = 0
   get_list(nil, :reference).each_line do |ln|
     next if (lineno+=1)==1
     r = ln.chomp.split("\t")
     next if r[3].nil? or r[3].empty?
-    ids = r[3].split(",")
-    ids += r[5].split(",") unless o[:ignore_plasmids] or r[5].empty?
+    ids = r[3].split(',')
+    ids += r[5].split(',') unless o[:ignore_plasmids] or r[5].empty?
+    ids.delete_if{ |i| i =~ /\A\-*\z/ }
+    next if ids.empty?
     n = r[2].miga_name
     ds[n] = {ids: ids, md: {type: :genome}, db: :nuccore, universe: :ncbi}
   end
@@ -90,22 +97,26 @@ end
 # Download IDs with complete or chromosome status
 if o[:complete] or o[:chromosome]
-  status = (o[:complete] and o[:chromosome] ? "50|40" : o[:complete] ? "50" : "40")
-  $stderr.puts "Downloading complete/chromosome genomes" unless o[:q]
+  status = (o[:complete] and o[:chromosome] ?
+        '50|40' : o[:complete] ? '50' : '40')
+  $stderr.puts 'Downloading complete/chromosome genomes' unless o[:q]
   lineno = 0
   get_list(o[:taxon], status).each_line do |ln|
     next if (lineno+=1)==1
     r = ln.chomp.split("\t")
     next if r[10].nil? or r[10].empty?
-    ids = r[10].gsub(/[^:;]*:/,"").gsub(/\/[^\/;]*/,"").split(";")
-    n = (r[0] + "_" + ids[0]).miga_name
+    ids = r[10].gsub(/[^:;]*:/,'').gsub(/\/[^\/;]*/,'').split(';')
+    ids.delete_if{ |i| i =~ /\A\-*\z/ }
+    next if ids.empty?
+    acc = o[:add_version] ? ids[0] : ids[0].gsub(/\.\d+\Z/,'')
+    n = "#{r[0]}_#{acc}".miga_name
     ds[n] = {ids: ids, md: {type: :genome}, db: :nuccore, universe: :ncbi}
   end
 end
 # Download IDs with scaffold or contig status
 if o[:scaffold] or o[:contig]
-  status = (o[:scaffold] and o[:contig] ? "30|20" : o[:scaffold] ? "30" : "20")
+  status = (o[:scaffold] and o[:contig] ? '30|20' : o[:scaffold] ? '30' : '20')
   $stderr.puts "Downloading scaffold/contig genomes" unless o[:q]
   lineno = 0
   get_list(o[:taxon], status).each_line do |ln|
@@ -113,11 +124,14 @@ if o[:scaffold] or o[:contig]
     r = ln.chomp.split("\t")
     next if r[7].nil? or r[7].empty?
     next if r[19].nil? or r[19].empty?
-    asm = r[7].gsub(/[^:;]*:/,"").gsub(/\/[^\/;]*/,"").gsub(/\s/,"")
-    ids = r[19].gsub(/\s/, "").split(";").map{ |i| i + "/" + File.basename(i) + "_genomic.fna.gz" }
-    n = (r[0] + "_" + asm).miga_name
+    asm = r[7].gsub(/[^:;]*:/,'').gsub(/\/[^\/;]*/,'').gsub(/\s/,'')
+    ids = r[19].gsub(/\s/,'').split(';').delete_if{ |i| i =~ /\A\-*\z/ }.
+          map{ |i| "#{i}/#{File.basename(i)}_genomic.fna.gz" }
+    next if ids.empty?
+    n = "#{r[0]}_#{asm}".miga_name
     comm = "Assembly: #{asm}"
-    ds[n] = {ids: ids, md: {type: :genome, comments: comm}, db: :assembly_gz, universe: :web}
+    ds[n] = {ids: ids, md: {type: :genome, comments: comm},
+          db: :assembly_gz, universe: :web}
   end
 end
@@ -127,17 +141,18 @@ ds.each do |name,body|
   d << name
   puts name
   next unless p.dataset(name).nil?
-  $stderr.puts "  Locating remote dataset." unless o[:q]
+  downloaded += 1
+  next if o[:dry]
+  $stderr.puts '  Locating remote dataset.' unless o[:q]
   rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
-  $stderr.puts "  Creating dataset." unless o[:q]
+  $stderr.puts '  Creating dataset.' unless o[:q]
   rd.save_to(p, name, !o[:query], body[:md])
   p.add_dataset(name)
-  downloaded += 1
 end
 # Finalize
 $stderr.puts "Datasets listed: #{d.size}" unless o[:q]
-$stderr.puts "Datasets downloaded: #{downloaded}" unless o[:q]
+$stderr.puts "Datasets #{"to be " if o[:dry]}downloaded: #{downloaded}" unless o[:q]
 unless o[:remote_list].nil?
   File.open(o[:remote_list], 'w') do |fh|
     d.each { |i| fh.puts i }

data/lib/miga/result/base.rb ADDED Viewed

@@ -0,0 +1,7 @@
+class MiGA::Result < MiGA::MiGA
+end
+module MiGA::Result::Base
+end

data/lib/miga/result/dates.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require "miga/result/base"
+##
+# Helper module including date-specific functions for results.
+module MiGA::Result::Dates
+  include MiGA::Result::Base
+  ##
+  # Returns the start date of processing as DateTime or +nil+ if it doesn't exist.
+  def started_at
+    date_at :start
+  end
+  ##
+  # Returns the end (done) date of processing as DateTime or +nil+ if it doesn't exist.
+  def done_at
+    date_at :done
+  end
+  ##
+  # Time it took for the result to complete as Float in minutes.
+  def running_time
+    a = started_at or return nil
+    b = done_at or return nil
+    (b - a).to_f * 24 * 60
+  end
+  private
+    ##
+    # Internal function to detect start and end dates
+    def date_at(event)
+      f = path event
+      return nil unless File.size? f
+      DateTime.parse File.read(f)
+    end
+end

data/lib/miga/result.rb CHANGED Viewed

@@ -1,10 +1,14 @@
 # @package MiGA
 # @license Artistic-2.0
+require "miga/result/dates"
 ##
 # The result from a task run. It can be project-wide or dataset-specific.
 class MiGA::Result < MiGA::MiGA
+  include MiGA::Result::Dates
   # Class-level
   ##

data/lib/miga/version.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module MiGA
   # - Float representing the major.minor version.
   # - Integer representing gem releases of the current version.
   # - Integer representing minor changes that require new version number.
-  VERSION = [0.3, 1, 6]
+  VERSION = [0.3, 1, 7]
   ##
   # Nickname for the current major.minor version.

data/scripts/essential_genes.bash CHANGED Viewed

@@ -37,10 +37,11 @@ else
 fi
 # Reduce files
-( cd "${DATASET}.ess" \
-    && exists *.faa \
-    && tar -zcf proteins.tar.gz *.faa \
-    && rm *.faa )
+if exists "$DATASET".ess/*.faa ; then
+  ( cd "${DATASET}.ess" \
+      && tar -zcf proteins.tar.gz *.faa \
+      && rm *.faa )
+fi
 # Finalize
 miga date > "$DATASET.done"

data/utils/enveomics/Makefile CHANGED Viewed

@@ -7,7 +7,7 @@ include globals.mk
 TEST=Tests
 enveomics_r=enveomics.R
-enveomics_r_v=enveomics.R_1.1.5
+enveomics_r_v=enveomics.R_1.1.6
 .PHONY: test install install-scripts install-r uninstall install-deps
 test: $(enveomics_r_v).tar.gz

data/utils/enveomics/Manifest/Tasks/aasubs.json CHANGED Viewed

@@ -4,99 +4,99 @@
       "task": "AAsubs.log2ratio.rb",
       "description": ["Estimates the log2-ratio of different amino acids in",
         "homologous sites using an AAsubs file (see BlastPairwise.AAsubs.pl).",
-	"It provides the point estimation (.obs file), the bootstrap of the",
-	"estimation (.boot file) and the null model based on label-permutation",
-	"(.null file)."],
+        "It provides the point estimation (.obs file), the bootstrap of the",
+        "estimation (.boot file) and the null model based on label-permutation",
+        "(.null file)."],
       "see_also": ["BlastPairwise.AAsubs.pl"],
       "cite": [["Konstantinidis et al, 2009, AEM",
         "http://dx.doi.org/10.1128%2FAEM.00473-09"]],
       "help_arg": "--help",
       "options": [
         {
-	  "opt": "--input",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": ["Input file in AAsubs format. It's a tab-delimited",
-	    "table where each line corresponds to a substitution, the first",
-	    "column corresponds to the compared protein IDs, the second",
-	    "and third columns correspond to the AA on each protein, and the",
-	    "fourth column indicates the length of the protein (not used by",
-	    "this script."]
-	},
-	{
-	  "opt": "--obs-file",
-	  "arg": "out_file",
-	  "description": ["Output file with the log2-ratios per amino acid.",
-	    "By default, 'Input value'.obs."]
-	},
-	{
-	  "opt": "--bootstrap-file",
-	  "arg": "out_file",
-	  "description": ["Output file with the bootstrap results of",
-	    "log2-ratios per amino acid. By default, 'Input value'.boot."]
-	},
-	{
-	  "opt": "--null-file",
-	  "arg": "out_file",
-	  "description": ["Output file with the permutation results of",
-	    "log2-ratios per amino acid. By default, 'Input value'.null."]
-	},
-	{
-	  "opt": "--overwrite",
-	  "description": ["Overwrite existing files. By default, skip steps if",
-	    "the files already exist."]
-	},
-	{
-	  "opt": "--bootstraps",
-	  "arg": "integer",
-	  "default": 1000,
-	  "description": "Number of bootstraps to run."
-	},
-	{
-	  "opt": "--permutations",
-	  "arg": "integer",
-	  "default": 1000,
-	  "description": "Number of permutations to run."
-	},
-	{
-	  "opt": "--quiet",
-	  "description": "Run quietly (no STDERR output)."
-	}
+          "opt": "--input",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Input file in AAsubs format. It's a tab-delimited",
+            "table where each line corresponds to a substitution, the first",
+            "column corresponds to the compared protein IDs, the second",
+            "and third columns correspond to the AA on each protein, and the",
+            "fourth column indicates the length of the protein (not used by",
+            "this script."]
+        },
+        {
+          "opt": "--obs-file",
+          "arg": "out_file",
+          "description": ["Output file with the log2-ratios per amino acid.",
+            "By default, 'Input value'.obs."]
+        },
+        {
+          "opt": "--bootstrap-file",
+          "arg": "out_file",
+          "description": ["Output file with the bootstrap results of",
+            "log2-ratios per amino acid. By default, 'Input value'.boot."]
+        },
+        {
+          "opt": "--null-file",
+          "arg": "out_file",
+          "description": ["Output file with the permutation results of",
+            "log2-ratios per amino acid. By default, 'Input value'.null."]
+        },
+        {
+          "opt": "--overwrite",
+          "description": ["Overwrite existing files. By default, skip steps if",
+            "the files already exist."]
+        },
+        {
+          "opt": "--bootstraps",
+          "arg": "integer",
+          "default": 1000,
+          "description": "Number of bootstraps to run."
+        },
+        {
+          "opt": "--permutations",
+          "arg": "integer",
+          "default": 1000,
+          "description": "Number of permutations to run."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
       ]
     },
     {
       "task": "BlastPairwise.AAsubs.pl",
       "description": ["Counts the different AA substitutions in the best hit",
         "blast alignments, from a BLASTP pairwise format output (-outfmt 0 in",
-	"BLAST+, -m 0 in legacy BLAST)."],
+        "BLAST+, -m 0 in legacy BLAST)."],
       "see_also": ["AAsubs.log2ratio.rb"],
       "cite": [["Konstantinidis et al, 2009, AEM",
         "http://dx.doi.org/10.1128%2FAEM.00473-09"]],
       "help_arg": "",
       "options": [
         {
-	  "name": "Cigar char",
-	  "arg": "select",
-	  "values": ["+","_"],
-	  "mandatory": true,
-	  "description": ["Use '+' for similar substitutions, use '_' for non",
-	    "similar substitutions."]
-	},
-	{
-	  "name": "Blast M0",
-	  "arg": "in_file",
-	  "mandatory": true,
-	  "description": "Blast in 'pairwise text' format (-outfmt/-m 0)."
-	},
-	">",
-	{
-	  "name": "AA subs",
-	  "arg": "out_file",
-	  "mandatory": true,
-	  "description": ["A tab-delimited raw file with one substitution per",
-	    "row and columns: (1) Name-of-query_Name-of-subject, (2)",
-	    "AA-in-subject, (3) AA-in-query, (4) Total-Align-Length."]
-	}
+          "name": "Cigar char",
+          "arg": "select",
+          "values": ["+","_"],
+          "mandatory": true,
+          "description": ["Use '+' for similar substitutions, use '_' for non",
+            "similar substitutions."]
+        },
+        {
+          "name": "Blast M0",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Blast in 'pairwise text' format (-outfmt/-m 0)."
+        },
+        ">",
+        {
+          "name": "AA subs",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": ["A tab-delimited raw file with one substitution per",
+            "row and columns: (1) Name-of-query_Name-of-subject, (2)",
+            "AA-in-subject, (3) AA-in-query, (4) Total-Align-Length."]
+        }
       ]
     }
   ]