RubyGems - miga-base - Versions diffs - 0.7.26.3 → 1.0.0.sr1 - Mend

miga-base 0.7.26.3 → 1.0.0.sr1

Files changed (105) hide show

checksums.yaml +4 -4
data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
data/lib/miga/cli/action/doctor.rb +50 -19
data/lib/miga/cli/action/doctor/base.rb +20 -18
data/lib/miga/cli/action/init.rb +11 -7
data/lib/miga/cli/action/init/files_helper.rb +1 -0
data/lib/miga/cli/action/ncbi_get.rb +3 -3
data/lib/miga/cli/action/tax_dist.rb +2 -2
data/lib/miga/cli/action/wf.rb +5 -4
data/lib/miga/daemon.rb +11 -4
data/lib/miga/dataset/result.rb +10 -6
data/lib/miga/json.rb +1 -2
data/lib/miga/metadata.rb +5 -1
data/lib/miga/parallel.rb +11 -6
data/lib/miga/project.rb +8 -8
data/lib/miga/project/base.rb +4 -4
data/lib/miga/project/result.rb +2 -2
data/lib/miga/sqlite.rb +7 -0
data/lib/miga/version.rb +23 -9
data/scripts/aai_distances.bash +16 -18
data/scripts/ani_distances.bash +16 -17
data/scripts/assembly.bash +31 -16
data/scripts/haai_distances.bash +3 -27
data/scripts/miga.bash +6 -4
data/scripts/p.bash +1 -1
data/scripts/read_quality.bash +9 -18
data/scripts/trimmed_fasta.bash +14 -30
data/scripts/trimmed_reads.bash +36 -36
data/test/parallel_test.rb +31 -0
data/test/project_test.rb +2 -1
data/utils/distance/commands.rb +1 -0
data/utils/distance/runner.rb +2 -4
data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
data/utils/enveomics/Manifest/Tasks/other.json +77 -0
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
data/utils/enveomics/Manifest/categories.json +13 -4
data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
data/utils/enveomics/Scripts/SRA.download.bash +6 -8
data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
data/utils/enveomics/Scripts/aai.rb +3 -2
data/utils/enveomics/Scripts/anir.rb +137 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
data/utils/enveomics/Scripts/rbm.rb +87 -133
data/utils/enveomics/Scripts/sam.filter.rb +148 -0
data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
data/utils/enveomics/enveomics.R/R/utils.R +30 -0
data/utils/enveomics/enveomics.R/README.md +1 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
data/utils/multitrim/Multitrim How-To.pdf +0 -0
data/utils/multitrim/README.md +67 -0
data/utils/multitrim/multitrim.py +1555 -0
data/utils/multitrim/multitrim.yml +13 -0
data/utils/requirements.txt +4 -3
metadata +33 -6
data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30

data/utils/enveomics/Manifest/categories.json CHANGED Viewed

@@ -24,11 +24,13 @@
         "BlastTab.pairedHits.rb",
         "BlastTab.subsample.pl",
         "BlastTab.taxid2taxrank.pl",
-        "BlastTab.topHits_sorted.rb"
+        "BlastTab.topHits_sorted.rb",
+        "sam.filter.rb"
       ],
       "Execution": [
         "aai.rb",
         "ani.rb",
+        "anir.rb",
         "HMM.haai.rb",
         "rbm.rb"
       ]
@@ -58,9 +60,11 @@
         "FastA.split.rb",
         "FastA.subsample.pl",
         "FastA.tag.rb",
+        "FastA.toFastQ.rb",
         "FastA.wrap.rb",
         "FastQ.filter.pl",
         "FastQ.interpose.pl",
+        "FastQ.maskQual.rb",
         "FastQ.offset.pl",
         "FastQ.split.pl",
         "FastQ.tag.rb",
@@ -71,11 +75,13 @@
       "Community": [
         "AlphaDiversity.pl",
         "Chao1.pl",
-        "Table.barplot.R"
+        "Table.barplot.R",
+        "Table.prefScore.R"
       ],
       "Population": [
         "VCF.SNPs.rb",
-        "VCF.KaKs.rb"
+        "VCF.KaKs.rb",
+        "Table.prefScore.R"
       ]
     },
     "Annotation": {
@@ -143,13 +149,16 @@
         "clust.rand.rb"
       ],
       "Read recruitments": [
+        "anir.rb",
         "BedGraph.tad.rb",
         "BedGraph.window.rb",
         "BlastTab.catsbj.pl",
         "BlastTab.pairedHits.rb",
         "BlastTab.recplot2.R",
+        "FastQ.test-error.rb",
         "GFF.catsbj.pl",
-        "RecPlot2.compareIdentities.R"
+        "RecPlot2.compareIdentities.R",
+        "sam.filter.rb"
       ]
     }
   }

data/utils/enveomics/Scripts/Aln.cat.rb CHANGED Viewed

@@ -1,163 +1,221 @@
 #!/usr/bin/env ruby
-#
 # @author  Luis M. Rodriguez-R
-# @update  Nov-30-2015
 # @license artistic license 2.0
-#
-$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
-require "enveomics_rb/enveomics"
+$VERSION = 1.0
+$:.push File.expand_path('../lib', __FILE__)
+require 'enveomics_rb/enveomics'
+o = {
+  q: false, missing: '-', model: 'AUTO', removeinvar: false, undefined: '-.Xx?'
+}
-o = {:q=>false, :missing=>"-", :model=>"AUTO", :removeinvar=>false,
-   :undefined=>"-.Xx?"}
 OptionParser.new do |opt|
-   opt.banner = "
-   Concatenates several multiple alignments in FastA format into a single
-   multiple alignment.  The IDs of the sequences (or the ID prefixes, if using
-   --ignore-after) must coincide across files.
-   Usage: #{$0} [options] aln1.fa aln2.fa ... > aln.fa".gsub(/^ +/,"")
-   opt.separator ""
-   opt.on("-c", "--coords FILE",
-      "Output file of coordinates in RAxML-compliant format."
-      ){ |v| o[:coords]=v }
-   opt.on("-i", "--ignore-after STRING",
-      "Remove everything in the IDs after the specified string."
-      ){ |v| o[:ignoreafter]=v }
-   opt.on("-I", "--remove-invariable", "Remove invariable sites.",
-      "Note: Invariable sites are defined as columns with only one state and",
-      "undefined characters.  Additional ambiguous characters may exist and",
-      "should be declared using --undefined."){ |v| o[:removeinvar]=v }
-   opt.on("-u", "--missing-char CHAR",
-      "Character denoting missing data. By default: '#{o[:missing]}'.") do |v|
-	 abort "Missing positions can only be denoted by single characters, " +
-	    "offending value: '#{v}'." if v.length != 1
-	 o[:missing]=v
-      end
-   opt.on("-m", "--model STRING",
-      "Name of the model to use if --coords is used. See RAxML's docs; ",
-      "supported values in v8+ include:",
-      "o For DNA alignments:",
-      "  'DNA[F|X]', or 'DNA[F|X]/3' (to estimate rates per codon position,",
-      "  particular notation for this script).",
-      "o General protein alignments:",
-      "  'AUTO' (default in this script), 'DAYHOFF' (1978), 'DCMUT' (MBE 2005;",
-      "  22(2):193-199), 'JTT' (Nat 1992;358:86-89), 'VT' (JCompBiol 2000;",
-      "  7(6):761-776), 'BLOSUM62' (PNAS 1992;89:10915), and 'LG' (MBE 2008;",
-      "  25(7):1307-1320).",
-      "o Specialized protein alignments:",
-      "  'MTREV' (mitochondrial, JME 1996;42(4):459-468), 'WAG' (globular, MBE",
-      "  2001;18(5):691-699), 'RTREV' (retrovirus, JME 2002;55(1):65-73), ",
-      "  'CPREV' (chloroplast, JME 2000;50(4):348-358), and 'MTMAM' (nuclear",
-      "  mammal proteins, JME 1998;46(4):409-418)."){|v| o[:model]=v}
-   opt.on("--undefined STRING",
-      "All characters to be regarded as 'undefined'. It should include all",
-      "ambiguous and missing data chars.  Ignored unless --remove-invariable.",
-      "By default: '#{o[:undefined]}'."){|v| o[:undefined]=v}
-   opt.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
-   opt.on("-h", "--help", "Display this screen.") do
-      puts opt
-      exit
-   end
-   opt.separator ""
+  cmd = File.basename($0)
+  opt.banner = <<~BANNER
+    [Enveomics Collection: #{cmd} v#{$VERSION}]
+    Concatenates several multiple alignments in FastA format into a single
+    multiple alignment.  The IDs of the sequences (or the ID prefixes, if using
+    --ignore-after) must coincide across files.
+    Usage: #{cmd} [options] aln1.fa aln2.fa ... > aln.fa
+  BANNER
+  opt.on(
+    '-c', '--coords FILE',
+    'Output file of coordinates in RAxML-compliant format'
+  ) { |v| o[:coords] = v }
+  opt.on(
+    '-i', '--ignore-after STRING',
+    'Remove everything in the IDs after the specified string'
+  ) { |v| o[:ignoreafter] = v }
+  opt.on(
+    '-I', '--remove-invariable', 'Remove invariable sites',
+    'Note: Invariable sites are defined as columns with only one state and',
+    'undefined characters.  Additional ambiguous characters may exist and',
+    'should be declared using --undefined'
+  ) { |v| o[:removeinvar] = v }
+  # Register -u/--missing-char. Only single-character values are accepted;
+  # the error message names the option exactly as it must be typed
+  opt.on(
+    '-u', '--missing-char CHAR',
+    "Character denoting missing data. By default: '#{o[:missing]}'"
+  ) do |v|
+    if v.length != 1
+      abort "--missing-char can only be denoted by single characters: #{v}"
+    end
+    o[:missing] = v
+  end
+  opt.on(
+    '-m', '--model STRING',
+    'Name of the model to use if --coords is used. See RAxML docs;',
+    'supported values in v8+ include:',
+    '~ For DNA alignments:',
+    '  "DNA[F|X]", or "DNA[F|X]/3" (to estimate rates per codon position,',
+    '  particular notation for this script)',
+    '~ General protein alignments:',
+    '  "AUTO" (default in this script), "DAYHOFF" (1978), "DCMUT" (MBE 2005;',
+    '  22(2):193-199), "JTT" (Nat 1992;358:86-89), "VT" (JCompBiol 2000;',
+    '  7(6):761-776), "BLOSUM62" (PNAS 1992;89:10915), and "LG" (MBE 2008;',
+    '  25(7):1307-1320)',
+    '~ Specialized protein alignments:',
+    '  "MTREV" (mitochondrial, JME 1996;42(4):459-468), "WAG" (globular, MBE',
+    '  2001;18(5):691-699), "RTREV" (retrovirus, JME 2002;55(1):65-73),',
+    '  "CPREV" (chloroplast, JME 2000;50(4):348-358), and "MTMAM" (nuclear',
+    '  mammal proteins, JME 1998;46(4):409-418)'
+  ) { |v| o[:model] = v }
+  opt.on(
+    '--undefined STRING',
+    'All characters to be regarded as "undefined". It should include all',
+    'ambiguous and missing data chars.  Ignored unless --remove-invariable',
+    "By default: '#{o[:undefined]}'"
+  ) { |v| o[:undefined] = v }
+  opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
+  opt.on('-V', '--version', 'Returns version') { puts $VERSION ; exit }
+  opt.on('-h', '--help', 'Display this screen') { puts opt ; exit }
+  opt.separator ''
 end.parse!
-alns = ARGV
-abort "Alignment files are mandatory" if alns.nil? or alns.empty?
+files = ARGV
+abort 'Alignment files are mandatory' if files.nil? || files.empty?
+$QUIET = o[:q]
-##### MAIN:
-begin
-   $stderr.puts "Reading." unless o[:q]
-   a = {}
-   n = alns.size-1
-   lengths = []
-   (0 .. n).each do |i|
-      key = nil
-      File.open(alns[i],"r").each do |ln|
-	 ln.chomp!
-	 if ln =~ /^>(\S+)/
-	    key = $1
-	    key.sub!(/#{o[:ignoreafter]}.*/,"") unless o[:ignoreafter].nil?
-	    a[key] ||= []
-	    a[key][i] = ""
-	 else
-	    abort "#{alns[i]}: Leading line is not a def-line, is this a "+
-	       "valid FastA file?" if key.nil?
-	    ln.gsub!(/\s/,"")
-	    a[key][i] += ln
-	 end
+# Read individual gene alignments and return them as a single hash with genome
+# IDs as keys and arrays of single-line strings as values
+#
+# IDs are trimmed after the first occurrence of +ignoreafter+, if defined
+def read_alignments(files, ignoreafter = nil)
+  aln = {}
+  files.each_with_index do |file, i|
+    key = nil
+    File.open(file, 'r').each do |ln|
+      ln.chomp!
+      if ln =~ /^>(\S+)/
+        key = $1
+        key.sub!(/#{ignoreafter}.*/, '') if ignoreafter
+        aln[key] ||= []
+        aln[key][i] = ''
+      else
+        if key.nil?
+          abort "Invalid FastA file: #{file}: Leading line not a def-line"
+        end
+        ln.gsub!(/\s/, '')
+        aln[key][i] += ln
       end
-      abort "#{alns[i]}: Empty alignment?" if key.nil?
-      lengths[i] = a[key][i].length
-   end
-   if o[:removeinvar]
-      $stderr.puts "Removing invariable sites." unless o[:q]
-      invs = 0
-      (0 .. n).each do |i|
-	 olen = lengths[i]
-	 (0 .. (lengths[i]-1)).each do |pos|
-	    chr = nil
-	    inv = true
-	    a.keys.each do |key|
-	       next if a[key][i].nil?
-	       chr = a[key][i][pos] if
-		  chr.nil? or o[:undefined].chars.include? chr
-	       if chr != a[key][i][pos] and
-		     not o[:undefined].chars.include? a[key][i][pos]
-		  inv = false
-		  break
-	       end
-	    end
-	    if inv
-	       a.keys.each{|key| a[key][i][pos]="!" unless a[key][i].nil?}
-	       lengths[i] -= 1
-	       invs += 1
-	    end
-	 end
-	 a.keys.each{|key| a[key][i].gsub!("!", "") unless a[key][i].nil?}
+    end
+    abort "Empty alignment file: #{file}" if key.nil?
+  end
+  aln
+end
+# Remove invariable sites from the alignment hash +aln+, using +undefined+ as
+# a string including all characters representing undefined positions (e.g., X)
+#
+# Returns number of columns removed
+def remove_invariable(aln, undefined)
+  invs = 0
+  lengths = aln.values.first.map(&:length)
+  undef_chars = undefined.chars
+  lengths.each_with_index do |len, i|
+    (0 .. len - 1).each do |pos|
+      chr = nil
+      inv = true
+      aln.each_key do |key|
+        next if aln[key][i].nil?
+        chr = aln[key][i][pos] if chr.nil? || undefined.chars.include?(chr)
+        if chr != aln[key][i][pos] && !undef_chars.include?(aln[key][i][pos])
+          inv = false
+          break
+        end
       end
-      $stderr.puts "  Removed #{invs} sites." unless o[:q]
-   end
-   $stderr.puts "Concatenating." unless o[:q]
-   a.keys.each do |key|
-      (0 .. n).each do |i|
-	 a[key][i] = (o[:missing] * lengths[i]) if a[key][i].nil?
+      if inv
+        aln.each_key { |key| aln[key][i][pos] = '!' unless aln[key][i].nil? }
+        lengths[i] -= 1
+        invs += 1
       end
-      abort "Inconsistent lengths in '#{key}'
-      exp:#{lengths.join(" ")}
-      obs:#{a[key].map{|i| i.length}.join(" ")}." unless
-	 lengths == a[key].map{|i| i.length}
-      puts ">#{key}", a[key].join("").gsub(/(.{1,60})/, "\\1\n")
-      a.delete(key)
-   end
-   $stderr.puts "  #{lengths.inject(:+)} columns." unless o[:q]
-   unless o[:coords].nil?
-      $stderr.puts "Generating coordinates." unless o[:q]
-      coords = File.open(o[:coords],"w")
-      s = 0
-      names = (alns.map do |a|
-	 File.basename(a).gsub(/\..*/,"").gsub(/[^A-Za-z0-9_]/,"_")
-      end)
-      (0 .. n).each do |i|
-	 l = lengths[i]
-	 next unless l > 0
-	 names[i] += "_#{i}" while names.count(names[i])>1
-	 if o[:model] =~ /(DNA.?)\/3/
-	    coords.puts "#{$1}, #{names[i]}codon1 = #{s+1}-#{s+l}\\3"
-	    coords.puts "#{$1}, #{names[i]}codon2 = #{s+2}-#{s+l}\\3"
-	    coords.puts "#{$1}, #{names[i]}codon3 = #{s+3}-#{s+l}\\3"
-	 else
-	    coords.puts "#{o[:model]}, #{names[i]} = #{s+1}-#{s+l}"
-	 end
-	 s += l
+    end
+    aln.each_key { |key| aln[key][i].gsub!('!', '') unless aln[key][i].nil? }
+  end
+  invs
+end
+# Concatenate the alignments hash +aln+ using the character +missing+ to
+# indicate missing alignments, and send each entry in the concatenated alignment
+# to +blk+ as two variables: key (name) and value (alignment string)
+#
+# The expected length of each individual alignment is taken from the first
+# entry that actually contains that alignment, so entries lacking one or more
+# alignments (including the very first entry) are padded instead of failing
+#
+# Entries are removed from +aln+ as soon as they are passed to +blk+
+#
+# Aborts if any entry has an alignment with a length different from expected
+#
+# Returns an array with the lengths of each individual alignment
+def concatenate(aln, missing, &blk)
+  say 'Concatenating'
+  rows = aln.values
+  # The longest row tells how many individual alignments there are, since
+  # rows are only filled up to the last alignment where the entry is present
+  n_aln = rows.map(&:size).max.to_i
+  lengths = Array.new(n_aln) do |i|
+    present = rows.find { |row| row[i] }
+    present ? present[i].length : 0
+  end
+  # Iterate over a copy of the keys because entries are deleted on the go
+  aln.keys.each do |key|
+    # Pad missing entries
+    lengths.each_with_index { |len, i| aln[key][i] ||= missing * len }
+    # Check length
+    obs_len = aln[key].map(&:length)
+    unless lengths == obs_len
+      abort "Inconsistent lengths in '#{key}'\nexp: #{lengths}\nobs: #{obs_len}"
+    end
+    # Pass entry to the block and remove from alignment hash
+    blk[key, aln[key].join('')]
+    aln.delete(key)
+  end
+  lengths
+end
+# Save the coordinates in +file+ based on +files+ paths (for the names), and
+# using +lengths+ individual alignment lengths
+#
+# The saved format is RAxML coords, including the +model+ for each alignment
+def save_coords(file, names, lengths, model)
+  File.open(file, 'w') do |fh|
+    s = 0
+    names.each_with_index do |name, i|
+      l = lengths[i]
+      next unless l > 0
+      name += "_#{i}" while names.count(name) > 1
+      if model =~ /(DNA.?)\/3/
+        fh.puts "#{$1}, #{name}codon1 = #{s + 1}-#{s + l}\\3"
+        fh.puts "#{$1}, #{name}codon2 = #{s + 2}-#{s + l}\\3"
+        fh.puts "#{$1}, #{name}codon3 = #{s + 3}-#{s + l}\\3"
+      else
+        fh.puts "#{model}, #{name} = #{s + 1}-#{s + l}"
       end
-      coords.close
-   end
-   # Save the output matrix
-   $stderr.puts "Done.\n" unless o[:q]
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
+      s += l
+    end
+  end
 end
+# ------ MAIN ------
+# Reads the input alignments, optionally removes invariable sites, prints the
+# concatenated alignment to STDOUT wrapped at 60 characters per line and, if
+# requested, saves the coordinates file. Any StandardError raised along the way
+# is reported on STDERR together with its backtrace
+begin
+  say 'Reading'
+  aln = read_alignments(files, o[:ignoreafter])
+  if o[:removeinvar]
+    say 'Removing invariable sites'
+    removed = remove_invariable(aln, o[:undefined])
+    say "  Removed #{removed} sites"
+  end
+  aln_lengths = concatenate(aln, o[:missing]) do |id, sequence|
+    puts ">#{id}", sequence.gsub(/(.{1,60})/, "\\1\n")
+  end
+  say "  #{aln_lengths.inject(:+)} columns"
+  coords_file = o[:coords]
+  unless coords_file.nil?
+    say 'Generating coordinates'
+    # Names derive from the file basenames: everything after the first dot is
+    # dropped and any character other than A-Z, a-z, 0-9, or _ becomes _
+    aln_names = files.map do |path|
+      base = File.basename(path).gsub(/\..*/, '')
+      base.gsub(/[^A-Za-z0-9_]/, '_')
+    end
+    save_coords(coords_file, aln_names, aln_lengths, o[:model])
+  end
+  $stderr.puts('Done') unless o[:q]
+rescue StandardError => e
+  $stderr.puts "Exception: #{e}\n\n"
+  e.backtrace.each { |frame| $stderr.puts frame + "\n" }
+  e
+end

data/utils/enveomics/Scripts/FastA.N50.pl CHANGED Viewed

@@ -1,9 +1,8 @@
 #!/usr/bin/env perl
-#
 # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @update: Oct 07 2015
-# @license: artistic license 2.0
-#
+# @license: Artistic-2.0
 use strict;
 use warnings;
 use List::Util qw/sum min max/;
@@ -11,46 +10,51 @@ use List::Util qw/sum min max/;
 my ($seqs, $minlen, $n__) = @ARGV;
 $seqs or die "
 Description:
-   Calculates the N50 value of a set of sequences.  Alternatively, it
-   can calculate other N** values.  It also calculates the total number
-   of sequences and the total added length.
+  Calculates the N50 value of a set of sequences.  Alternatively, it
+  can calculate other N** values.  It also calculates the total number
+  of sequences, the total added length, and the longest sequence length.
 Usage:
-   $0 seqs.fa[ minlen[ **]]
+  $0 seqs.fa [minlen [**]]
+  seqs.fa	A FastA file containing the sequences
+  minlen	(optional) The minimum length to take into consideration
+  		By default: 0
+  **		(optional) Value N** to calculate.  By default: 50 (N50)
-   seqs.fa	A FastA file containing the sequences.
-   minlen	(optional) The minimum length to take into consideration.
-   		By default: 0.
-   **		Value N** to calculate.  By default: 50 (N50).
 ";
 $minlen ||= 0;
 $n__    ||= 50;
 my @len = ();
 open SEQ, "<", $seqs or die "Cannot open file: $seqs: $!\n";
 while(<SEQ>){
-   if(/^>/){
-      push @len, 0;
-   }else{
-      next if /^;/;
-      chomp;
-      s/\W//g;
-      $len[-1]+=length $_;
-   }
+  if(/^>/){
+    push @len, 0;
+  }else{
+    next if /^;/;
+    chomp;
+    s/\W//g;
+    $len[-1] += length $_;
+  }
 }
 close SEQ;
-@len = sort { $a <=> $b } map { $_>=$minlen?$_:() } @len;
+@len = sort { $a <=> $b } map { $_ >= $minlen ? $_ : () } @len;
 my $tot = (sum(@len) || 0);
-my $thr = $n__*$tot/100;
+my $thr = $n__ * $tot / 100;
 my $pos = 0;
 for(@len){
-   $pos+= $_;
-   if($pos>=$thr){
-      print "N$n__: $_\n";
-      last;
-   }
+  $pos += $_;
+  if($pos >= $thr){
+    print "N$n__: $_\n";
+    last;
+  }
 }
-print "Sequences: ".scalar(@len)."\n";
+print "Sequences: " . scalar(@len) . "\n";
 print "Total length: $tot\n";
+# @len is sorted in ascending order, so its last element is the longest
+# sequence; report 0 when no sequence passed the minimum length filter
+print "Longest sequence: " . (@len ? $len[-1] : 0) . "\n";