RubyGems - miga-base - Versions diffs - 0.7.26.2 → 1.0.3.0 - Mend

miga-base 0.7.26.2 → 1.0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

checksums.yaml +4 -4
data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
data/lib/miga/cli/action/classify_wf.rb +2 -2
data/lib/miga/cli/action/derep_wf.rb +1 -1
data/lib/miga/cli/action/doctor.rb +57 -14
data/lib/miga/cli/action/doctor/base.rb +47 -23
data/lib/miga/cli/action/env.rb +26 -0
data/lib/miga/cli/action/init.rb +11 -7
data/lib/miga/cli/action/init/files_helper.rb +1 -0
data/lib/miga/cli/action/ncbi_get.rb +3 -3
data/lib/miga/cli/action/tax_dist.rb +2 -2
data/lib/miga/cli/action/wf.rb +5 -4
data/lib/miga/cli/base.rb +1 -0
data/lib/miga/common.rb +1 -0
data/lib/miga/daemon.rb +11 -4
data/lib/miga/dataset/result.rb +10 -6
data/lib/miga/json.rb +5 -4
data/lib/miga/metadata.rb +5 -1
data/lib/miga/parallel.rb +36 -0
data/lib/miga/project.rb +8 -8
data/lib/miga/project/base.rb +4 -4
data/lib/miga/project/result.rb +2 -2
data/lib/miga/sqlite.rb +10 -2
data/lib/miga/version.rb +23 -9
data/scripts/aai_distances.bash +16 -18
data/scripts/ani_distances.bash +16 -17
data/scripts/assembly.bash +31 -16
data/scripts/haai_distances.bash +3 -27
data/scripts/miga.bash +12 -8
data/scripts/p.bash +1 -1
data/scripts/read_quality.bash +9 -18
data/scripts/trimmed_fasta.bash +14 -30
data/scripts/trimmed_reads.bash +36 -36
data/test/parallel_test.rb +31 -0
data/test/project_test.rb +2 -1
data/test/remote_dataset_test.rb +1 -1
data/utils/distance/commands.rb +1 -0
data/utils/distance/database.rb +0 -1
data/utils/distance/runner.rb +2 -4
data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
data/utils/enveomics/Manifest/Tasks/other.json +77 -0
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
data/utils/enveomics/Manifest/categories.json +13 -4
data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
data/utils/enveomics/Scripts/SRA.download.bash +6 -8
data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
data/utils/enveomics/Scripts/aai.rb +3 -2
data/utils/enveomics/Scripts/anir.rb +137 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
data/utils/enveomics/Scripts/rbm.rb +87 -133
data/utils/enveomics/Scripts/sam.filter.rb +148 -0
data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
data/utils/enveomics/enveomics.R/R/utils.R +30 -0
data/utils/enveomics/enveomics.R/README.md +1 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
data/utils/multitrim/Multitrim How-To.pdf +0 -0
data/utils/multitrim/README.md +67 -0
data/utils/multitrim/multitrim.py +1555 -0
data/utils/multitrim/multitrim.yml +13 -0
data/utils/requirements.txt +4 -3
data/utils/subclade/pipeline.rb +2 -2
metadata +33 -4
data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30

data/utils/enveomics/Manifest/Tasks/other.json CHANGED Viewed

@@ -824,6 +824,83 @@
           "description": "Features to map in GFF."
         }
       ]
+    },
+    {
+      "task": "Table.prefScore.R",
+      "description": ["Estimate preference score of species based on occupancy",
+        "in biased sample sets."],
+      "help_arg": "--help",
+      "requires": [ { "r_package": "optparse" } ],
+      "options": [
+        {
+          "name": "Occupancy matrix",
+          "opt": "--x",
+          "arg": "in_file",
+          "description": ["A tab-delimited table of presence/absence (1/0)",
+            "with species as rows and samples as columns."],
+          "mandatory": true
+        },
+        {
+          "name": "Sample set",
+          "opt": "--set",
+          "arg": "in_file",
+          "description": ["A list of sample names that constitute the test",
+            "set, one per line."],
+          "mandatory": true
+        },
+        {
+          "opt": "--ignore",
+          "arg": "in_file",
+          "description": ["A list of species to exclude from the analysis,",
+            "one per line."]
+        },
+        {
+          "name": "Significance threshold",
+          "opt": "--signif-thr",
+          "arg": "float",
+          "description": "Absolute value of the significance threshold."
+        },
+        {
+          "opt": "--col-above",
+          "arg": "string",
+          "description": "Color for points significantly above zero.",
+          "default": "#941100"
+        },
+        {
+          "opt": "--col-equal",
+          "arg": "string",
+          "description": ["Color for points not significantly different from",
+            "zero."],
+          "default": "#BDBDBD"
+        },
+        {
+          "opt": "--col-below",
+          "arg": "string",
+          "description": "Color for points significantly below zero.",
+          "default": "#2F5496"
+        },
+        {
+          "name": "Output preference scores",
+          "arg": "out_file",
+          "description": "Output raw-text file with preference scores.",
+          "mandatory": true
+        },
+        {
+          "name": "Graphical output",
+          "arg": "out_file",
+          "description": "Output PDF file with preference scores plot."
+        },
+        {
+          "name": "Width",
+          "arg": "float",
+          "description": "Width of the plot in inches (7 by default)."
+        },
+        {
+          "name": "Height",
+          "arg": "float",
+          "description": "Height of the plot in inches (7 by default)."
+        }
+      ]
     }
   ]
 }

data/utils/enveomics/Manifest/Tasks/sequence-identity.json CHANGED Viewed

@@ -362,6 +362,139 @@
         }
       ]
     },
+    {
+      "task": "anir.rb",
+      "description": ["Estimates ANIr: the Average Nucleotide Identity of",
+        "reads against a genome."],
+      "help_arg": "--help",
+      "see_also": ["ani.rb", "sam.filter.rb"],
+      "options": [
+        {
+          "opt": "--reads",
+          "arg": "in_file",
+          "description": "Metagenomic reads."
+        },
+        {
+          "opt": "--genome",
+          "arg": "in_file",
+          "description": "Genome assembly."
+        },
+        {
+          "opt": "--mapping",
+          "arg": "in_file",
+          "description": "Mapping file."
+        },
+        {
+          "opt": "--list",
+          "arg": "in_file",
+          "description": "Output file with identities."
+        },
+        {
+          "opt": "--hist",
+          "arg": "in_file",
+          "description": "Output file with histogram."
+        },
+        {
+          "opt": "--tab",
+          "arg": "out_file",
+          "description": "Output file with results in tabular format."
+        },
+        {
+          "name": "Reads format",
+          "opt": "--r-format",
+          "arg": "select",
+          "description": ["Metagenomic reads format: fastq or fasta.",
+            "Both options support compression with .gz file extension."],
+          "values": ["fastq", "fasta"],
+          "default": "fastq"
+        },
+        {
+          "name": "Reads type",
+          "opt": "--r-type",
+          "arg": "select",
+          "description": ["Type of metagenomic reads: Single reads (single),",
+            "coupled reads in separate files (-m must be comma-delimited;",
+            "coupled), or coupled reads in a single interposed file",
+            "(interleaved)."],
+          "values": ["single", "coupled", "interleaved"],
+          "default": "single"
+        },
+        {
+          "name": "Genome format",
+          "opt": "--g-format",
+          "arg": "select",
+          "description": ["Genome assembly format: fasta or list.",
+            "Both options support compression with .gz file extension.",
+            "If passed in mapping-read mode, filters only matches to these",
+            "contigs."],
+          "values": ["fasta", "list"],
+          "default": "fasta"
+        },
+        {
+          "name": "Mapping format",
+          "opt": "--m-format",
+          "arg": "select",
+          "description": ["Mapping file format: sam, bam, tab, or list.",
+            "All except bam support compression with .gz file extension."],
+          "values": ["sam", "bam", "tab", "list"],
+          "default": "sam"
+        },
+        {
+          "opt": "--identity",
+          "arg": "float",
+          "description": "Set a fixed threshold of percent identity.",
+          "default": 95.0
+        },
+        {
+          "opt": "--algorithm",
+          "arg": "select",
+          "description": ["Set an algorithm to automatically detect identity",
+            "threshold: Valley detection by E-M of Gaussian Mixture Model",
+            "(gmm), fixed threshold (see Identity; fix),",
+            "or pick gmm or fix depending on bimodality (see Bimodality; auto)."],
+          "values": ["gmm", "fix", "auto"],
+          "default": "auto"
+        },
+        {
+          "opt": "--bimodality",
+          "arg": "float",
+          "description": ["Threshold of bimodality below which the algorithm",
+            "is set to fix. The coefficient used is the de Michele & Accatino",
+            "(2014) B index."],
+          "default": 0.5
+        },
+        {
+          "opt": "--coefficient",
+          "arg": "select",
+          "description": ["Coefficient of bimodality for Algorithm auto: ",
+            "Sarle's bimodality coefficient b (sarle), or",
+            "de Michele and Accatino (2014 PLoS ONE) B index",
+            "(use with Bimodality 0.1, dma)."],
+          "values": ["sarle", "dma"],
+          "default": "sarle"
+        },
+        {
+          "opt": "--bin-size",
+          "arg": "float",
+          "description": "Width of histogram bins (in percent identity).",
+          "default": 1.0
+        },
+        {
+          "opt": "--threads",
+          "arg": "integer",
+          "description": "Threads to use."
+        },
+        {
+          "opt": "--log",
+          "arg": "out_file",
+          "description": "Log file to save output."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly."
+        }
+      ]
+    },
     {
       "task": "HMM.haai.rb",
       "description": ["Estimates Average Amino Acid Identity (AAI) from the",
@@ -407,10 +540,14 @@
         "sequences."],
       "help_arg": "--help",
       "cite":[
+        ["Camacho et al, 2009, BMC Bioinf (BLAST+)",
+          "https://doi.org/10.1186/1471-2105-10-421"],
         ["Altschul et al, 2000, JMB (BLAST)",
           "http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
         ["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
-          "https://dx.doi.org/10.1038/nmeth.3176"]
+          "https://dx.doi.org/10.1038/nmeth.3176"],
+        ["Kent, 2002, Genome Res (BLAT)",
+          "https://doi.org/10.1101/gr.229202"]
       ],
       "options": [
         {

data/utils/enveomics/Manifest/categories.json CHANGED Viewed

@@ -24,11 +24,13 @@
         "BlastTab.pairedHits.rb",
         "BlastTab.subsample.pl",
         "BlastTab.taxid2taxrank.pl",
-        "BlastTab.topHits_sorted.rb"
+        "BlastTab.topHits_sorted.rb",
+        "sam.filter.rb"
       ],
       "Execution": [
         "aai.rb",
         "ani.rb",
+        "anir.rb",
         "HMM.haai.rb",
         "rbm.rb"
       ]
@@ -58,9 +60,11 @@
         "FastA.split.rb",
         "FastA.subsample.pl",
         "FastA.tag.rb",
+        "FastA.toFastQ.rb",
         "FastA.wrap.rb",
         "FastQ.filter.pl",
         "FastQ.interpose.pl",
+        "FastQ.maskQual.rb",
         "FastQ.offset.pl",
         "FastQ.split.pl",
         "FastQ.tag.rb",
@@ -71,11 +75,13 @@
       "Community": [
         "AlphaDiversity.pl",
         "Chao1.pl",
-        "Table.barplot.R"
+        "Table.barplot.R",
+        "Table.prefScore.R"
       ],
       "Population": [
         "VCF.SNPs.rb",
-        "VCF.KaKs.rb"
+        "VCF.KaKs.rb",
+        "Table.prefScore.R"
       ]
     },
     "Annotation": {
@@ -143,13 +149,16 @@
         "clust.rand.rb"
       ],
       "Read recruitments": [
+        "anir.rb",
         "BedGraph.tad.rb",
         "BedGraph.window.rb",
         "BlastTab.catsbj.pl",
         "BlastTab.pairedHits.rb",
         "BlastTab.recplot2.R",
+        "FastQ.test-error.rb",
         "GFF.catsbj.pl",
-        "RecPlot2.compareIdentities.R"
+        "RecPlot2.compareIdentities.R",
+        "sam.filter.rb"
       ]
     }
   }

data/utils/enveomics/Scripts/Aln.cat.rb CHANGED Viewed

@@ -1,163 +1,221 @@
 #!/usr/bin/env ruby
-#
 # @author  Luis M. Rodriguez-R
-# @update  Nov-30-2015
 # @license artistic license 2.0
-#
-$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
-require "enveomics_rb/enveomics"
+$VERSION = 1.0
+$:.push File.expand_path('../lib', __FILE__)
+require 'enveomics_rb/enveomics'
+o = {
+  q: false, missing: '-', model: 'AUTO', removeinvar: false, undefined: '-.Xx?'
+}
-o = {:q=>false, :missing=>"-", :model=>"AUTO", :removeinvar=>false,
-   :undefined=>"-.Xx?"}
 OptionParser.new do |opt|
-   opt.banner = "
-   Concatenates several multiple alignments in FastA format into a single
-   multiple alignment.  The IDs of the sequences (or the ID prefixes, if using
-   --ignore-after) must coincide across files.
-   Usage: #{$0} [options] aln1.fa aln2.fa ... > aln.fa".gsub(/^ +/,"")
-   opt.separator ""
-   opt.on("-c", "--coords FILE",
-      "Output file of coordinates in RAxML-compliant format."
-      ){ |v| o[:coords]=v }
-   opt.on("-i", "--ignore-after STRING",
-      "Remove everything in the IDs after the specified string."
-      ){ |v| o[:ignoreafter]=v }
-   opt.on("-I", "--remove-invariable", "Remove invariable sites.",
-      "Note: Invariable sites are defined as columns with only one state and",
-      "undefined characters.  Additional ambiguous characters may exist and",
-      "should be declared using --undefined."){ |v| o[:removeinvar]=v }
-   opt.on("-u", "--missing-char CHAR",
-      "Character denoting missing data. By default: '#{o[:missing]}'.") do |v|
-	 abort "Missing positions can only be denoted by single characters, " +
-	    "offending value: '#{v}'." if v.length != 1
-	 o[:missing]=v
-      end
-   opt.on("-m", "--model STRING",
-      "Name of the model to use if --coords is used. See RAxML's docs; ",
-      "supported values in v8+ include:",
-      "o For DNA alignments:",
-      "  'DNA[F|X]', or 'DNA[F|X]/3' (to estimate rates per codon position,",
-      "  particular notation for this script).",
-      "o General protein alignments:",
-      "  'AUTO' (default in this script), 'DAYHOFF' (1978), 'DCMUT' (MBE 2005;",
-      "  22(2):193-199), 'JTT' (Nat 1992;358:86-89), 'VT' (JCompBiol 2000;",
-      "  7(6):761-776), 'BLOSUM62' (PNAS 1992;89:10915), and 'LG' (MBE 2008;",
-      "  25(7):1307-1320).",
-      "o Specialized protein alignments:",
-      "  'MTREV' (mitochondrial, JME 1996;42(4):459-468), 'WAG' (globular, MBE",
-      "  2001;18(5):691-699), 'RTREV' (retrovirus, JME 2002;55(1):65-73), ",
-      "  'CPREV' (chloroplast, JME 2000;50(4):348-358), and 'MTMAM' (nuclear",
-      "  mammal proteins, JME 1998;46(4):409-418)."){|v| o[:model]=v}
-   opt.on("--undefined STRING",
-      "All characters to be regarded as 'undefined'. It should include all",
-      "ambiguous and missing data chars.  Ignored unless --remove-invariable.",
-      "By default: '#{o[:undefined]}'."){|v| o[:undefined]=v}
-   opt.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
-   opt.on("-h", "--help", "Display this screen.") do
-      puts opt
-      exit
-   end
-   opt.separator ""
+  cmd = File.basename($0)
+  opt.banner = <<~BANNER
+    [Enveomics Collection: #{cmd} v#{$VERSION}]
+    Concatenates several multiple alignments in FastA format into a single
+    multiple alignment.  The IDs of the sequences (or the ID prefixes, if using
+    --ignore-after) must coincide across files.
+    Usage: #{cmd} [options] aln1.fa aln2.fa ... > aln.fa
+  BANNER
+  opt.on(
+    '-c', '--coords FILE',
+    'Output file of coordinates in RAxML-compliant format'
+  ) { |v| o[:coords] = v }
+  opt.on(
+    '-i', '--ignore-after STRING',
+    'Remove everything in the IDs after the specified string'
+  ) { |v| o[:ignoreafter] = v }
+  opt.on(
+    '-I', '--remove-invariable', 'Remove invariable sites',
+    'Note: Invariable sites are defined as columns with only one state and',
+    'undefined characters.  Additional ambiguous characters may exist and',
+    'should be declared using --undefined'
+  ) { |v| o[:removeinvar] = v }
+  opt.on(
+    '-u', '--missing-char CHAR',
+    "Character denoting missing data. By default: '#{o[:missing]}'"
+  ) do |v|
+    if v.length != 1
+      abort "-missing-char can only be denoted by single characters: #{v}"
+    end
+    o[:missing] = v
+  end
+  opt.on(
+    '-m', '--model STRING',
+    'Name of the model to use if --coords is used. See RAxML docs;',
+    'supported values in v8+ include:',
+    '~ For DNA alignments:',
+    '  "DNA[F|X]", or "DNA[F|X]/3" (to estimate rates per codon position,',
+    '  particular notation for this script)',
+    '~ General protein alignments:',
+    '  "AUTO" (default in this script), "DAYHOFF" (1978), "DCMUT" (MBE 2005;',
+    '  22(2):193-199), "JTT" (Nat 1992;358:86-89), "VT" (JCompBiol 2000;',
+    '  7(6):761-776), "BLOSUM62" (PNAS 1992;89:10915), and "LG" (MBE 2008;',
+    '  25(7):1307-1320)',
+    '~ Specialized protein alignments:',
+    '  "MTREV" (mitochondrial, JME 1996;42(4):459-468), "WAG" (globular, MBE',
+    '  2001;18(5):691-699), "RTREV" (retrovirus, JME 2002;55(1):65-73),',
+    '  "CPREV" (chloroplast, JME 2000;50(4):348-358), and "MTMAM" (nuclear',
+    '  mammal proteins, JME 1998;46(4):409-418)'
+  ) { |v| o[:model] = v }
+  opt.on(
+    '--undefined STRING',
+    'All characters to be regarded as "undefined". It should include all',
+    'ambiguous and missing data chars.  Ignored unless --remove-invariable',
+    "By default: '#{o[:undefined]}'"
+  ) { |v| o[:undefined] = v }
+  opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
+  opt.on('-V', '--version', 'Returns version') { puts $VERSION ; exit }
+  opt.on('-h', '--help', 'Display this screen') { puts opt ; exit }
+  opt.separator ''
 end.parse!
-alns = ARGV
-abort "Alignment files are mandatory" if alns.nil? or alns.empty?
+files = ARGV
+abort 'Alignment files are mandatory' if files.nil? || files.empty?
+$QUIET = o[:q]
-##### MAIN:
-begin
-   $stderr.puts "Reading." unless o[:q]
-   a = {}
-   n = alns.size-1
-   lengths = []
-   (0 .. n).each do |i|
-      key = nil
-      File.open(alns[i],"r").each do |ln|
-	 ln.chomp!
-	 if ln =~ /^>(\S+)/
-	    key = $1
-	    key.sub!(/#{o[:ignoreafter]}.*/,"") unless o[:ignoreafter].nil?
-	    a[key] ||= []
-	    a[key][i] = ""
-	 else
-	    abort "#{alns[i]}: Leading line is not a def-line, is this a "+
-	       "valid FastA file?" if key.nil?
-	    ln.gsub!(/\s/,"")
-	    a[key][i] += ln
-	 end
+# Read individual gene alignments and return them as a single hash with genome
+# IDs as keys and arrays of single-line strings as values
+#
+# IDs are trimmed after the first occurrence of +ignoreafter+, if defined
+def read_alignments(files, ignoreafter = nil)
+  aln = {}
+  files.each_with_index do |file, i|
+    key = nil
+    File.open(file, 'r').each do |ln|
+      ln.chomp!
+      if ln =~ /^>(\S+)/
+        key = $1
+        key.sub!(/#{ignoreafter}.*/, '') if ignoreafter
+        aln[key] ||= []
+        aln[key][i] = ''
+      else
+        if key.nil?
+          abort "Invalid FastA file: #{file}: Leading line not a def-line"
+        end
+        ln.gsub!(/\s/, '')
+        aln[key][i] += ln
       end
-      abort "#{alns[i]}: Empty alignment?" if key.nil?
-      lengths[i] = a[key][i].length
-   end
-   if o[:removeinvar]
-      $stderr.puts "Removing invariable sites." unless o[:q]
-      invs = 0
-      (0 .. n).each do |i|
-	 olen = lengths[i]
-	 (0 .. (lengths[i]-1)).each do |pos|
-	    chr = nil
-	    inv = true
-	    a.keys.each do |key|
-	       next if a[key][i].nil?
-	       chr = a[key][i][pos] if
-		  chr.nil? or o[:undefined].chars.include? chr
-	       if chr != a[key][i][pos] and
-		     not o[:undefined].chars.include? a[key][i][pos]
-		  inv = false
-		  break
-	       end
-	    end
-	    if inv
-	       a.keys.each{|key| a[key][i][pos]="!" unless a[key][i].nil?}
-	       lengths[i] -= 1
-	       invs += 1
-	    end
-	 end
-	 a.keys.each{|key| a[key][i].gsub!("!", "") unless a[key][i].nil?}
+    end
+    abort "Empty alignment file: #{file}" if key.nil?
+  end
+  aln
+end
+# Remove invariable sites from the alignment hash +aln+, using +undefined+ as
+# a string including all characters representing undefined positions (e.g., X)
+#
+# Returns number of columns removed
+def remove_invariable(aln, undefined)
+  invs = 0
+  lengths = aln.values.first.map(&:length)
+  undef_chars = undefined.chars
+  lengths.each_with_index do |len, i|
+    (0 .. len - 1).each do |pos|
+      chr = nil
+      inv = true
+      aln.each_key do |key|
+        next if aln[key][i].nil?
+        chr = aln[key][i][pos] if chr.nil? || undefined.chars.include?(chr)
+        if chr != aln[key][i][pos] && !undef_chars.include?(aln[key][i][pos])
+          inv = false
+          break
+        end
       end
-      $stderr.puts "  Removed #{invs} sites." unless o[:q]
-   end
-   $stderr.puts "Concatenating." unless o[:q]
-   a.keys.each do |key|
-      (0 .. n).each do |i|
-	 a[key][i] = (o[:missing] * lengths[i]) if a[key][i].nil?
+      if inv
+        aln.each_key { |key| aln[key][i][pos] = '!' unless aln[key][i].nil? }
+        lengths[i] -= 1
+        invs += 1
       end
-      abort "Inconsistent lengths in '#{key}'
-      exp:#{lengths.join(" ")}
-      obs:#{a[key].map{|i| i.length}.join(" ")}." unless
-	 lengths == a[key].map{|i| i.length}
-      puts ">#{key}", a[key].join("").gsub(/(.{1,60})/, "\\1\n")
-      a.delete(key)
-   end
-   $stderr.puts "  #{lengths.inject(:+)} columns." unless o[:q]
-   unless o[:coords].nil?
-      $stderr.puts "Generating coordinates." unless o[:q]
-      coords = File.open(o[:coords],"w")
-      s = 0
-      names = (alns.map do |a|
-	 File.basename(a).gsub(/\..*/,"").gsub(/[^A-Za-z0-9_]/,"_")
-      end)
-      (0 .. n).each do |i|
-	 l = lengths[i]
-	 next unless l > 0
-	 names[i] += "_#{i}" while names.count(names[i])>1
-	 if o[:model] =~ /(DNA.?)\/3/
-	    coords.puts "#{$1}, #{names[i]}codon1 = #{s+1}-#{s+l}\\3"
-	    coords.puts "#{$1}, #{names[i]}codon2 = #{s+2}-#{s+l}\\3"
-	    coords.puts "#{$1}, #{names[i]}codon3 = #{s+3}-#{s+l}\\3"
-	 else
-	    coords.puts "#{o[:model]}, #{names[i]} = #{s+1}-#{s+l}"
-	 end
-	 s += l
+    end
+    aln.each_key { |key| aln[key][i].gsub!('!', '') unless aln[key][i].nil? }
+  end
+  invs
+end
+# Concatenate the alignments hash +aln+ using the character +missing+ to
+# indicate missing alignments, and send each entry in the concatenated alignment
+# to +blk+ as two variables: key (name) and value (alignment string)
+#
+# Returns an array with the lengths of each individual alignment
+def concatenate(aln, missing, &blk)
+  say 'Concatenating'
+  lengths = aln.values.first.map(&:length)
+  aln.each_key do |key|
+    # Pad missing entries
+    lengths.each_with_index { |len, i| aln[key][i] ||= missing * len }
+    # Check length
+    obs_len = aln[key].map(&:length)
+    unless lengths == obs_len
+      abort "Inconsistent lengths in '#{key}'\nexp: #{lengths}\nobs: #{obs_len}"
+    end
+    # Pass entry to the block and remove from alignment hash
+    blk[key, aln[key].join('')]
+    aln.delete(key)
+  end
+  lengths
+end
+# Save the coordinates in +file+ using +names+ (one name per alignment, derived
+# from the input file paths) and +lengths+ (the individual alignment lengths)
+#
+# The saved format is RAxML coords, including the +model+ for each alignment
+def save_coords(file, names, lengths, model)
+  File.open(file, 'w') do |fh|
+    s = 0
+    names.each_with_index do |name, i|
+      l = lengths[i]
+      next unless l > 0
+      name += "_#{i}" while names.count(name) > 1
+      if model =~ /(DNA.?)\/3/
+        fh.puts "#{$1}, #{name}codon1 = #{s + 1}-#{s + l}\\3"
+        fh.puts "#{$1}, #{name}codon2 = #{s + 2}-#{s + l}\\3"
+        fh.puts "#{$1}, #{name}codon3 = #{s + 3}-#{s + l}\\3"
+      else
+        fh.puts "#{model}, #{name} = #{s + 1}-#{s + l}"
       end
-      coords.close
-   end
-   # Save the output matrix
-   $stderr.puts "Done.\n" unless o[:q]
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
+      s += l
+    end
+  end
 end
+# ------ MAIN ------
+begin
+  say 'Reading'
+  alignments = read_alignments(files, o[:ignoreafter])
+  if o[:removeinvar]
+    say 'Removing invariable sites'
+    inv = remove_invariable(alignments, o[:undefined])
+    say "  Removed #{inv} sites"
+  end
+  lengths = concatenate(alignments, o[:missing]) do |name, seq|
+    puts ">#{name}", seq.gsub(/(.{1,60})/, "\\1\n")
+  end
+  say "  #{lengths.inject(:+)} columns"
+  unless o[:coords].nil?
+    say 'Generating coordinates'
+    names = files.map do |i|
+      File.basename(i).gsub(/\..*/, '').gsub(/[^A-Za-z0-9_]/, '_')
+    end
+    save_coords(o[:coords], names, lengths, o[:model])
+  end
+  $stderr.puts 'Done' unless o[:q]
+rescue => err
+  $stderr.puts "Exception: #{err}\n\n"
+  err.backtrace.each { |l| $stderr.puts l + "\n" }
+  err
+end