RubyGems - miga-base - Versions diffs - 0.2.0.6 → 0.2.0.7 - Mend

miga-base 0.2.0.6 → 0.2.0.7

Files changed (60) hide show

checksums.yaml +4 -4
data/Gemfile +3 -0
data/LICENSE +201 -0
data/README.md +17 -335
data/Rakefile +31 -0
data/actions/add_result +2 -5
data/actions/add_taxonomy +4 -7
data/actions/create_dataset +5 -6
data/actions/create_project +2 -5
data/actions/daemon +2 -5
data/actions/download_dataset +88 -58
data/actions/find_datasets +36 -38
data/actions/import_datasets +2 -5
data/actions/index_taxonomy +2 -5
data/actions/list_datasets +47 -49
data/actions/list_files +7 -11
data/actions/unlink_dataset +2 -5
data/bin/miga +1 -1
data/lib/miga/common.rb +132 -0
data/lib/miga/daemon.rb +229 -168
data/lib/miga/dataset.rb +354 -277
data/lib/miga/gui.rb +346 -269
data/lib/miga/metadata.rb +115 -71
data/lib/miga/project.rb +361 -259
data/lib/miga/remote_dataset.rb +200 -148
data/lib/miga/result.rb +150 -99
data/lib/miga/tax_index.rb +124 -67
data/lib/miga/taxonomy.rb +129 -100
data/lib/miga/version.rb +57 -0
data/lib/miga.rb +2 -77
data/scripts/_distances_noref_nomulti.bash +2 -0
data/scripts/_distances_ref_nomulti.bash +2 -0
data/scripts/aai_distances.bash +1 -0
data/scripts/ani_distances.bash +1 -0
data/scripts/assembly.bash +1 -0
data/scripts/cds.bash +1 -0
data/scripts/clade_finding.bash +17 -1
data/scripts/distances.bash +1 -0
data/scripts/essential_genes.bash +1 -0
data/scripts/haai_distances.bash +1 -0
data/scripts/init.bash +2 -0
data/scripts/mytaxa.bash +1 -0
data/scripts/mytaxa_scan.bash +1 -0
data/scripts/ogs.bash +1 -0
data/scripts/read_quality.bash +1 -0
data/scripts/ssu.bash +1 -0
data/scripts/subclades.bash +1 -0
data/scripts/trimmed_fasta.bash +1 -0
data/scripts/trimmed_reads.bash +1 -0
data/test/common_test.rb +82 -0
data/test/daemon_test.rb +53 -0
data/test/dataset_test.rb +156 -0
data/test/jruby_gui_test.rb +20 -0
data/test/metadata_test.rb +48 -0
data/test/project_test.rb +54 -0
data/test/remote_dataset_test.rb +41 -0
data/test/tax_index_test.rb +44 -0
data/test/taxonomy_test.rb +36 -0
data/test/test_helper.rb +32 -0
metadata +53 -38

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f80152072105bd365145133c00ddfcd432a008c0
-  data.tar.gz: 7444a990c359e6c9f2a6a595e688e6319df50ebb
+  metadata.gz: 01b3728971a5d407f85578447d4a66dc4c8ab8a8
+  data.tar.gz: 656535155e0316681f2d7aa9bcc8b501caed9d96
 SHA512:
-  metadata.gz: e4bb05e73def629ea39d72fac9d6e702b247051fb3b21b8db84195127e6d135d9b94bbf5037cde9c8f21e611cf580d53078c215d9c9187bdf861908ad42efe0c
-  data.tar.gz: ee27ea7cf9b98a3de760e18249e89f6410181d963017e86f5878710bf80a6d3ed5c1715d42ff7b394371add8709816d7c54e9fda5556ae6e4e96a3c4b384ca82
+  metadata.gz: 5daf2a27f6a6119e18e5eda94dbda72be91ad7b46f4f8bea401f111ce95d907c88930b08d85543c1694a82aeac54c42dff5d0c3586c0846123d1e1881ad23885
+  data.tar.gz: 0a75b0152ad7374729c1cb09b02b539b70429902850c81cdb086f64e9b616cc66f5ba032469be85505c103dc05d15b9469b2c26689b8b4dee2b0c3ef6636ad61

data/Gemfile ADDED Viewed

@@ -0,0 +1,3 @@
+source "https://rubygems.org"
+gemspec name: "miga-base"
+gem "codeclimate-test-reporter", group: :test, require: nil

data/LICENSE ADDED Viewed

@@ -0,0 +1,201 @@
+               The Artistic License 2.0
+           Copyright (c) 2016 Luis M Rodriguez-R
+     Everyone is permitted to copy and distribute verbatim copies
+      of this license document, but changing it is not allowed.
+Preamble
+This license establishes the terms under which a given free software
+Package may be copied, modified, distributed, and/or redistributed.
+The intent is that the Copyright Holder maintains some artistic
+control over the development of that Package while still keeping the
+Package available as open source and free software.
+You are always permitted to make arrangements wholly outside of this
+license directly with the Copyright Holder of a given Package.  If the
+terms of this license do not permit the full use that you propose to
+make of the Package, you should contact the Copyright Holder and seek
+a different licensing arrangement.
+Definitions
+    "Copyright Holder" means the individual(s) or organization(s)
+    named in the copyright notice for the entire Package.
+    "Contributor" means any party that has contributed code or other
+    material to the Package, in accordance with the Copyright Holder's
+    procedures.
+    "You" and "your" means any person who would like to copy,
+    distribute, or modify the Package.
+    "Package" means the collection of files distributed by the
+    Copyright Holder, and derivatives of that collection and/or of
+    those files. A given Package may consist of either the Standard
+    Version, or a Modified Version.
+    "Distribute" means providing a copy of the Package or making it
+    accessible to anyone else, or in the case of a company or
+    organization, to others outside of your company or organization.
+    "Distributor Fee" means any fee that you charge for Distributing
+    this Package or providing support for this Package to another
+    party.  It does not mean licensing fees.
+    "Standard Version" refers to the Package if it has not been
+    modified, or has been modified only in ways explicitly requested
+    by the Copyright Holder.
+    "Modified Version" means the Package, if it has been changed, and
+    such changes were not explicitly requested by the Copyright
+    Holder.
+    "Original License" means this Artistic License as Distributed with
+    the Standard Version of the Package, in its current version or as
+    it may be modified by The Perl Foundation in the future.
+    "Source" form means the source code, documentation source, and
+    configuration files for the Package.
+    "Compiled" form means the compiled bytecode, object code, binary,
+    or any other form resulting from mechanical transformation or
+    translation of the Source form.
+Permission for Use and Modification Without Distribution
+(1)  You are permitted to use the Standard Version and create and use
+Modified Versions for any purpose without restriction, provided that
+you do not Distribute the Modified Version.
+Permissions for Redistribution of the Standard Version
+(2)  You may Distribute verbatim copies of the Source form of the
+Standard Version of this Package in any medium without restriction,
+either gratis or for a Distributor Fee, provided that you duplicate
+all of the original copyright notices and associated disclaimers.  At
+your discretion, such verbatim copies may or may not include a
+Compiled form of the Package.
+(3)  You may apply any bug fixes, portability changes, and other
+modifications made available from the Copyright Holder.  The resulting
+Package will still be considered the Standard Version, and as such
+will be subject to the Original License.
+Distribution of Modified Versions of the Package as Source
+(4)  You may Distribute your Modified Version as Source (either gratis
+or for a Distributor Fee, and with or without a Compiled form of the
+Modified Version) provided that you clearly document how it differs
+from the Standard Version, including, but not limited to, documenting
+any non-standard features, executables, or modules, and provided that
+you do at least ONE of the following:
+    (a)  make the Modified Version available to the Copyright Holder
+    of the Standard Version, under the Original License, so that the
+    Copyright Holder may include your modifications in the Standard
+    Version.
+    (b)  ensure that installation of your Modified Version does not
+    prevent the user installing or running the Standard Version. In
+    addition, the Modified Version must bear a name that is different
+    from the name of the Standard Version.
+    (c)  allow anyone who receives a copy of the Modified Version to
+    make the Source form of the Modified Version available to others
+    under
+    (i)  the Original License or
+    (ii)  a license that permits the licensee to freely copy,
+    modify and redistribute the Modified Version using the same
+    licensing terms that apply to the copy that the licensee
+    received, and requires that the Source form of the Modified
+    Version, and of any works derived from it, be made freely
+    available in that license fees are prohibited but Distributor
+    Fees are allowed.
+Distribution of Compiled Forms of the Standard Version
+or Modified Versions without the Source
+(5)  You may Distribute Compiled forms of the Standard Version without
+the Source, provided that you include complete instructions on how to
+get the Source of the Standard Version.  Such instructions must be
+valid at the time of your distribution.  If these instructions, at any
+time while you are carrying out such distribution, become invalid, you
+must provide new instructions on demand or cease further distribution.
+If you provide valid instructions or cease distribution within thirty
+days after you become aware that the instructions are invalid, then
+you do not forfeit any of your rights under this license.
+(6)  You may Distribute a Modified Version in Compiled form without
+the Source, provided that you comply with Section 4 with respect to
+the Source of the Modified Version.
+Aggregating or Linking the Package
+(7)  You may aggregate the Package (either the Standard Version or
+Modified Version) with other packages and Distribute the resulting
+aggregation provided that you do not charge a licensing fee for the
+Package.  Distributor Fees are permitted, and licensing fees for other
+components in the aggregation are permitted. The terms of this license
+apply to the use and Distribution of the Standard or Modified Versions
+as included in the aggregation.
+(8) You are permitted to link Modified and Standard Versions with
+other works, to embed the Package in a larger work of your own, or to
+build stand-alone binary or bytecode versions of applications that
+include the Package, and Distribute the result without restriction,
+provided the result does not expose a direct interface to the Package.
+Items That are Not Considered Part of a Modified Version
+(9) Works (including, but not limited to, modules and scripts) that
+merely extend or make use of the Package, do not, by themselves, cause
+the Package to be a Modified Version.  In addition, such works are not
+considered parts of the Package itself, and are not subject to the
+terms of this license.
+General Provisions
+(10)  Any use, modification, and distribution of the Standard or
+Modified Versions is governed by this Artistic License. By using,
+modifying or distributing the Package, you accept this license. Do not
+use, modify, or distribute the Package, if you do not accept this
+license.
+(11)  If your Modified Version has been derived from a Modified
+Version made by someone other than you, you are nevertheless required
+to ensure that your Modified Version complies with the requirements of
+this license.
+(12)  This license does not grant you the right to use any trademark,
+service mark, tradename, or logo of the Copyright Holder.
+(13)  This license includes the non-exclusive, worldwide,
+free-of-charge patent license to make, have made, use, offer to sell,
+sell, import and otherwise transfer the Package with respect to any
+patent claims licensable by the Copyright Holder that are necessarily
+infringed by the Package. If you institute patent litigation
+(including a cross-claim or counterclaim) against any party alleging
+that the Package constitutes direct or contributory patent
+infringement, then this Artistic License to you shall terminate on the
+date that such litigation is filed.
+(14)  Disclaimer of Warranty:
+THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS
+IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL
+LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

data/README.md CHANGED Viewed

@@ -1,351 +1,33 @@
 [![Code Climate](https://codeclimate.com/github/bio-miga/miga/badges/gpa.svg)](https://codeclimate.com/github/bio-miga/miga)
 [![Test Coverage](https://codeclimate.com/github/bio-miga/miga/badges/coverage.svg)](https://codeclimate.com/github/bio-miga/miga/coverage)
-[![Build Status](https://travis-ci.org/lmrodriguezr/gfa.svg?branch=master)](https://travis-ci.org/lmrodriguezr/gfa)
+[![Build Status](https://travis-ci.org/bio-miga/miga.svg?branch=master)](https://travis-ci.org/bio-miga/miga)
+[![Gem Version](https://badge.fury.io/rb/miga-base.svg)](https://badge.fury.io/rb/miga-base)
+[![Inch docs](http://inch-ci.org/github/bio-miga/miga.svg)](http://inch-ci.org/github/bio-miga/miga)
+[![Yard docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://www.rubydoc.info/github/bio-miga/miga)
-MiGA: Microbial Genomes Atlas
-=============================
+# MiGA: Microbial Genomes Atlas
-Installation
-------------
+**Important**: The MiGA code is under active development, and we currently
+cannot ensure any stability on the different interfaces. We'll be launching a
+Beta Testing program soon, with dedicated support for a small number of
+laboratories. If you're interested, please [contact us][contact].
-Please see [INSTALLATION.md](./INSTALLATION.md) for instructions.
+For additional information on the MiGA system, please refer to the
+[MiGA manual][gitbook]. For additional information on the MiGA API
+(and Ruby gem), please refer to the [miga docs][rubydoc].
-Getting started with MiGA
--------------------------
-### MiGA Interfaces
-You caninteract with MiGA through different interfaces. These interfaces have
-different purposes, but they also have some degree of overlap, because different
-users with different aims sometimes want to do the same thing. Throughout this
-manual I'll be telling you how to do things using mostly the CLI, but I'll also
-try to mention the GUI and the Web Interface. The CLI is the most comprehensive
-and flexible interface, but the other two are friendlier to humans. There is a
-fourth interface that I won't be mentioning at all, but I'll try to document:
-the Ruby API. MiGA is mostly written in Ruby, with an object-oriented approach,
-and all the interfaces are just thin layers atop the Ruby core. That means that
-you can write your own interfaces (or pieces) if you know how to talk to these
-Ruby objects. Sometimes I even use `irb`, which is an interactive shell for
-Ruby, but that's mostly for debugging.
-#### MiGA CLI
-CLI stands for Command Line Interface. This is a set of little scripts that let
-you talk with MiGA through the terminal shell. If MiGA is in your PATH (see
-[installation details](./INSTALLATION.md#miga-in-your-path)), you can simply run
-`miga` in your terminal, and the help messages will take it from there. All the
-MiGA CLI calls look like:
-```bash
-miga task [options]
-```
-Where `task` is one of the supported tasks and `[options]` is a set of dash-flag
-options supported by each task. `-h` is always there to provide help. If you're
-a MiGA administrator, this is probably the most convenient option for you (but
-hey, give the GUI a chance).
-#### MiGA GUI
-The Graphical User Interface is the friendlier option for setting up a MiGA
-project. It doesn't have as many options as the CLI, but it's pretty easy to
-use, so it's a good option if you have a typical project in your hands.
-#### MiGA Web
-The Web interface for MiGA is the way MiGA reports results from a project. It's
-not designed to set up new projects, but to explore existing ones, and to submit
-non-reference datasets for analyses.
-### Creating your first project
-You can do this in the GUI, but I like the CLI better, so I'll be telling you
-how to tell MiGA what to do from the CLI. First, think where you'll place your
-project. Normally this means a location...
-1. ... with enough space. This is, plan for at least 4 or 5 times the size of
-the input files.
-2. ... accessible by worker nodes. If you're using a single server, this is not
-really an issue. However, if you plan on deploying MiGA in a cluster
-infrastructure, make sure your project is reachable by worker nodes.
-3. ... with fast access. It's not a great idea to set up projects in remote
-drives with large latency. In some cases there no way around this, for example
-when that's the only available option in your cluster infrastructure, but try
-to avoid this as much as possible.
-Now that you know where to create your project, go ahead and run:
-```bash
-miga create_project -P /path/to/project1 -t type-of-project
-```
-Where `/path/to/project1` is the path to where the project should be created.
-You don't need to create the folder in advance, MiGA will take care. See the
-next section to help you decide what `type-of-project` to use. There are some
-other options that are not mandatory, but will make your project richer. Take a
-look at `miga create_project -h`.
-#### Project types
-Projects can be set for different purposes, so we've divided them into "types".
-There are four of them, depending on the types of datasets to be processed (see
-[Dataset types](#dataset-types)):
-1. **mixed**: A generic project with any supported type of datasets.
-2. **metagenomes**: A project containing only metagenomic datasets. This
-includes either (or both) metagenomes and viromes.
-3. **genomes**: A project containing only single-organism datasets. This
-includes any of the single-organism types: genome, scgenome, and/or popgenome.
-4. **clade**: Same as "genomes", but all the datasets are expected to be from
-the same species. This type of project performs additional analyses that expect
-a very dense ANI matrix, so all genomes in it are expected to have AAI > 90%.
-### Creating datasets
-Once your project is ready, you can start populating it with datasets and data.
-While it's possible to create empty datasets using `miga create_dataset`, the
-preferred method is to first add data and then use the data to create the
-datasets in batch. For example, lets assume you have a collection of paired-end
-raw reads from several datasets. The first step is to format the filenames
-properly. For each one of your datasets, pick a name that conforms the
-[MiGA names](#miga-names) restrictions (we'll call it "ds1") and rename your
-reads to `/path/to/project1/data/01.raw_reads/ds1.1.fastq` for the first
-sister and `/path/to/project1/data/01.raw_reads/ds1.2.fastq` for the second
-sister. Also, add the date into `/path/to/project1/data/01.raw_reads/ds1.done`.
-Check what are the [expected result files](#expected-result-files) below if you
-want to start at any other point in the pipeline. Once you have renamed (or
-copied) the files inside the project folder, run:
-```bash
-miga find_datasets -P /path/to/project1 -a -r -t type-of-dataset
-```
-The `-a` flag tells MiGA that you want to add the datasets (not just find them);
-the `-r` flag tells MiGA that your datasets are to be treated as "reference"
-datasets (see [Non-reference datasets](#non-reference-datasets) below); and the
-`-t` option tells MiGA what type of datasets you're adding (see
-[Dataset types](#dataset-types) below). If you have a mixture of dataset types,
-process one at a time. This is, perform this step for each dataset type. Don't
-worry about the datasets that are already registered, those will be ignored by
-the `find_datasets` task and will remain unchanged.
-#### Expected result files
-For brevity, we'll assume that you're inside `/path/to/project1/data`; *i.e.*,
-in the `data` directory of your project. We'll also assume that you're naming
-your dataset **ds1**, but you can change this by anything following the
-[MiGA names](#miga-names) restrictions. Now, these are the "input" points that
-you can use in MiGA:
-1. **Paired-end raw reads**: The expected files are `01.raw_reads/ds1.1.fastq`
-and `01.raw_reads/ds1.2.fastq`, each including a sister end. The reads must be
-in the same order in both files (MiGA won't check). You can also use gzipped
-files instead.
-2. **Single-end raw reads**: The expected file is `01.raw_reads/ds1.1.fastq`.
-You can also use a gzipped file instead.
-3. **Paired-end trimmed reads**: These are assumed to be quality-controlled
-reads in FastA format, with both ends passing the quality filters. The minimum
-expected file is `04.trimmed_fasta/ds1.CoupledReads.fa`, which contains the
-reads interposed. You can also pass (in addition) the reads that past the
-quality check without the sister as a gzipped FastA at
-`04.trimmed_fasta/ds1.SingleReads.fa.gz`.
-4. **Single-end trimmed reads**: Similar to the option above, only
-quality-checked reads are expected here. The expected file is
-`04.trimmed_fasta/ds1.SingleReads.fa`.
-5. **Assembled fragments**: This can be any assembly result, including complete
-genomes. The expected file is `05.assembly/ds1.LargeContigs.fna`, containing
-only contigs longer than 500bp. You can also provide the complete assembly
-(without length-filtering) at `05.assembly/ds1.AllContigs.fna`.
-6. **Predicted genes/proteins**: This is the total collection of predicted genes
-and proteins. The expected files are `06.cds/ds1.fna`, containing genes, and
-`06.cds/ds1.faa`, containing proteins. You can also provide the locations of
-said genes in the genome in gzipped GFF v2 (`06.cds/ds1.gff2.gz`), gzipped
-GFF v3 (`06.cds/ds1.gff3.gz`), or gzipped tabular (`06.cds/ds1.tab.gz`).
-**IMPORTANT**: In all cases, an additional `ds1.done` file MUST be created in
-the same folder. This is meant to prevent MiGA from mistakenly adding files as
-results before they're done being processed or transferred. This file must
-contain the current [date in MiGA format](#date-in-miga-format). Here's a quick
-code snippet to add the `.done` file for all the input files in `01.raw_reads`
-(you can adapt this accordingly to any of the other options):
-```bash
-cd /path/to/project1/data/01.raw_reads
-for i in *.1.fastq ; do
-   date "+%Y-%m-%d %H:%M:%S %z" > $(basename $i .1.fastq).done
-done
-```
-#### Dataset types
-This is how you tell MiGA what kind of data you have in your datasets. Lets see
-the definitions:
-1. **genome**: The genome from an isolate.
-2. **metagenome**: A metagenome (excluding viromes).
-3. **virome**: A viral metagenome.
-4. **scgenome**: A genome from a single cell.
-5. **popgenome**: The genome of a population (including microdiversity).
-#### Non-reference datasets
-#### Creating a RefSeq project
-If you've reached this point, you are now ready to create a large functional
-project. If you want to continue using this documentation on real data but
-don't have any of your own handy (or if you want to use RefSeq data), this
-is a quick tutoral on how to create a functional MiGA project using ALL of
-NCBI's Prokaryotic RefSeq data.
-**Step 1: Create the project**. That's simple, just `cd` to the directory you
-want to use, and execute `miga create_project -P MiGA_RefSeq -t genomes`.
-**Step 2: Download the data**. Just `cd MiGA_RefSeq`, and execute this code:
-```bash
-wget -O reference_genomes.txt 'http://www.ncbi.nlm.nih.gov/genomes/Genome2BE/genome2srv.cgi?action=refgenomes&amp;download=on&amp;type=reference'
-grep -v '^#' reference_genomes.txt \
-   | awk -F'\t' '{gsub(/[^A-Za-z0-9]/,"_",$3)} {print "miga download_dataset -P . -D "$3" -I "$4" -U ncbi --db nuccore -t genome -v # "$3""}' \
-   | while read ln ; do
-      sp=$(echo $ln | perl -pe 's/.*# //')
-      if [[ ! -n $(miga list_datasets -P . -D $sp) ]] ; then
-	 echo $ln
-	 $ln
-      fi
-   done
-```
-And that's it. The first line will download the most current list of genomes
-included in NCBI's Prokaryotic RefSeq, and the rest will repeatedly execute the
-`download_dataset` task, that automatically fetches the data (even the genome's
-taxonomy!). Note that the code above checks first if a dataset already exists,
-so if you want to update an existing MiGA_RefSeq project, simply repeat step 2
-and only missing genomes will be fetched.
-Note that running time for the above code may vary depending on the network and
-the size of RefSeq, but I was able to create a complete project with 122 genomes
-in under 10 minutes.
-**Alternative step 2: downloading all representatives**. If you want a larger
-and more comprehensive collection, and not just the reference genomes, you can
-download all of the representative genomes in the prokaryotic RefSeq with this
-alternative code:
-```bash
-wget -O representative_genomes.txt 'http://www.ncbi.nlm.nih.gov/genomes/Genome2BE/genome2srv.cgi?action=refgenomes&amp;download=on'
-grep -v '^#' representative_genomes.txt \
-   | awk -F'\t' '{gsub(/[^A-Za-z0-9]/,"_",$3)} $4{print "miga download_dataset -P . -D "$3" -I "$4" -U ncbi --db nuccore -t genome -v # "$3""}' \
-   | while read ln ; do
-      sp=$(echo $ln | perl -pe 's/.*# //')
-      if [[ ! -n $(miga list_datasets -P . -D $sp) ]] ; then
-	 echo $ln
-	 $ln
-      fi
-   done
-```
-This is a much larger set (1,246), hence it'll take much more time. I finished
-downloading the whole thing in about one and a half hours.
-Launching daemons
------------------
-### Configuring daemons
-### Understating the MiGA configuration file
-### Arbitrary configuration scripts
-### Fixing system calls with aliases
-In some cases, we might not have the same executable names as MiGA expects, or
-we might have broken modules in our cluster that can be easily fixed with an
-`alias`. In these cases, you can use
-[arbitrary configuration scripts](#arbitrary-configuration-scripts) to generate
-one or more `alias`. Importantly, MiGA daemons work with non-interactive shells,
-which means you likely need to explicitly allow for alias extensions, for
-example:
-```bash
-# Allow alias expansions in non-interactive shells
-shopt -s expand_aliases
-# Call FastQC with the environmental Perl,
-# not the built-in /usr/bin/perl:
-alias fastqc="perl $(which fastqc)"
-# Use the standard name for RAxML (pthreads)
-# instead of the one my sys-admin decided to use:
-alias raxmlHPC-PTHREADS=RAxML_pthreads
-```
-The examples above illustrate how to use `alias` to fix broken packages or to
-make Software with non-standard names reachable.
-**Known caveats to this solution:** This solution CANNOT BE USED in the few
-cases in which a whole package is expected based on a single executable. For
-example, adding the enveomics scripts to your `PATH` is far easier than creating
-an `alias` for each script. Also, MiGA expects to find the model, the activation
-key, and the scripts of MetaGeneMark in the same folder of the `gmhmmp` binary,
-so setting an`alias` may prevent MiGA from finding these ancillary files.
-Cluster infrastructure
-----------------------
-### Loading optional modules
-See also [Fixing system calls with aliases](#fixing-system-calls-with-aliases).
-Miscellaneous
--------------
-These below are reference snippets that for which I couldn't find a more
-suitable home, but are important documentation.
-### MiGA Names
-MiGA names are non-empty strings composed exclusively of alphanumerics and
-underscores. All the dataset names in MiGA must conform this restriction, but
-not all the projects do. Other objects must conform the MiGA name restrictions,
-such as taxonomic entries.
-### Date in MiGA format
-The official format in which MiGA represents date/times is the default of Ruby's
-`Time.now.to_s`. In the *nix `date` utility this corresponds to the format:
-`+%Y-%m-%d %H:%M:%S %z`.
-Authors
--------
+# Authors
 Developed and maintained by [Luis M. Rodriguez-R][lrr].
-License
--------
+# License
 See [LICENSE](LICENSE).
 [lrr]: http://lmrodriguezr.github.io/
+[gitbook]: https://miga.gitbooks.io/miga/content/
+[rubydoc]: http://www.rubydoc.info/github/bio-miga/miga
+[contact]: http://enve-omics.gatech.edu/node/7

data/Rakefile ADDED Viewed

@@ -0,0 +1,31 @@
+require "rake/testtask"
+SOURCES = FileList["lib/**/*.rb"]
+desc "Default Task"
+task :default => "test:base"
+desc "Base Tests"
+Rake::TestTask.new("test:base") do |t|
+  t.libs << "test"
+  t.pattern = "test/[^j]*_test.rb"
+  t.verbose = true
+end
+desc "GUI Tests"
+Rake::TestTask.new("test:gui") do |t|
+  ENV["GUI_TESTS"] = "true"
+  t.libs << "test"
+  t.libs << "test"
+  t.pattern = "test/j*_test.rb"
+  t.verbose = true
+end
+desc "All the tests"
+Rake::TestTask.new("test:all") do |t|
+  ENV["GUI_TESTS"] = "true"
+  t.libs << "test"
+  t.libs << "test"
+  t.pattern = "test/*_test.rb"
+  t.verbose = true
+end

data/actions/add_result CHANGED Viewed

@@ -1,10 +1,7 @@
 #!/usr/bin/env ruby
-#
 # @package MiGA
-# @author  Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @license artistic license 2.0
-# @update  Oct-01-2015
-#
+# @license Artistic-2.0
 o = {q:true}
 opts = OptionParser.new do |opt|

data/actions/add_taxonomy CHANGED Viewed

@@ -1,10 +1,7 @@
 #!/usr/bin/env ruby
-#
 # @package MiGA
-# @author  Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @license artistic license 2.0
-# @update  Oct-01-2015
-#
+# @license Artistic-2.0
 o = {q:true}
 OptionParser.new do |opt|
@@ -57,9 +54,9 @@ if not o[:taxfile].nil?
    $stderr.puts "Reading tax-file and registering taxonomy." unless o[:q]
    tfh = File.open(o[:taxfile], "r")
    header = nil
-   while ln = tfh.gets
+   tfh.each_line do |ln|
       next if ln =~ /^\s*?$/
-      r = ln.chomp.split /\t/, -1
+      r = ln.chomp.split(/\t/, -1)
       dn = r.shift
       if header.nil?
 	 header = r

data/actions/create_dataset CHANGED Viewed

@@ -1,10 +1,7 @@
 #!/usr/bin/env ruby
-#
 # @package MiGA
-# @author  Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @license artistic license 2.0
-# @update  Nov-29-2015
-#
+# @license Artistic-2.0
 o = {q:true, ref:true}
 OptionParser.new do |opt|
@@ -55,8 +52,10 @@ raise "Impossible to load project: #{o[:project]}" if p.nil?
 $stderr.puts "Creating dataset." unless o[:q]
 md = {}
 [:type, :description, :user, :comments].each{ |k| md[k]=o[k] unless o[k].nil? }
-d = MiGA::Dataset.new(p, o[:dataset], o[:ref], md)
+d = MiGA::Dataset.new(p, o[:dataset], o[:ref], md) # keep the handle: d is used two lines below
 p.add_dataset(o[:dataset])
+res = d.first_preprocessing
+$stderr.puts "- #{res}" unless o[:q] # same stream and o[:q] guard as the other status messages
 $stderr.puts "Done." unless o[:q]