RubyGems - BioDSL - Versions diffs - 1.0.1 → 1.0.2 - Mend

BioDSL 1.0.1 → 1.0.2

Files changed (186) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/BioDSL.gemspec +1 -1
data/Gemfile +6 -0
data/README.md +289 -155
data/Rakefile +18 -16
data/lib/BioDSL.rb +1 -1
data/lib/BioDSL/cary.rb +78 -53
data/lib/BioDSL/command.rb +2 -2
data/lib/BioDSL/commands.rb +1 -1
data/lib/BioDSL/commands/add_key.rb +1 -1
data/lib/BioDSL/commands/align_seq_mothur.rb +4 -4
data/lib/BioDSL/commands/analyze_residue_distribution.rb +5 -5
data/lib/BioDSL/commands/assemble_pairs.rb +13 -13
data/lib/BioDSL/commands/assemble_seq_idba.rb +7 -9
data/lib/BioDSL/commands/assemble_seq_ray.rb +13 -13
data/lib/BioDSL/commands/assemble_seq_spades.rb +4 -4
data/lib/BioDSL/commands/classify_seq.rb +8 -8
data/lib/BioDSL/commands/classify_seq_mothur.rb +5 -5
data/lib/BioDSL/commands/clip_primer.rb +7 -7
data/lib/BioDSL/commands/cluster_otus.rb +5 -5
data/lib/BioDSL/commands/collapse_otus.rb +2 -2
data/lib/BioDSL/commands/collect_otus.rb +2 -2
data/lib/BioDSL/commands/complement_seq.rb +4 -4
data/lib/BioDSL/commands/count.rb +1 -1
data/lib/BioDSL/commands/count_values.rb +2 -2
data/lib/BioDSL/commands/degap_seq.rb +6 -7
data/lib/BioDSL/commands/dereplicate_seq.rb +1 -1
data/lib/BioDSL/commands/dump.rb +2 -2
data/lib/BioDSL/commands/filter_rrna.rb +4 -4
data/lib/BioDSL/commands/genecall.rb +7 -7
data/lib/BioDSL/commands/grab.rb +1 -1
data/lib/BioDSL/commands/index_taxonomy.rb +3 -3
data/lib/BioDSL/commands/mask_seq.rb +4 -4
data/lib/BioDSL/commands/mean_scores.rb +2 -2
data/lib/BioDSL/commands/merge_pair_seq.rb +3 -3
data/lib/BioDSL/commands/merge_table.rb +1 -1
data/lib/BioDSL/commands/merge_values.rb +1 -1
data/lib/BioDSL/commands/plot_heatmap.rb +4 -5
data/lib/BioDSL/commands/plot_histogram.rb +4 -4
data/lib/BioDSL/commands/plot_matches.rb +5 -5
data/lib/BioDSL/commands/plot_residue_distribution.rb +6 -6
data/lib/BioDSL/commands/plot_scores.rb +7 -7
data/lib/BioDSL/commands/random.rb +1 -1
data/lib/BioDSL/commands/read_fasta.rb +9 -9
data/lib/BioDSL/commands/read_fastq.rb +16 -16
data/lib/BioDSL/commands/read_table.rb +2 -3
data/lib/BioDSL/commands/reverse_seq.rb +4 -4
data/lib/BioDSL/commands/slice_align.rb +4 -4
data/lib/BioDSL/commands/slice_seq.rb +3 -3
data/lib/BioDSL/commands/sort.rb +1 -1
data/lib/BioDSL/commands/split_pair_seq.rb +6 -7
data/lib/BioDSL/commands/split_values.rb +2 -2
data/lib/BioDSL/commands/trim_primer.rb +13 -8
data/lib/BioDSL/commands/trim_seq.rb +5 -5
data/lib/BioDSL/commands/uchime_ref.rb +6 -6
data/lib/BioDSL/commands/uclust.rb +5 -5
data/lib/BioDSL/commands/unique_values.rb +1 -1
data/lib/BioDSL/commands/usearch_global.rb +2 -2
data/lib/BioDSL/commands/usearch_local.rb +2 -2
data/lib/BioDSL/commands/write_fasta.rb +7 -9
data/lib/BioDSL/commands/write_fastq.rb +4 -4
data/lib/BioDSL/commands/write_table.rb +3 -3
data/lib/BioDSL/commands/write_tree.rb +2 -3
data/lib/BioDSL/config.rb +2 -2
data/lib/BioDSL/csv.rb +8 -10
data/lib/BioDSL/debug.rb +1 -1
data/lib/BioDSL/fasta.rb +54 -40
data/lib/BioDSL/fastq.rb +35 -32
data/lib/BioDSL/filesys.rb +56 -47
data/lib/BioDSL/fork.rb +1 -1
data/lib/BioDSL/hamming.rb +1 -1
data/lib/BioDSL/helpers.rb +1 -1
data/lib/BioDSL/helpers/aux_helper.rb +1 -1
data/lib/BioDSL/helpers/email_helper.rb +1 -1
data/lib/BioDSL/helpers/history_helper.rb +1 -1
data/lib/BioDSL/helpers/log_helper.rb +1 -1
data/lib/BioDSL/helpers/options_helper.rb +1 -1
data/lib/BioDSL/helpers/status_helper.rb +1 -1
data/lib/BioDSL/html_report.rb +1 -1
data/lib/BioDSL/math.rb +1 -1
data/lib/BioDSL/mummer.rb +1 -1
data/lib/BioDSL/pipeline.rb +1 -1
data/lib/BioDSL/seq.rb +240 -231
data/lib/BioDSL/seq/ambiguity.rb +1 -1
data/lib/BioDSL/seq/assemble.rb +1 -1
data/lib/BioDSL/seq/backtrack.rb +93 -76
data/lib/BioDSL/seq/digest.rb +1 -1
data/lib/BioDSL/seq/dynamic.rb +43 -55
data/lib/BioDSL/seq/homopolymer.rb +34 -36
data/lib/BioDSL/seq/kmer.rb +67 -50
data/lib/BioDSL/seq/levenshtein.rb +35 -40
data/lib/BioDSL/seq/translate.rb +64 -55
data/lib/BioDSL/seq/trim.rb +60 -50
data/lib/BioDSL/serializer.rb +1 -1
data/lib/BioDSL/stream.rb +1 -1
data/lib/BioDSL/taxonomy.rb +1 -1
data/lib/BioDSL/test.rb +1 -1
data/lib/BioDSL/tmp_dir.rb +1 -1
data/lib/BioDSL/usearch.rb +1 -1
data/lib/BioDSL/verbose.rb +1 -1
data/lib/BioDSL/version.rb +2 -2
data/test/BioDSL/commands/test_add_key.rb +1 -1
data/test/BioDSL/commands/test_align_seq_mothur.rb +1 -1
data/test/BioDSL/commands/test_analyze_residue_distribution.rb +1 -1
data/test/BioDSL/commands/test_assemble_pairs.rb +1 -1
data/test/BioDSL/commands/test_assemble_seq_idba.rb +1 -1
data/test/BioDSL/commands/test_assemble_seq_ray.rb +1 -1
data/test/BioDSL/commands/test_assemble_seq_spades.rb +1 -1
data/test/BioDSL/commands/test_classify_seq.rb +1 -1
data/test/BioDSL/commands/test_classify_seq_mothur.rb +1 -1
data/test/BioDSL/commands/test_clip_primer.rb +1 -1
data/test/BioDSL/commands/test_cluster_otus.rb +1 -1
data/test/BioDSL/commands/test_collapse_otus.rb +1 -1
data/test/BioDSL/commands/test_collect_otus.rb +1 -1
data/test/BioDSL/commands/test_complement_seq.rb +1 -1
data/test/BioDSL/commands/test_count.rb +1 -1
data/test/BioDSL/commands/test_count_values.rb +1 -1
data/test/BioDSL/commands/test_degap_seq.rb +1 -1
data/test/BioDSL/commands/test_dereplicate_seq.rb +1 -1
data/test/BioDSL/commands/test_dump.rb +1 -1
data/test/BioDSL/commands/test_filter_rrna.rb +1 -1
data/test/BioDSL/commands/test_genecall.rb +1 -1
data/test/BioDSL/commands/test_grab.rb +1 -1
data/test/BioDSL/commands/test_index_taxonomy.rb +1 -1
data/test/BioDSL/commands/test_mask_seq.rb +1 -1
data/test/BioDSL/commands/test_mean_scores.rb +1 -1
data/test/BioDSL/commands/test_merge_pair_seq.rb +1 -1
data/test/BioDSL/commands/test_merge_table.rb +1 -1
data/test/BioDSL/commands/test_merge_values.rb +1 -1
data/test/BioDSL/commands/test_plot_heatmap.rb +1 -1
data/test/BioDSL/commands/test_plot_histogram.rb +1 -1
data/test/BioDSL/commands/test_plot_matches.rb +1 -1
data/test/BioDSL/commands/test_plot_residue_distribution.rb +1 -1
data/test/BioDSL/commands/test_plot_scores.rb +1 -1
data/test/BioDSL/commands/test_random.rb +1 -1
data/test/BioDSL/commands/test_read_fasta.rb +1 -1
data/test/BioDSL/commands/test_read_fastq.rb +1 -1
data/test/BioDSL/commands/test_read_table.rb +1 -1
data/test/BioDSL/commands/test_reverse_seq.rb +1 -1
data/test/BioDSL/commands/test_slice_align.rb +1 -1
data/test/BioDSL/commands/test_slice_seq.rb +1 -1
data/test/BioDSL/commands/test_sort.rb +1 -1
data/test/BioDSL/commands/test_split_pair_seq.rb +1 -1
data/test/BioDSL/commands/test_split_values.rb +1 -1
data/test/BioDSL/commands/test_trim_primer.rb +1 -1
data/test/BioDSL/commands/test_trim_seq.rb +1 -1
data/test/BioDSL/commands/test_uchime_ref.rb +1 -1
data/test/BioDSL/commands/test_uclust.rb +1 -1
data/test/BioDSL/commands/test_unique_values.rb +1 -1
data/test/BioDSL/commands/test_usearch_global.rb +1 -1
data/test/BioDSL/commands/test_usearch_local.rb +1 -1
data/test/BioDSL/commands/test_write_fasta.rb +1 -1
data/test/BioDSL/commands/test_write_fastq.rb +1 -1
data/test/BioDSL/commands/test_write_table.rb +1 -1
data/test/BioDSL/commands/test_write_tree.rb +1 -1
data/test/BioDSL/helpers/test_options_helper.rb +3 -3
data/test/BioDSL/seq/test_assemble.rb +58 -56
data/test/BioDSL/seq/test_backtrack.rb +83 -81
data/test/BioDSL/seq/test_digest.rb +47 -45
data/test/BioDSL/seq/test_dynamic.rb +66 -64
data/test/BioDSL/seq/test_homopolymer.rb +35 -33
data/test/BioDSL/seq/test_kmer.rb +29 -28
data/test/BioDSL/seq/test_translate.rb +44 -42
data/test/BioDSL/seq/test_trim.rb +59 -57
data/test/BioDSL/test_cary.rb +1 -1
data/test/BioDSL/test_command.rb +2 -2
data/test/BioDSL/test_csv.rb +34 -31
data/test/BioDSL/test_debug.rb +31 -31
data/test/BioDSL/test_fasta.rb +30 -29
data/test/BioDSL/test_fastq.rb +27 -26
data/test/BioDSL/test_filesys.rb +28 -27
data/test/BioDSL/test_fork.rb +29 -28
data/test/BioDSL/test_math.rb +31 -30
data/test/BioDSL/test_mummer.rb +1 -1
data/test/BioDSL/test_pipeline.rb +1 -1
data/test/BioDSL/test_seq.rb +42 -41
data/test/BioDSL/test_serializer.rb +35 -33
data/test/BioDSL/test_stream.rb +28 -27
data/test/BioDSL/test_taxonomy.rb +38 -37
data/test/BioDSL/test_test.rb +32 -31
data/test/BioDSL/test_tmp_dir.rb +1 -1
data/test/BioDSL/test_usearch.rb +28 -27
data/test/BioDSL/test_verbose.rb +32 -31
data/test/helper.rb +34 -31
metadata +3 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b828e339f7d9337acdaf88a4206cb4cf15a6778c
-  data.tar.gz: 6c130d98ba2e9ca1c1bdf6a044bbe7d6e2c6f309
+  metadata.gz: 806bfca700a56365bd01a11fb981fb16363aad95
+  data.tar.gz: 91718f260a6e32fb38af4724cfef035a9224e072
 SHA512:
-  metadata.gz: 8f1fcfd7080a7487fd1a75152c4da3ce7328e86e19843181a93c30d1bb94a2f8f78a065cd208002324df2e0bbbbfbde5576a6157ece8c4c6c4878a6311e0074e
-  data.tar.gz: 31b549d1294e2be25897d824ab019154dfa570305dd1f79b041ae641b8208d2fa00e97ac36e4bdfe8a2dacd46d7cc8da9db4ddb5d75f4c9f3a3e6997aa4e0ae0
+  metadata.gz: 875d37e145698145b42b250a0bed8ac81ad3bb9576b48cb6e14a68515906a6b773c154a9caf33f282a9f193aaf0877484f15fffd4a2600ac266322fef7e9f347
+  data.tar.gz: 21aeb489434d449fbfab7950015481672b3e2734f7cb4ae3384aa66927416655c53b847c13a03f4d42f4ceb907513cfbe80772d3175d4b1e9f25c96628d625df

data/.gitignore CHANGED

@@ -8,3 +8,4 @@ pkg/
 .tags*
 tags
 8mer
+Gemfile.lock

data/BioDSL.gemspec CHANGED

@@ -20,7 +20,7 @@
 #                                                                              #
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
 #                                                                              #
-# This software is part of BioDSL (www.github.com/maasha/BioDSL).              #
+# This software is part of BioDSL (http://maasha.github.io/BioDSL).            #
 #                                                                              #
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #

data/Gemfile ADDED

@@ -0,0 +1,6 @@
+# A sample Gemfile
+source "https://rubygems.org"
+gem 'bundler'    #, '1.7.4'
+gem 'mocha'      #, '1.0.0'
+gem 'simplecov'  #, '0.9.2'

data/README.md CHANGED

@@ -1,169 +1,224 @@
-BioDSL
-=========
-Installation
-------------
-`gem install BioDSL`
-Getting started
----------------
-A test script:
-    #!/usr/bin/env ruby
-    require 'BioDSL'
-    p = BD.new.
-    read_fasta(input: "input.fna").
-    grab(select: "ATC$", keys: :SEQ).
-    write_fasta(output: "output.fna").
-    run(progress: true)
-Or using an interactive shell using the alias ibp which you can create by
-adding the following to your `~/.bashrc` file:
-    alias ibp="irb -r BioDSL --noinspect"
-And then start the interactive shell:
-    $ ibp
-    irb(main):001:0> p = BD.new
-    => BD.new
-    irb(main):002:0> p.read_fasta(input: "input.fna")
-    => BD.new.read_fasta(input: "input.fna")
-    irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
-    => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
-    irb(main):004:0> p.write_fasta(output: "output.fna")
-    => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
-    irb(main):005:0> p.run(progress: true)
-    => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
-    irb(main):006:0>
-Or chaining commands directly:
-    $ ibp
-    irb(main):001:0> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
-    => BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
-    irb(main):002:0>
-Or run on the command line with the alias bp which you can create by adding the
-following to your ~/.bashrc file:
-    alias bp="ruby -r BioDSL"
-Then you can run the below from the command line:
-    $ bp -e 'BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)'
-Available BioDSL
--------------------
-  * [add_key]                          (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AddKey)
-  * [align_seq_mothur]                 (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AlignSeqMothur)
-  * [analyze_residue_distribution]     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AnalyzeResidueDistribution)
-  * [assemble_pairs]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssemblePairs)
-  * [assemble_seq_idba]                (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqIdba)
-  * [assemble_seq_ray]                 (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqRay)
-  * [assemble_seq_spades]              (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/AssembleSeqSpades)
-  * [classify_seq]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeq)
-  * [classify_seq_mothur]              (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClassifySeqMothur)
-  * [clip_primer]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClipPrimer)
-  * [cluster_otus]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ClusterOtus)
-  * [collapse_otus]                    (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollapseOtus)
-  * [collect_otus]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/CollectOtus)
-  * [complement_seq]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ComplementSeq)
-  * [count]                            (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Count)
-  * [degap_seq]                        (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DegapSeq)
-  * [dereplicate_seq]                  (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/DereplicateSeq)
-  * [dump]                             (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Dump)
-  * [filter_rrna]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/FilterRrna)
-  * [genecall]                         (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Genecall)
-  * [grab]                             (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Grab)
-  * [index_taxonomy]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/IndexTaxonomy)
-  * [mean_scores]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MeanScores)
-  * [merge_pair_seq]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergePairSeq)
-  * [merge_table]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeTable)
-  * [merge_values]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/MergeValues)
-  * [plot_heatmap]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHeatmap)
-  * [plot_histogram]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotHistogram)
-  * [plot_matches]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotMatches)
-  * [plot_residue_distribution]        (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotResidueDistribution)
-  * [plot_scores]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/PlotScores)
-  * [random]                           (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Random)
-  * [read_fasta]                       (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFasta)
-  * [read_fastq]                       (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadFastq)
-  * [read_table]                       (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReadTable)
-  * [reverse_seq]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/ReverseSeq)
-  * [slice_align]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceAlign)
-  * [slice_seq]                        (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SliceSeq)
-  * [sort]                             (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/Sort)
-  * [split_pair_seq]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitPairSeq)
-  * [split_values]                     (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/SplitValues)
-  * [trim_primer]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimPrimer)
-  * [trim_seq]                         (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/TrimSeq)
-  * [uchime_ref]                       (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UchimeRef)
-  * [unique_values]                    (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UniqueValues)
-  * [usearch_global]                   (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/UsearchGlobal)
-  * [write_fasta]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFasta)
-  * [write_fastq]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteFastq)
-  * [write_table]                      (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTable)
-  * [write_tree]                       (http://www.rubydoc.info/gems/BioDSL/1.0.1/BioDSL/WriteTree)
-Log and History
----------------
+BioDSL (pronounced Biodiesel) is a Domain Specific Language for creating
+bioinformatic analysis workflows. A workflow may consist of several pipelines
+and each pipeline consists of a series of steps such as reading in data from a
+file, processing the data in some way, and writing data to a new file.
+BioDSL is built on the same principles as [Biopieces](http://www.biopieces.org), where
+data records are passed through multiple commands each with a specific task. The
+idea is that a command will process the data record if this contains the
+relevant attributes that the command can process. E.g. if a data record contains
+a sequence, then the command [reverse_seq](reverse_seq) will reverse that
+sequence.
+# Installation
+The recommended way of installing BioDSL is via Ruby’s gem package manager:
+`$ gem install BioDSL`
+For those commands which are wrappers around third-party tools, such as Usearch,
+Mothur and SPAdes, you will have to install these and make the executables
+available in your `$PATH`.
+# Getting started
+BioDSL is implemented in Ruby making use of Ruby’s powerful metaprogramming
+facilities. Thus, a workflow is basically a Ruby script containing one or more
+pipelines.
+Here is a test script with a single pipeline that reads all FASTA entries from
+the file `input.fna`, selects all records with a sequence ending in `ATC`, and
+writes those records as FASTA entries to the file `output.fna`:
+```
+#!/usr/bin/env ruby
+require 'BioDSL'
+BD.new.
+read_fasta(input: "input.fna").
+grab(select: "ATC$", keys: :SEQ).
+write_fasta(output: "output.fna").
+run
+```
+Save the test script to a file `test.biodsl` and execute on the command line:
+```
+$ ruby test.biodsl
+```
+# Combining multiple pipelines
+This script demonstrates how multiple pipelines can be created and combined. In
+the end two pipelines are run, one consisting of p1 + p2 and one consisting of
+p1 + p3. The first pipeline run will produce a histogram plot of sequence length
+from sequences containing the pattern `ATCG`, and the other pipeline run will
+produce a plot with the sequence length distribution of sequences not matching
+`ATCG`.
+```
+#!/usr/bin/env ruby
+require 'BioDSL'
+p1 = BD.new.read_fasta(input: "test.fna")
+p2 = BD.new.grab(keys: :SEQ, select: "ATCG").
+     plot_histogram(key: :SEQ_LEN, terminal: :png, output: "select.png")
+p3 = BD.new.grab(keys: :SEQ, reject: "ATCG").
+     plot_histogram(key: :SEQ_LEN, terminal: :png, output: "reject.png")
+p4 = p1 + p3
+(p1 + p2).write_fasta(output: "select.fna").run
+p4.write_fasta(output: "reject.fna").run
+```
+# Running pipelines in parallel
+This script demonstrates how to run multiple pipelines in parallel using 20 CPU
+cores. Here we filter paired-end FASTQ entries from a list of samples described in
+the file `samples.txt` which contains three tab separated columns: sample name,
+a forward read file path, and a reverse read file path.
+```
+#!/usr/bin/env ruby
+require 'BioDSL'
+require 'csv'
+samples = CSV.read("samples.txt")
+Parallel.each(samples, in_processes: 20) do |sample|
+  BD.new.
+  read_fastq(input: sample[1], input2: sample[2], encoding: :base_33).
+  grab(keys: :SEQ, select: "ATCG").
+  write_fastq(output: "#{sample[0]}_filtered.fastq.bz2", bzip2: true).
+  run
+end
+```
+# Ruby one-liners
+It is possible to execute BioDSL pipelines on the command line:
+```
+ruby -r BioDSL -e 'BD.new.read_fasta(input: "test.fna").plot_histogram(key: :SEQ_LEN).run'
+```
+And to save typing we may use the alias `bd` which is set like this on the
+command line:
+```
+$ alias bd='ruby -r BioDSL'
+```
+It may be a good idea to save that alias in your `.bashrc` file.
+Now it is possible to run a BioDSL pipeline on the command line like this:
+```
+$ bd -e 'BD.new.read_fasta(input: "test.fna").plot_histogram(key: :SEQ_LEN).run'
+```
+# Using the Interactive Ruby interpreter
+Here we demonstrate the use of Ruby's `irb` shell:
+```
+$ irb -r BioDSL --noinspect
+irb(main):001:0> p = BD.new
+=> BD.new
+irb(main):002:0> p.read_fasta(input: "input.fna")
+=> BD.new.read_fasta(input: "input.fna")
+irb(main):003:0> p.grab(select: "ATC$", keys: :SEQ)
+=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ)
+irb(main):004:0> p.write_fasta(output: "output.fna")
+=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna")
+irb(main):005:0> p.run
+=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run
+irb(main):006:0>
+```
+Again, it may be a good idea to save an alias `alias biodsl="irb -r BioDSL --noinspect"` to your `.bashrc` file. Thus, we can use the new `biodsl` alias to chain commands directly:
+```
+$ biodsl
+irb(main):001:0> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
+=> BD.new.read_fasta(input: "input.fna").grab(select: "ATC$", keys: :SEQ).write_fasta(output: "output.fna").run(progress: true)
+irb(main):002:0>
+```
+# History file
+A history file is kept in `$HOME/.BioDSL_history` and each time `run` is called a history entry is added to this file:
+```
+BD.new.read_fasta(input: "test_big.fna", first: 100).plot_histogram(key: :SEQ_LEN).run
+BD.new.read_fasta(input: "test_big.fna", first: 100).plot_histogram(key: :SEQ_LEN).run
+BD.new.read_fasta(input: "test_big.fna", first: 10).plot_histogram(key: :SEQ_LEN).run
+BD.new.read_fasta(input: "test_big.fna").plot_histogram(key: :SEQ_LEN).run
+BD.new.read_fasta(input: "test_big.fna", first: 1000).plot_histogram(key: :SEQ_LEN).run
+```
+Thus it is possible to redo the last pipeline by pasting the line in irb or a Ruby one-liner.
+# Log and History
 All BioDSL events are logged to `~/.BioDSL_log`.
 BioDSL history is saved to `~/.BioDSL_history`.
+# Features
-Features
---------
-Progress:
+## Progress
 Show a nifty progress table with commands, records read and emitted, and time.
 `BD.new.read_fasta(input: "input.fna").dump.run(progress: true)`
-Verbose:
+## Verbose
 Output verbose messages from commands and the run status.
-`BD.new.read_fasta(input: "input.fna").dump.run(verbose: true)`
+```
+BD.new.read_fasta(input: "input.fna").dump.run(verbose: true)
+```
-Debug:
+## Debug
 Output debug messages from commands using these.
-`BD.new.read_fasta(input: "input.fna").dump.run(debug: true)`
+```
+BD.new.read_fasta(input: "input.fna").dump.run(debug: true)
+```
-E-mail notification:
+## E-mail notification
 Send an email when run is complete.
-`BD.new.read_fasta(input: "input.fna").dump.run(email: mail@maasha.dk, subject: "Script done!")`
-Report:
+```
+BD.new.read_fasta(input: "input.fna").dump.run(email: "bill@hotmail.com", subject: "Script done!")
+```
-Create an HTML report of the run stats:
+## Reports
-`BD.new.read_fasta(input: "input.fna").dump.run(report: "status.html")`
+Create an HTML report of the run stats for a pipeline:
-Output dir:
+```
+BD.new.read_fasta(input: "input.fna").dump.run(report: "status.html")
+```
-All output files from commands are put in a specified dir:
+## Output directory
-`BD.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")`
+All output files from commands are put in a specified directory:
+```
+BD.new.read_fasta(input: "input.fna").dump.run(output_dir: "Results")
+```
-Configuration File
-------------------
+## Configuration File
-It is possible to pre-set options in a configuration file located in your $HOME
+It is possible to pre-set options in a configuration file located in your `$HOME`
 directory called `.BioDSLrc`. Thus if an option is not already set, its value
 will fall back to the one set in the configuration file. The configuration file
 contains three whitespace separated columns:
@@ -172,34 +227,113 @@ contains three whitespace separated columns:
   * Option
   * Option value
-Lines starting with '#' are considered comments and are ignored.
+Lines starting with `#` are considered comments and are ignored.
 An example:
-    maasha@mel:~$ cat ~/.BioDSLrc
-    uchime_ref   database   /home/maasha/Install/QIIME1.8/data/rdp_gold.fa
-    uchime_ref   cpus       20
+```
+maasha@mel:~$ cat ~/.BioDSLrc
+uchime_ref   database   /home/maasha/Install/QIIME1.8/data/rdp_gold.fa
+uchime_ref   cpus       20
+```
 On compute clusters it is necessary to specify the max processor count, which
 is otherwise determined as the number of cores on the current node. To override
 this add the following line:
-    pipeline   processor_count   1000
+```
+pipeline   processor_count   1000
+```
 It is also possible to change the temporary directory from the system's default
 by adding the following line:
-    pipeline   tmp_dir   /home/projects/ku_microbio/scratch/tmp
-Contributing
-------------
-Fork it
-Create your feature branch (git checkout -b my-new-feature)
-Commit your changes (git commit -am 'Add some feature')
-Push to the branch (git push origin my-new-feature)
-Create new Pull Request
+```
+pipeline   tmp_dir   /home/projects/ku_microbio/scratch/tmp
+```
+# Available BioDSL commands
+  * [add_key]                          (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AddKey)
+  * [align_seq_mothur]                 (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AlignSeqMothur)
+  * [analyze_residue_distribution]     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AnalyzeResidueDistribution)
+  * [assemble_pairs]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AssemblePairs)
+  * [assemble_seq_idba]                (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AssembleSeqIdba)
+  * [assemble_seq_ray]                 (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AssembleSeqRay)
+  * [assemble_seq_spades]              (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/AssembleSeqSpades)
+  * [classify_seq]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ClassifySeq)
+  * [classify_seq_mothur]              (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ClassifySeqMothur)
+  * [clip_primer]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ClipPrimer)
+  * [cluster_otus]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ClusterOtus)
+  * [collapse_otus]                    (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/CollapseOtus)
+  * [collect_otus]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/CollectOtus)
+  * [complement_seq]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ComplementSeq)
+  * [count]                            (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/Count)
+  * [degap_seq]                        (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/DegapSeq)
+  * [dereplicate_seq]                  (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/DereplicateSeq)
+  * [dump]                             (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/Dump)
+  * [filter_rrna]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/FilterRrna)
+  * [genecall]                         (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/Genecall)
+  * [grab]                             (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/Grab)
+  * [index_taxonomy]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/IndexTaxonomy)
+  * [mean_scores]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/MeanScores)
+  * [merge_pair_seq]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/MergePairSeq)
+  * [merge_table]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/MergeTable)
+  * [merge_values]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/MergeValues)
+  * [plot_heatmap]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/PlotHeatmap)
+  * [plot_histogram]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/PlotHistogram)
+  * [plot_matches]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/PlotMatches)
+  * [plot_residue_distribution]        (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/PlotResidueDistribution)
+  * [plot_scores]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/PlotScores)
+  * [random]                           (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/Random)
+  * [read_fasta]                       (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ReadFasta)
+  * [read_fastq]                       (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ReadFastq)
+  * [read_table]                       (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ReadTable)
+  * [reverse_seq]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/ReverseSeq)
+  * [slice_align]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/SliceAlign)
+  * [slice_seq]                        (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/SliceSeq)
+  * [sort]                             (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/Sort)
+  * [split_pair_seq]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/SplitPairSeq)
+  * [split_values]                     (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/SplitValues)
+  * [trim_primer]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/TrimPrimer)
+  * [trim_seq]                         (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/TrimSeq)
+  * [uchime_ref]                       (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/UchimeRef)
+  * [unique_values]                    (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/UniqueValues)
+  * [usearch_global]                   (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/UsearchGlobal)
+  * [write_fasta]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/WriteFasta)
+  * [write_fastq]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/WriteFastq)
+  * [write_table]                      (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/WriteTable)
+  * [write_tree]                       (http://www.rubydoc.info/gems/BioDSL/1.0.2/BioDSL/WriteTree)
+# Running the test suite
+BioDSL has an extensive set of unit tests that can be run after installing
+development dependencies. First you need to install the bundler gem:
+```
+$ gem install bundler
+```
+Next you need to change to the source directory of BioDSL and run bundler to
+download the gems it depends on:
+```
+$ bundle install
+```
+And then you run the test suite by running `rake`:
+```
+$ rake
+```
+And the unit tests should all run, except those omitted because a third-party
+executable was missing.
+# Contributing
+1. Fork it
+1. Create your feature branch (git checkout -b my-new-feature)
+1. Commit your changes (git commit -am 'Add some feature')
+1. Push to the branch (git push origin my-new-feature)
+1. Create new Pull Request