RubyGems - bio-ngs - Versions diffs - 0.3.2.alpha.01 - Mend

bio-ngs 0.3.2.alpha.01

Files changed (76) hide show

data/.document +5 -0
data/Gemfile +39 -0
data/Gemfile.lock +81 -0
data/LICENSE.txt +28 -0
data/README.rdoc +240 -0
data/Rakefile +60 -0
data/VERSION +1 -0
data/bin/biongs +35 -0
data/bio-ngs.gemspec +215 -0
data/ext/mkrf_conf.rb +87 -0
data/lib/bio-ngs.rb +54 -0
data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
data/lib/bio/appl/ngs/blast.rb +36 -0
data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
data/lib/bio/appl/ngs/cufflinks.rb +489 -0
data/lib/bio/appl/ngs/fastx.rb +170 -0
data/lib/bio/appl/ngs/samtools.rb +118 -0
data/lib/bio/appl/ngs/sff_extract.rb +23 -0
data/lib/bio/appl/ngs/tophat.rb +158 -0
data/lib/bio/ngs/converter.rb +100 -0
data/lib/bio/ngs/core_ext.rb +12 -0
data/lib/bio/ngs/db.rb +66 -0
data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
data/lib/bio/ngs/db/models.rb +1 -0
data/lib/bio/ngs/db/models/homology.rb +8 -0
data/lib/bio/ngs/db/models/ontology.rb +16 -0
data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
data/lib/bio/ngs/ext/versions.yaml +73 -0
data/lib/bio/ngs/graphics.rb +189 -0
data/lib/bio/ngs/homology.rb +102 -0
data/lib/bio/ngs/ontology.rb +103 -0
data/lib/bio/ngs/quality.rb +64 -0
data/lib/bio/ngs/record.rb +50 -0
data/lib/bio/ngs/task.rb +46 -0
data/lib/bio/ngs/utils.rb +176 -0
data/lib/development_tasks.rb +34 -0
data/lib/enumerable.rb +37 -0
data/lib/tasks/bwa.thor +126 -0
data/lib/tasks/convert.thor +454 -0
data/lib/tasks/history.thor +51 -0
data/lib/tasks/homology.thor +121 -0
data/lib/tasks/ontology.thor +93 -0
data/lib/tasks/project.thor +51 -0
data/lib/tasks/quality.thor +142 -0
data/lib/tasks/rna.thor +126 -0
data/lib/tasks/sff_extract.thor +9 -0
data/lib/templates/README.tt +43 -0
data/lib/templates/db.tt +6 -0
data/lib/wrapper.rb +225 -0
data/spec/converter_qseq_spec.rb +56 -0
data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
data/spec/quality_spec.rb +40 -0
data/spec/sff_extract_spec.rb +98 -0
data/spec/spec_helper.rb +55 -0
data/spec/tophat_spec.rb +99 -0
data/spec/utils_spec.rb +22 -0
data/test/conf/test_db.yml +4 -0
data/test/data/blastoutput.xml +69 -0
data/test/data/gene-GO.json +1 -0
data/test/data/goa_uniprot +27 -0
data/test/data/goslim_goa.obo +1763 -0
data/test/helper.rb +18 -0
data/test/test_bio-ngs.rb +17 -0
data/test/test_db.rb +21 -0
data/test/test_homology.rb +102 -0
data/test/test_ngs.rb +21 -0
data/test/test_ontology.rb +74 -0
data/test/test_utils.rb +29 -0
metadata +460 -0

data/lib/bio/appl/ngs/fastx.rb ADDED Viewed

@@ -0,0 +1,170 @@
+#
+#   fastx.rb - The FASTX-Toolkit is a collection of command line tools for Short-Reads FASTA/FASTQ files preprocessing.
+#
+# Next-Generation sequencing machines usually produce FASTA or FASTQ files, containing multiple short-reads sequences (possibly with quality information).
+#
+# The main processing of such FASTA/FASTQ files is mapping (aka aligning) the sequences to reference genomes or other databases using specialized programs. Example of such mapping programs are: Blat, SHRiMP, LastZ, MAQ and many many others.
+#
+# However, it is sometimes more productive to preprocess the FASTA/FASTQ files before mapping the sequences to the genome - manipulating the sequences to produce better mapping results.
+#
+# The FASTX-Toolkit tools perform some of these preprocessing tasks.
+# http://hannonlab.cshl.edu/fastx_toolkit/
+#
+# Copyright:: Copyright (C) 2011
+#     Raoul Bonnal <r@bioruby.org>
+# License:: The Ruby License
+#
+# + Mapped
+# - Not Yet Mapped
+#
+# - fastx_artifacts_filter
+# - fastx_collapser
+# + fastx_quality_stats
+# - fastx_trimmer
+# - fastx_barcode_splitter.pl
+# - fastx_nucleotide_distribution_graph.sh
+# - fastx_renamer
+# - fastx_uncollapser
+# - fastx_clipper
+# - fastx_nucleotide_distribution_line_graph.sh
+# - fastx_reverse_complement
+# + fastq_coverage_graph.sh
+# - fastq_masker
+# + fastq_quality_boxplot_graph.sh
+# - fastq_quality_converter
+# - fastq_quality_filter
+# - fastq_quality_trimmer
+# - fastq_to_fasta
+module Bio
+  module Ngs
+    module Fastx
+      # [-h]         = This helpful help screen.
+      # [-t N]       = Quality threshold - nucleotides with lower
+      #                quality will be trimmed (from the end of the sequence).
+      # [-l N]       = Minimum length - sequences shorter than this (after trimming)
+      #                will be discarded. Default = 0 = no minimum length.
+      # [-z]         = Compress output with GZIP.
+      # [-i INFILE]  = FASTQ input file. default is STDIN.
+      # [-o OUTFILE] = FASTQ output file. default is STDOUT.
+      # [-v]         = Verbose - report number of sequences.
+      #                If [-o] is specified,  report will be printed to STDOUT.
+      #                If [-o] is not specified (and output goes to STDOUT),
+      #                report will be printed to STDERR.
+      class Trim
+        include Bio::Command::Wrapper
+        set_program Bio::Ngs::Utils.binary("fastq_quality_trimmer")
+        use_aliases
+        add_option :min_size, :type=>:numeric, :default=>20, :aliases => "-l", :desc=>"Minimum length - sequences shorter than this (after trimming)
+        will be discarded. Default = 0 = no minimum length."
+        add_option :min_quality, :type=>:numeric, :default=>10, :aliases => "-t", :desc=>"Quality threshold - nucleotides with lower
+        quality will be trimmed (from the end of the sequence)."
+        add_option :output, :type=>:string, :aliases => "-o", :desc => "FASTQ output file.", :collapse=>true
+        add_option :input, :type=>:string, :aliases => "-i", :desc => "FASTQ input file.", :collapse=>true
+        add_option :gzip, :type => :boolean, :aliases => "-z", :desc => "Compress output with GZIP."
+        add_option :verbose, :type => :boolean, :aliases => "-v", :desc => "[-v]         = Verbose - report number of sequences.
+        If [-o] is specified,  report will be printed to STDOUT.
+        If [-o] is not specified (and output goes to STDOUT),
+        report will be printed to STDERR."
+        add_option :quality_type,  :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
+      end #Trim
+      # Solexa-Quality BoxPlot plotter
+      # Generates a solexa quality score box-plot graph
+      #
+      # Usage: /usr/local/bin/fastq_quality_boxplot_graph.sh [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]
+      #
+      #   [-p]           - Generate PostScript (.PS) file. Default is PNG image.
+      #   [-i INPUT.TXT] - Input file. Should be the output of "solexa_quality_statistics" program.
+      #   [-o OUTPUT]    - Output file name. default is STDOUT.
+      #   [-t TITLE]     - Title (usually the solexa file name) - will be plotted on the graph.
+      class ReadsBoxPlot
+        include Bio::Command::Wrapper
+        set_program Bio::Ngs::Utils.binary("fastq_quality_boxplot_graph.sh")
+        use_aliases
+        add_option :ps, :type => :boolean, :aliases => "-p", :desc => "Generate PostScript (.PS) file. Default is PNG image."
+        add_option :output, :type=>:string, :aliases => "-o", :desc => "FASTQ output file."
+        add_option :input, :type=>:string, :aliases => "-i", :desc => "FASTQ input file."
+        add_option :title, :type => :string, :aliases => "-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
+      end #ReadsBoxPlot
+      # Solexa-Reads coverage plotter
+      # Generates a solexa line coverage graph
+      #
+      # Usage: /usr/local/bin/fastq_coverage_graph.sh [-i INPUT.TXT] [-t TITLE] [-p] [-o OUTPUT]
+      #
+      #   [-p]           - Generate PostScript (.PS) file. Default is PNG image.
+      #   [-i INPUT.TXT] - Input file. Should be the output of "solexa_quality_statistics" program.
+      #   [-o OUTPUT]    - Output file name. default is STDOUT.
+      #   [-t TITLE]     - Title (usually the solexa file name) - will be plotted on the graph.
+      class ReadsCoverage
+        include Bio::Command::Wrapper
+        set_program Bio::Ngs::Utils.binary("fastq_coverage_graph.sh")
+        use_aliases
+        add_option :ps, :type => :boolean, :aliases => "-p", :desc => "Generate PostScript (.PS) file. Default is PNG image."
+        add_option :output, :type=>:string, :aliases => "-o", :desc => "FASTQ output file."
+        add_option :input, :type=>:string, :aliases => "-i", :desc => "FASTQ input file."
+        add_option :title, :type => :string, :aliases => "-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
+      end #ReadsCoverage
+      # usage: fastx_quality_stats [-h] [-N] [-i INFILE] [-o OUTFILE]
+      # Part of FASTX Toolkit 0.0.13 by A. Gordon (gordon@cshl.edu)
+      #
+      #    [-h] = This helpful help screen.
+      #    [-i INFILE]  = FASTQ input file. default is STDIN.
+      #    [-o OUTFILE] = TEXT output file. default is STDOUT.
+      #    [-N]         = New output format (with more information per nucleotide/cycle).
+      #
+      # The *OLD* output TEXT file will have the following fields (one row per column):
+      #   column  = column number (1 to 36 for a 36-cycles read solexa file)
+      #   count   = number of bases found in this column.
+      #   min     = Lowest quality score value found in this column.
+      #   max     = Highest quality score value found in this column.
+      #   sum     = Sum of quality score values for this column.
+      #   mean    = Mean quality score value for this column.
+      #   Q1  = 1st quartile quality score.
+      #   med = Median quality score.
+      #   Q3  = 3rd quartile quality score.
+      #   IQR = Inter-Quartile range (Q3-Q1).
+      #   lW  = 'Left-Whisker' value (for boxplotting).
+      #   rW  = 'Right-Whisker' value (for boxplotting).
+      #   A_Count = Count of 'A' nucleotides found in this column.
+      #   C_Count = Count of 'C' nucleotides found in this column.
+      #   G_Count = Count of 'G' nucleotides found in this column.
+      #   T_Count = Count of 'T' nucleotides found in this column.
+      #   N_Count = Count of 'N' nucleotides found in this column.
+      #   max-count = max. number of bases (in all cycles)
+      #
+      #
+      # The *NEW* output format:
+      #   cycle (previously called 'column') = cycle number
+      #   max-count
+      #   For each nucleotide in the cycle (ALL/A/C/G/T/N):
+      #     count   = number of bases found in this column.
+      #     min     = Lowest quality score value found in this column.
+      #     max     = Highest quality score value found in this column.
+      #     sum     = Sum of quality score values for this column.
+      #     mean    = Mean quality score value for this column.
+      #     Q1  = 1st quartile quality score.
+      #     med = Median quality score.
+      #     Q3  = 3rd quartile quality score.
+      #     IQR = Inter-Quartile range (Q3-Q1).
+      #     lW  = 'Left-Whisker' value (for boxplotting).
+      #     rW  = 'Right-Whisker' value (for boxplotting).
+      class FastqStats
+        include Bio::Command::Wrapper
+        set_program Bio::Ngs::Utils.binary("fastx_quality_stats")
+        use_aliases
+        add_option :output, :type=>:string, :aliases => "-o", :desc => "FASTQ output file.", :collapse=>true
+        add_option :input, :type=>:string, :aliases => "-i", :desc => "FASTQ input file.", :collapse=>true
+        add_option :new_format, :type => :boolean, :aliases => "-N", :desc => "New output format (with more information per nucleotide/cycle)."
+      end #ReadsCoverage
+    end #Fastx
+  end #Ngs
+end #Bio

data/lib/bio/appl/ngs/samtools.rb ADDED Viewed

@@ -0,0 +1,118 @@
+#
+#   samtools.rb - wrappers for the samtools "view" and "merge" sub-commands
+#
+# Copyright:: Copyright (C) 2011
+#     Raoul Bonnal <r@bioruby.org>
+# License:: The Ruby License
+#
+#
+#
+# Program: samtools (Tools for alignments in the SAM format)
+# Version: 0.1.16 (r963:234)
+#
+# Usage:   samtools <command> [options]
+#
+# Command: view        SAM<->BAM conversion
+#          sort        sort alignment file
+#          pileup      generate pileup output
+#          mpileup     multi-way pileup
+#          depth       compute the depth
+#          faidx       index/extract FASTA
+#          tview       text alignment viewer
+#          index       index alignment
+#          idxstats    BAM index stats (r595 or later)
+#          fixmate     fix mate information
+#          glfview     print GLFv3 file
+#          flagstat    simple stats
+#          calmd       recalculate MD/NM tags and '=' bases
+#          merge       merge sorted alignments
+#          rmdup       remove PCR duplicates
+#          reheader    replace BAM header
+#          cat         concatenate BAMs
+#          targetcut   cut fosmid regions (for fosmid pool only)
+#          phase       phase heterozygotes
+module Bio
+  module Ngs
+    module Samtools
+      # Usage:   samtools view [options] <in.bam>|<in.sam> [region1 [...]]
+      #
+      # Options: -b       output BAM
+      #          -h       print header for the SAM output
+      #          -H       print header only (no alignments)
+      #          -S       input is SAM
+      #          -u       uncompressed BAM output (force -b)
+      #          -1       fast compression (force -b)
+      #          -x       output FLAG in HEX (samtools-C specific)
+      #          -X       output FLAG in string (samtools-C specific)
+      #          -c       print only the count of matching records
+      #          -L FILE  output alignments overlapping the input BED FILE [null]
+      #          -t FILE  list of reference names and lengths (force -S) [null]
+      #          -T FILE  reference sequence file (force -S) [null]
+      #          -o FILE  output file name [stdout]
+      #          -R FILE  list of read groups to be outputted [null]
+      #          -f INT   required flag, 0 for unset [0]
+      #          -F INT   filtering flag, 0 for unset [0]
+      #          -q INT   minimum mapping quality [0]
+      #          -l STR   only output reads in library STR [null]
+      #          -r STR   only output reads in read group STR [null]
+      #          -?       longer help
+      class View
+        include Bio::Command::Wrapper
+        set_program Bio::Ngs::Utils.binary("samtools")
+        set_sub_program "view"
+        use_aliases
+        add_option :bam_output, :type => :boolean, :aliases => "-b", :desc => "output BAM", :default => true
+        add_option :print_header_alignment, :type => :boolean, :aliases => "-h", :desc => "print header for the SAM output"
+        add_option :print_header_only, :type => :boolean, :aliases => "-H", :desc => "print header only (no alignments)"
+        add_option :sam_input, :type => :boolean, :aliases => "-S", :desc => "input is SAM"
+        add_option :uncompress, :type => :boolean, :aliases => "-u", :desc => "uncompressed BAM output (force -b)"
+        add_option :compress, :type => :boolean , :aliases => "-1", :desc => "fast compression (force -b)"
+        add_option :flag_hex, :type => :boolean, :aliases => "-x", :desc => "output FLAG in HEX (samtools-C specific)"
+        add_option :flag_string, :type => :boolean, :aliases => "-X", :desc => "output FLAS is string (samtools-C specific)"
+        add_option :output_alignment, :type => :string, :aliases => "-L", :desc => "output alignments overlapping the input BED FILE [null]"
+        add_option :list_ref, :type => :string, :aliases => "-t", :desc => "list of reference names and lengths (force -S) [null]"
+        add_option :ref_sequence, :type => :string, :aliases => "-T", :desc => "reference sequence file (force -S) [null]"
+        add_option :output, :type => :string, :aliases => "-o", :desc => "output file name [stdout]", :required => true
+        add_option :list_group, :type => :string, :aliases => "-R", :desc => "list of read groups to be outputted [null]"
+        add_option :required_flag, :type => :numeric, :aliases => "-f", :desc => "required flag, 0 for unset [0]"
+        add_option :filtering_flag, :type => :numeric, :aliases => "-F", :desc => "filtering flag, 0 for unset [0]"
+        add_option :min_map_qual, :type => :numeric, :aliases => "-q", :desc => "minimum mapping quality [0]"
+        add_option :only_lib_reads, :type => :string, :aliases => "-l", :desc => "only output reads in library STR [null]"
+        add_option :only_grp_reads, :type => :string, :aliases => "r", :desc => "only output reads in read group STR [null]"
+      end #View
+      # Usage:   samtools merge [-nr] [-h inh.sam] <out.bam> <in1.bam> <in2.bam> [...]
+      #
+      # Options: -n       sort by read names
+      #          -r       attach RG tag (inferred from file names)
+      #          -u       uncompressed BAM output
+      #          -f       overwrite the output BAM if exist
+      #          -1       compress level 1
+      #          -R STR   merge file in the specified region STR [all]
+      #          -h FILE  copy the header in FILE to <out.bam> [in1.bam]
+      #
+      # Note: Samtools' merge does not reconstruct the @RG dictionary in the header. Users
+      #       must provide the correct header with -h, or uses Picard which properly maintains
+      #       the header dictionary in merging.
+      # out, in1, in2, ... inN must be passed as positional arguments.
+      class Merge
+        include Bio::Command::Wrapper
+        set_program Bio::Ngs::Utils.binary("samtools")
+        set_sub_program "merge"
+        use_aliases
+        add_option :sort_by_read_name, :type => :boolean, :aliases => "-n", :desc => "sort by read names"
+        add_option :attach_rg, :type => :boolean, :aliases => "-r", :desc => "attach RG tag (inferred from file names)"
+        add_option :uncompress, :type => :boolean, :aliases => "-u", :desc => "uncompressed BAM output"
+        add_option :overwrite_output, :type => :boolean, :aliases => "-f", :desc => "overwrite the output BAM if exist"
+        add_option :compress, :type => :boolean , :aliases => "-1", :desc => "compress level 1"
+        add_option :merge_regions, :type => :string, :aliases => "-R", :desc => "merge file in the specified region STR [all]"
+        add_option :copy_header, :type => :string, :aliases => "-h", :desc => "copy the header in FILE to <out.bam> [in1.bam]"
+      end #Merge
+    end #Samtools
+  end #Ngs
+end #Bio

data/lib/bio/appl/ngs/sff_extract.rb ADDED Viewed

@@ -0,0 +1,23 @@
+module Bio
+  module Ngs
+    class SffExtract
+      include Bio::Command::Wrapper
+      set_program Bio::Ngs::Utils.binary("sff_extract")
+      add_option "append", :type => :boolean, :aliases => "-a", :desc => "append output to existing files"
+      add_option "xml_info", :type => :string, :aliases => "-i", :desc => "extra info to write in the xml file"
+      add_option "linker_file", :type => :string, :aliases => "-l", :desc => "FASTA file with paired-end linker sequences"
+      add_option "clip", :type => :boolean, :aliases => "-c", :desc => "clip (completely remove) ends with low qual and/or adaptor sequence"
+      add_option "upper_case", :type => :boolean, :aliases => "-u", :desc => "all bases in upper case, including clipped ends"
+      add_option "min_left_clip", :type => :numeric, :desc => "if the left clip coming from the SFF is smaller than this value, override it"
+      add_option "fastq", :type => :boolean, :aliases => "-Q", :desc => "store as FASTQ file instead of FASTA + FASTA quality file"
+      add_option "out_basename", :type => :string, :aliases => "-o", :desc => "base name for all output files"
+      add_option "seq_file", :type => :string, :aliases => "-s", :desc => "output sequence file name"
+      add_option "qual_file", :type => :string, :aliases => "-q", :desc => "output quality file name"
+      add_option "xml_file", :type => :string, :aliases => "-x", :desc => "output ancillary xml file name"
+    end
+  end
+end

data/lib/bio/appl/ngs/tophat.rb ADDED Viewed

@@ -0,0 +1,158 @@
+#
+#   tophat.rb - wrapper for TopHat, which maps short sequences from spliced transcripts to whole genomes
+#
+# Copyright:: Copyright (C) 2011
+#     Raoul Bonnal <r@bioruby.org>
+# License:: The Ruby License
+#
+#
+#require 'bio/command'
+#require 'shellwords'
+#require 'thor'
+#require 'bio/ngs/utils'
+# TopHat maps short sequences from spliced transcripts to whole genomes.
+#
+# Usage:
+#     tophat [options] <bowtie_index> <reads1[,reads2,...]> [reads1[,reads2,...]] \
+#                                     [quals1,[quals2,...]] [quals1[,quals2,...]]
+#
+# Options:
+#     -v/--version
+#     -o/--output-dir                <string>    [ default: ./tophat_out     ]
+#     -a/--min-anchor                <int>       [ default: 8                ]
+#     -m/--splice-mismatches         <0-2>       [ default: 0                ]
+#     -i/--min-intron-length         <int>       [ default: 50               ]
+#     -I/--max-intron-length         <int>       [ default: 500000           ]
+#     -g/--max-multihits             <int>       [ default: 20               ]
+#     -F/--min-isoform-fraction      <float>     [ default: 0.15             ]
+#     --max-insertion-length         <int>       [ default: 3                ]
+#     --max-deletion-length          <int>       [ default: 3                ]
+#     --solexa-quals
+#     --solexa1.3-quals                          (same as phred64-quals)
+#     --phred64-quals                            (same as solexa1.3-quals)
+#     -Q/--quals
+#     --integer-quals
+#     -C/--color                                 (Solid - color space)
+#     --color-out
+#     --library-type                 <string>    (fr-unstranded, fr-firststrand,
+#                                                 fr-secondstrand)
+#     -p/--num-threads               <int>       [ default: 1                ]
+#     -G/--GTF                       <filename>
+#     -j/--raw-juncs                 <filename>
+#     --insertions                   <filename>
+#     --deletions                    <filename>
+#     -r/--mate-inner-dist           <int>
+#     --mate-std-dev                 <int>       [ default: 20               ]
+#     --no-novel-juncs
+#     --no-novel-indels
+#     --no-gtf-juncs
+#     --no-coverage-search
+#     --coverage-search
+#     --no-closure-search
+#     --closure-search
+#     --microexon-search
+#     --butterfly-search
+#     --no-butterfly-search
+#     --keep-tmp
+#     --tmp-dir                      <dirname>   [ default: <output_dir>/tmp ]
+#     -z/--zpacker                   <program>   [ default: gzip             ]
+#     -X/--unmapped-fifo                         [ use mkfifo to compress more temporary files]
+#
+# Advanced Options:
+#     --initial-read-mismatches      <int>       [ default: 2                ]
+#     --segment-mismatches           <int>       [ default: 2                ]
+#     --segment-length               <int>       [ default: 25               ]
+#     --bowtie-n                                 [ default: bowtie -v        ]
+#     --min-closure-exon             <int>       [ default: 100              ]
+#     --min-closure-intron           <int>       [ default: 50               ]
+#     --max-closure-intron           <int>       [ default: 5000             ]
+#     --min-coverage-intron          <int>       [ default: 50               ]
+#     --max-coverage-intron          <int>       [ default: 20000            ]
+#     --min-segment-intron           <int>       [ default: 50               ]
+#     --max-segment-intron           <int>       [ default: 500000           ]
+#     --no-sort-bam                              [Output BAM is not coordinate-sorted]
+#     --no-convert-bam                           [Do not convert to bam format.
+#                                                 Output is <output_dir>accepted_hit.sam.
+#                                                 Implies --no-sort-bam.]
+#
+# SAM Header Options (for embedding sequencing run metadata in output):
+#     --rg-id                        <string>    (read group ID)
+#     --rg-sample                    <string>    (sample ID)
+#     --rg-library                   <string>    (library ID)
+#     --rg-description               <string>    (descriptive string, no tabs allowed)
+#     --rg-platform-unit             <string>    (e.g Illumina lane ID)
+#     --rg-center                    <string>    (sequencing center name)
+#     --rg-date                      <string>    (ISO 8601 date of the sequencing run)
+#     --rg-platform                  <string>    (Sequencing platform descriptor)
+#
+#     for detailed help see http://tophat.cbcb.umd.edu/manual.html
+module Bio
+  module Ngs
+    class Tophat
+      include Bio::Command::Wrapper
+      set_program Bio::Ngs::Utils.binary("tophat")
+      add_option "output-dir",:type => :string, :aliases => '-o'
+      add_option "min-anchor", :type => :numeric, :aliases => '-a'
+      add_option "splice-mismatches", :type => :numeric, :aliases => '-m'
+      add_option "min-intron-length", :type => :numeric , :aliases => '-i'
+      add_option "max-intron-length", :type => :numeric, :aliases => '-I'
+      add_option "max-multihits", :type => :numeric, :aliases => '-g'
+      add_option "min-isoform_fraction", :type => :numeric, :aliases => '-F'
+      add_option "max-insertion-length", :type => :numeric
+      add_option "max-deletion-length", :type => :numeric
+      add_option "solexa-quals", :type => :boolean
+      add_option "solexa1.3-quals", :type => :boolean, :aliases => '--phred64-quals'
+      add_option :quals, :type => :boolean, :aliases => '-Q'
+      add_option "integer-quals", :type => :boolean
+      add_option :color, :type => :boolean, :aliases => '-C'
+      add_option "library-type", :type => :string
+      add_option "num-threads", :type => :numeric, :aliases => '-p'
+      add_option "GTF", :type => :string, :aliases => '-G'
+      add_option "raw-juncs", :type => :string, :aliases => '-j'
+      add_option :insertions, :type => :string
+      add_option :deletions, :type => :string
+      add_option "mate-inner-dist", :type=>:numeric, :aliases => '-r'
+      add_option "mate-std-dev", :type => :numeric
+      add_option "no-novel-juncs", :type => :boolean
+      add_option "allow-indels", :type => :boolean
+      add_option "no-novel-indels", :type => :boolean
+      add_option "no-gtf-juncs", :type => :boolean
+      add_option "no-coverage-search", :type => :boolean
+      add_option "coverage-search", :type => :boolean
+      add_option "no-closure-search", :type => :boolean
+      add_option "closure-search", :type => :boolean
+      add_option "fill-gaps", :type => :boolean
+      add_option "microexon-search", :type => :boolean
+      add_option "butterfly-search", :type => :boolean
+      add_option "no-butterfly-search", :type => :boolean
+      add_option "keep-tmp", :type => :boolean
+      add_option "tmp-dir", :type => :string
+      add_option "segment-mismatches", :type => :numeric
+      add_option "segment-length", :type => :numeric
+      add_option "min-closure-exon", :type => :numeric
+      add_option "min-closure-intron", :type => :numeric
+      add_option "max-closure-intron", :type => :numeric
+      add_option "min-coverage-intron", :type => :numeric
+      add_option "max-coverage-intron", :type => :numeric
+      add_option "min-segment-intron", :type => :numeric
+      add_option "max-segment-intron", :type => :numeric
+      add_option "rg-id", :type => :string
+      add_option "rg-sample", :type => :string
+      add_option "rg-library", :type => :string
+      add_option "rg-description", :type => :string
+      add_option "rg-platform-unit", :type => :string
+      add_option "rg-center", :type => :string
+      add_option "rg-date", :type => :string
+      add_option "rg-platform", :type => :string
+    end #That
+  end #Ngs
+end #Bio