RubyGems - miga-base - Versions diffs - 0.7.26.2 → 1.0.3.0 - Mend

miga-base 0.7.26.2 → 1.0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113)

checksums.yaml +4 -4
data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
data/lib/miga/cli/action/classify_wf.rb +2 -2
data/lib/miga/cli/action/derep_wf.rb +1 -1
data/lib/miga/cli/action/doctor.rb +57 -14
data/lib/miga/cli/action/doctor/base.rb +47 -23
data/lib/miga/cli/action/env.rb +26 -0
data/lib/miga/cli/action/init.rb +11 -7
data/lib/miga/cli/action/init/files_helper.rb +1 -0
data/lib/miga/cli/action/ncbi_get.rb +3 -3
data/lib/miga/cli/action/tax_dist.rb +2 -2
data/lib/miga/cli/action/wf.rb +5 -4
data/lib/miga/cli/base.rb +1 -0
data/lib/miga/common.rb +1 -0
data/lib/miga/daemon.rb +11 -4
data/lib/miga/dataset/result.rb +10 -6
data/lib/miga/json.rb +5 -4
data/lib/miga/metadata.rb +5 -1
data/lib/miga/parallel.rb +36 -0
data/lib/miga/project.rb +8 -8
data/lib/miga/project/base.rb +4 -4
data/lib/miga/project/result.rb +2 -2
data/lib/miga/sqlite.rb +10 -2
data/lib/miga/version.rb +23 -9
data/scripts/aai_distances.bash +16 -18
data/scripts/ani_distances.bash +16 -17
data/scripts/assembly.bash +31 -16
data/scripts/haai_distances.bash +3 -27
data/scripts/miga.bash +12 -8
data/scripts/p.bash +1 -1
data/scripts/read_quality.bash +9 -18
data/scripts/trimmed_fasta.bash +14 -30
data/scripts/trimmed_reads.bash +36 -36
data/test/parallel_test.rb +31 -0
data/test/project_test.rb +2 -1
data/test/remote_dataset_test.rb +1 -1
data/utils/distance/commands.rb +1 -0
data/utils/distance/database.rb +0 -1
data/utils/distance/runner.rb +2 -4
data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
data/utils/enveomics/Manifest/Tasks/other.json +77 -0
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
data/utils/enveomics/Manifest/categories.json +13 -4
data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
data/utils/enveomics/Scripts/SRA.download.bash +6 -8
data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
data/utils/enveomics/Scripts/aai.rb +3 -2
data/utils/enveomics/Scripts/anir.rb +137 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
data/utils/enveomics/Scripts/rbm.rb +87 -133
data/utils/enveomics/Scripts/sam.filter.rb +148 -0
data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
data/utils/enveomics/enveomics.R/R/utils.R +30 -0
data/utils/enveomics/enveomics.R/README.md +1 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
data/utils/multitrim/Multitrim How-To.pdf +0 -0
data/utils/multitrim/README.md +67 -0
data/utils/multitrim/multitrim.py +1555 -0
data/utils/multitrim/multitrim.yml +13 -0
data/utils/requirements.txt +4 -3
data/utils/subclade/pipeline.rb +2 -2
metadata +33 -4
data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30

data/utils/enveomics/Scripts/FastA.N50.pl CHANGED

@@ -1,9 +1,8 @@
 #!/usr/bin/env perl
-#
 # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @update: Oct 07 2015
-# @license: artistic license 2.0
-#
+# @license: Artistic-2.0
 use strict;
 use warnings;
 use List::Util qw/sum min max/;
@@ -11,46 +10,51 @@ use List::Util qw/sum min max/;
 my ($seqs, $minlen, $n__) = @ARGV;
 $seqs or die "
 Description:
-   Calculates the N50 value of a set of sequences.  Alternatively, it
-   can calculate other N** values.  It also calculates the total number
-   of sequences and the total added length.
+  Calculates the N50 value of a set of sequences.  Alternatively, it
+  can calculate other N** values.  It also calculates the total number
+  of sequences, the total added length, and the longest sequence length.
 Usage:
-   $0 seqs.fa[ minlen[ **]]
+  $0 seqs.fa [minlen [**]]
+  seqs.fa	A FastA file containing the sequences
+  minlen	(optional) The minimum length to take into consideration
+  		By default: 0
+  **		(optional) Value N** to calculate.  By default: 50 (N50)
-   seqs.fa	A FastA file containing the sequences.
-   minlen	(optional) The minimum length to take into consideration.
-   		By default: 0.
-   **		Value N** to calculate.  By default: 50 (N50).
 ";
 $minlen ||= 0;
 $n__    ||= 50;
 my @len = ();
 open SEQ, "<", $seqs or die "Cannot open file: $seqs: $!\n";
 while(<SEQ>){
-   if(/^>/){
-      push @len, 0;
-   }else{
-      next if /^;/;
-      chomp;
-      s/\W//g;
-      $len[-1]+=length $_;
-   }
+  if(/^>/){
+    push @len, 0;
+  }else{
+    next if /^;/;
+    chomp;
+    s/\W//g;
+    $len[-1] += length $_;
+  }
 }
 close SEQ;
-@len = sort { $a <=> $b } map { $_>=$minlen?$_:() } @len;
+@len = sort { $a <=> $b } map { $_ >= $minlen ? $_ : () } @len;
 my $tot = (sum(@len) || 0);
-my $thr = $n__*$tot/100;
+my $thr = $n__ * $tot / 100;
 my $pos = 0;
 for(@len){
-   $pos+= $_;
-   if($pos>=$thr){
-      print "N$n__: $_\n";
-      last;
-   }
+  $pos += $_;
+  if($pos >= $thr){
+    print "N$n__: $_\n";
+    last;
+  }
 }
-print "Sequences: ".scalar(@len)."\n";
+print "Sequences: " . scalar(@len) . "\n";
 print "Total length: $tot\n";
+print "Longest sequence: " . pop(@len) . "\n";

data/utils/enveomics/Scripts/FastA.fragment.rb CHANGED

@@ -1,92 +1,100 @@
 #!/usr/bin/env ruby
-# @author  Luis M. Rodriguez-R
-# @license artistic license 2.0
+# frozen_string_literal: true
-$:.push File.expand_path("../lib", __FILE__)
-require "enveomics_rb/enveomics"
-require "enveomics_rb/stat"
+$:.push File.expand_path('../lib', __FILE__)
+require 'enveomics_rb/enveomics'
+require 'enveomics_rb/stats'
+$VERSION = 1.0
-o = {q:false, completeness:nil, minlen:500, shuffle:true}
+o = { q: false, completeness: nil, minlen: 500, shuffle: true }
 OptionParser.new do |opts|
-  opts.banner = "
-Simulates incomplete (fragmented) drafts from complete genomes.
+  opts.version = $VERSION
+  Enveomics.opt_banner(
+    opts, 'Simulates incomplete (fragmented) drafts from complete genomes',
+    "#{File.basename($0)} -i in.fasta -o out.fasta -c 0.5 [options]"
+  )
-Usage: #{$0} [options]"
-  opts.separator ""
-  opts.separator "Mandatory"
-  opts.on("-i", "--in FILE",
-    "Path to the FastA file containing the complete sequences."
-    ){ |v| o[:in] = v }
-  opts.on("-o", "--out FILE", "Path to the FastA to create."){ |v| o[:out] = v }
-  opts.on("-c", "--completeness FLOAT",
-    "Fraction of genome completeness to simulate from 0 to 1."
-    ){ |v| o[:completeness] = v.to_f }
-  opts.separator ""
-  opts.separator "Options"
-  opts.on("-m", "--minlen INT",
-    "Minimum fragment length to report. By default: #{o[:minlen]}."
-    ){ |v| o[:minlen] = v.to_i }
-  opts.on("-s", "--sorted", "Keep fragments sorted as in the input file. ",
-    "By default, fragments are shuffled."){ |v| o[:shuffle] = !v }
-  opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
-  opts.on("-h", "--help", "Display this screen") do
-    puts opts
-    exit
-  end
-  opts.separator ""
+  opts.separator 'Mandatory'
+  opts.on(
+    '-i', '--in FILE',
+    'Path to the FastA file containing the complete sequences',
+    'Supports compression with .gz extension, use - for STDIN'
+  ) { |v| o[:in] = v }
+  opts.on(
+    '-o', '--out FILE', 'Path to the FastA to create',
+    'Supports compression with .gz extension, use - for STDOUT'
+  ) { |v| o[:out] = v }
+  opts.on(
+    '-c', '--completeness FLOAT',
+    'Fraction of genome completeness to simulate from 0 to 1'
+  ) { |v| o[:completeness] = v.to_f }
+  opts.separator ''
+  opts.separator 'Options'
+  opts.on(
+    '-m', '--minlen INT',
+    "Minimum fragment length to report. By default: #{o[:minlen]}"
+  ) { |v| o[:minlen] = v.to_i }
+  opts.on(
+    '-s', '--sorted', 'Keep fragments sorted as in the input file',
+    'By default, fragments are shuffled'
+  ) { |v| o[:shuffle] = !v }
+  opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
+  opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
+  opts.separator ''
 end.parse!
-abort "-i is mandatory" if o[:in].nil?
-abort "-o is mandatory" if o[:out].nil?
-abort "-c is mandatory" if o[:completeness].nil?
+raise Enveomics::OptionError.new('-i is mandatory') if o[:in].nil?
+raise Enveomics::OptionError.new('-o is mandatory') if o[:out].nil?
+raise Enveomics::OptionError.new('-c is mandatory') if o[:completeness].nil?
 begin
   # Read input sequences
   g_id  = []
   g_seq = []
-  File.open(o[:in], "r") do |ifh|
-    id = ""
-    ifh.each_line do |ln|
-      if ln =~ /^>(\S*)/
-        g_id  << $1
-        g_seq << ""
-      else
-        g_seq[g_seq.size-1] += ln.gsub(/[^A-Za-z]/,"")
-      end
+  ifh = reader(o[:in])
+  id = ''
+  ifh.each_line do |ln|
+    if ln =~ /^>(\S*)/
+      g_id  << $1
+      g_seq << ''
+    else
+      g_seq[g_seq.size - 1] += ln.gsub(/[^A-Za-z]/, '')
     end
   end
+  ifh.close
   # Fragment genomes
   f = {}
   binlen = [1, (o[:minlen].to_f/(1.5**2)).ceil].max
   p = [0.001, [1.0, 1.0 - (o[:completeness]/1.25 + 0.1)].min].max
-  while not g_seq.empty?
+  while !g_seq.empty?
     id  = g_id.shift
     seq = g_seq.shift
     gL  = seq.length
-    while not seq.empty?
-      fL = [0, ((Enve::Stat.r_geom(p).to_f +
-                  Enve::Stat.r_unif(-0.5,0.5))*binlen).round].max
-      f["#{f.size+1}_#{id}"] = seq[0,fL] if fL >= o[:minlen]
-      seq = seq[(fL+1) .. -1]
-      seq = "" if seq.nil?
+    while !seq.empty?
+      rand_x =
+        Enveomics::Stats.r_geom(p).to_f + Enveomics::Stats.r_unif(-0.5, 0.5)
+      fL = [0, (rand_x * binlen).round].max
+      f["#{f.size+1}_#{id}"] = seq[0, fL] if fL >= o[:minlen]
+      seq = seq[(fL + 1) .. -1]
+      seq = '' if seq.nil?
     end
   end
   # Save output
   k = f.keys
   k.shuffle! if o[:shuffle]
-  File.open(o[:out], "w") do |ofh|
-    k.each do |id|
-      ofh.puts ">#{id}"
-      ofh.puts f[id].gsub(/(\S{50})/, "\\1\n")
-    end
+  ofh = writer(o[:out])
+  k.each do |id|
+    ofh.puts ">#{id}"
+    ofh.puts f[id].gsub(/(\S{50})/, "\\1\n")
   end
+  ofh.close
 rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
+  $stderr.puts "Exception: #{err}\n\n"
+  err.backtrace.each { |l| $stderr.puts l + "\n" }
+  err
 end

data/utils/enveomics/Scripts/FastA.sample.rb CHANGED

@@ -1,43 +1,57 @@
 #!/usr/bin/env ruby
-#
-# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @license Artistic-2.0
-#
+# frozen_string_literal: false
-require 'optparse'
+$VERSION = 1.0
+$:.push File.expand_path('../lib', __FILE__)
+require 'enveomics_rb/enveomics'
-o = {q: false, rep: false}
-ARGV << '-h' if ARGV.size==0
+o = { q: false, rep: false }
 OptionParser.new do |opt|
-  opt.banner = "
-Samples a random set of sequences from a multi-FastA file.
-Usage: #{$0} [options]"
-  opt.separator ''
+  Enveomics.opt_banner(
+    opt, 'Samples a random set of sequences from a multi-FastA file',
+    "#{File.basename($0)} -i seq.fa -o 10pc.fa -f 0.1 [options]"
+  )
   opt.separator 'Mandatory'
-  opt.on('-i', '--in PATH', 'Input FastA file.'){ |v| o[:i] = v }
-  opt.on('-o', '--out PATH', 'Output FastA file.'){ |v| o[:o] = v }
-  opt.on('-f', '--fraction FLOAT',
+  opt.on(
+    '-i', '--in PATH',
+    'Input FastA file',
+    'Supports compression with .gz extension, use - for STDIN'
+  ) { |v| o[:i] = v }
+  opt.on(
+    '-o', '--out PATH',
+    'Output FastA file',
+    'Supports compression with .gz extension, use - for STDOUT'
+  ) { |v| o[:o] = v }
+  opt.on(
+    '-f', '--fraction FLOAT', Float,
     'Fraction of sequences to sample [0-1].',
-    'Mandatory unless -n is provided.'){ |v| o[:f] = v.to_f }
+    'Mandatory unless -c is provided.'
+  ) { |v| o[:f] = v }
   opt.separator ''
   opt.separator 'Options'
-  opt.on('-c', '--number INT',
-    'Number of sequences to sample.',
-    'Mandatory unless -f is provided.'){ |v| o[:n] = v.to_i }
-  opt.on('-r', '--replacement','Sample with replacement'){ |v| o[:rep] = v }
-  opt.on('-q', '--quiet', 'Run quietly (no STDERR output).'){ o[:q] = true }
+  opt.on(
+    '-c', '--number INT', Integer,
+    'Number of sequences to sample',
+    'Mandatory unless -f is provided'
+  ) { |v| o[:n] = v }
+  opt.on('-r', '--replacement','Sample with replacement') { |v| o[:rep] = v }
+  opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
   opt.on('-h', '--help', 'Display this screen.') do
     puts opt
     exit
   end
   opt.separator ''
 end.parse!
-abort '-i is mandatory.' if o[:i].nil?
-abort '-o is mandatory.' if o[:o].nil?
-abort '-f or -n is mandatory.' if o[:f].nil? and o[:n].nil?
+raise Enveomics::OptionError.new('-i is mandatory') if o[:i].nil?
+raise Enveomics::OptionError.new('-o is mandatory') if o[:o].nil?
+if o[:f].nil? && o[:n].nil?
+  raise Enveomics::OptionError.new('-f or -n is mandatory')
+end
+$QUIET = o[:q]
 # Functions to parse sequences
 def do_stuff(id, sq)
@@ -53,31 +67,32 @@ def do_stuff(id, sq)
 end
 # Parse sequences
-$stderr.puts 'Parsing sequences' unless o[:q]
+say 'Parsing sequences'
 seq = []
-File.open(o[:i], 'r') do |fh|
-  id = nil
-  sq = ''
-  fh.each do |ln|
-    next if ln =~ /^;/
-    if ln =~ /^>(.+)/
-      seq << [id, sq] unless id.nil?
-      id = $1
-      sq = ''
-    else
-      sq << ln
-    end
+fh = reader(o[:i])
+id = nil
+sq = ''
+fh.each do |ln|
+  next if ln =~ /^;/
+  if ln =~ /^>(.+)/
+    seq << [id, sq] unless id.nil?
+    id = $1
+    sq = ''
+  else
+    sq << ln
   end
-  seq << [id, sq] unless id.nil?
 end
-$stderr.puts "  Input sequences: #{seq.size}"
+seq << [id, sq] unless id.nil?
+fh.close
+say "Input sequences: #{seq.size}"
 o[:n] ||= (seq.size * o[:f]).round
-seq_o = o[:rep] ? o[:n].times.map{ seq.sample } : seq.sample(o[:n])
-File.open(o[:o], 'w') do |fh|
-  seq_o.each do |i|
-    fh.puts ">#{i[0]}"
-    fh.puts i[1]
-  end
+seq_o = o[:rep] ? o[:n].times.map { seq.sample } : seq.sample(o[:n])
+fh = writer(o[:o])
+seq_o.each do |i|
+  fh.puts ">#{i[0]}"
+  fh.puts i[1]
 end
-$stderr.puts "  Output sequences: #{seq_o.size}"
+fh.close
+say "Output sequences: #{seq_o.size}"

data/utils/enveomics/Scripts/FastA.toFastQ.rb ADDED

@@ -0,0 +1,69 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'zlib'
+o = { qual: 31, encoding: 33 }
+ARGV << '-h' if ARGV.empty?
+OptionParser.new do |opts|
+  opts.banner = "
+Creates a FastQ-compliant file from a FastA file.
+Usage: #{$0} [options]"
+  opts.separator ''
+  opts.separator 'Options'
+  opts.on(
+    '-i', '--in FILE', 'Input FastA file (supports .gz compression)'
+  ) { |v| o[:in] = v }
+  opts.on(
+    '-o', '--out FILE', 'Output FastQ file (supports .gz compression)'
+  ) { |v| o[:out] = v }
+  opts.on(
+    '-q', '--quality INT', Integer,
+    'PHRED quality score to use (fixed), in the range [-5, 41]',
+    "By default: #{o[:qual]}"
+  ) { |v| o[:qual] = v }
+  opts.on(
+    '--encoding INT', Integer,
+    "Base encoding (33 or 64). By default: #{o[:encoding]}"
+  ) { |v| o[:encoding] = v }
+  opts.on('-h', '--help', 'Display this screen.') do
+    puts opts
+    exit
+  end
+  opts.separator ''
+end.parse!
+abort '-i is mandatory' if o[:in].nil?
+abort '-o is mandatory' if o[:out].nil?
+abort '-q must be in the range -5 .. 41' if o[:qual] < -5 || o[:qual] > 41
+# Determine quality character
+$qchar = (o[:qual] + o[:encoding]).chr
+# Create file handlers
+ifh = o[:in] =~ /\.gz$/ ?
+  Zlib::GzipReader.open(o[:in]) : File.open(o[:in], 'r')
+ofh = o[:out] =~ /\.gz$/ ?
+  Zlib::GzipWriter.open(o[:out]) : File.open(o[:out], 'w')
+def print_seq(ofh, id, seq)
+  ofh.puts "@#{id}", seq, '+', $qchar * seq.length unless seq.empty?
+end
+# Generate FastQ
+id = ''
+seq = ''
+ifh.each_line do |ln|
+  next if ln =~ /^;/
+  if ln =~ /^>(.*)/
+    print_seq(ofh, id, seq)
+    seq = ''
+    id = $1
+  else
+    seq += ln.chomp.upcase.gsub(/[^A-Z]/,'')
+  end
+end
+print_seq(ofh, id, seq)
+ofh.close
+ifh.close