RubyGems - full_lengther_next - Versions diffs - 0.0.8 → 0.5.6 - Mend

full_lengther_next 0.0.8 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

data/.gemtest +0 -0
data/History.txt +2 -2
data/Manifest.txt +33 -18
data/Rakefile +4 -2
data/bin/download_fln_dbs.rb +310 -158
data/bin/full_lengther_next +160 -103
data/bin/make_test_dataset.rb +236 -0
data/bin/make_user_db.rb +101 -117
data/bin/plot_fln.rb +270 -0
data/bin/plot_taxonomy.rb +70 -0
data/lib/expresscanvas.zip +0 -0
data/lib/full_lengther_next.rb +3 -3
data/lib/full_lengther_next/classes/artifacts.rb +66 -0
data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
data/lib/full_lengther_next/classes/cdhit.rb +154 -0
data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
data/lib/full_lengther_next/classes/common_functions.rb +105 -63
data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
data/lib/full_lengther_next/classes/handle_db.rb +30 -0
data/lib/full_lengther_next/classes/my_worker.rb +308 -138
data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
data/lib/full_lengther_next/classes/reptrans.rb +210 -0
data/lib/full_lengther_next/classes/sequence.rb +439 -80
data/lib/full_lengther_next/classes/test_code.rb +15 -16
data/lib/full_lengther_next/classes/types.rb +12 -0
data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
data/lib/full_lengther_next/classes/warnings.rb +40 -0
metadata +207 -93
data/lib/full_lengther_next/classes/lcs.rb +0 -33
data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240

data/bin/full_lengther_next CHANGED

@@ -2,11 +2,15 @@
 # 12-2-2011 Noe Fernandez Pozo.
 # Full-LengtherNEXT predicts if your sequences are complete, showing you the nucleotide sequences and the translated protein
+ROOT_PATH=File.dirname(__FILE__)
+$: << File.expand_path(File.join(ROOT_PATH, "../lib/full_lengther_next/classes/"))
-#------------------------------------------------------------------ parameters entry
 require 'optparse'
 require 'socket'
+###############################################################################################
+# PARSE OPTIONS
+###############################################################################################
 options = {}
 if !File.exists?('logs')
@@ -14,87 +18,147 @@ if !File.exists?('logs')
 end
 optparse = OptionParser.new do |opts|
+	options[:acess_db] = 'stnp'
+	opts.on( '-a', '--acess_db STRING', 'Select that databases is going to be used. s for Swissprot, t for trEMBL and n for ncrna, p for use Transdecoder and c for use testcode algothrim. By default is set to stnp' ) do |acess_db|
+		options[:acess_db] = acess_db
+	end
+    options[:blast] = ''
+    opts.on( '-b', '--blast STRING', 'Aditional options to blast execution' ) do |blast|
+            options[:blast] = blast
+    end
+	options[:chunk_size] = 200
+	opts.on( '-c', '--chunk_size SIZE', "Number of sequences processed in each block when parallelization is used. Default=200" ) do |s|
+		options[:chunk_size] = s.to_i
+	end
+	options[:est_db] = nil
+	opts.on( '-d', '--est_db FILE', "EST database for representative transcriptome\n" ) do |est_db|
+		options[:est_db] = est_db
+		if !File.exists?(options[:est_db])
+			puts "No valid path to EST database"
+			Process.exit(-1)
+		end
+	end
+	options[:exonerate] = TRUE
+	opts.on( '-e', '--exonerate', 'Disables exonerate analysis' ) do |exonerate|
+		options[:exonerate] = FALSE
+	end
 	options[:fasta] = nil
 	opts.on( '-f', '--fasta FILE', 'Fasta input file' ) do |file|
 		options[:fasta] = file
 	end
 	options[:tax_group] = nil
-	opts.on( '-g', '--taxon_group GROUP', "Taxon group, required to use the best databases:\n\t\t\t\t\tfungi\n\t\t\t\t\thuman\n\t\t\t\t\tinvertebrates\n\t\t\t\t\tmammals\n\t\t\t\t\tplants\n\t\t\t\t\trodents\n\t\t\t\t\tvertebrates\n\n" ) do |tax_name|
+	opts.on( '-g', '--taxon_group GROUP', "Taxon group, required to use the best databases:\n#{"\t"*5}fungi\n#{"\t"*5}human\n#{"\t"*5}invertebrates\n#{"\t"*5}mammals\n#{"\t"*5}plants\n#{"\t"*5}rodents\n#{"\t"*5}vertebrates\n\n" ) do |tax_name|
 		options[:tax_group] = tax_name
 	end
-	options[:user_db] = nil
-	opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
-		options[:user_db] = db
+	options[:ident] = 45.00
+	opts.on( '-i', '--identity_percent IDENTITY', 'identity percent threshold to consider as reliable the sequence similarity. Default=45.00' ) do |ident|
+		options[:ident] = ident.to_f
 	end
-	# options[:verbose] = nil
-	# opts.on( '-v', '--verbose_mode', "verbose mode\n\n" ) do |verbose|
-	# 	options[:verbose] = verbose
-	# end
-	options[:evalue] = 1.0e-25
-	opts.on( '-e', '--evalue EVALUE', 'e value threshold to consider as reliable the orthologue sequence. Default=1.0e-25' ) do |evalue|
-		options[:evalue] = evalue.to_f
+	options[:high_clustering] = TRUE
+	opts.on( '-k', '--high_clustering', 'Only for representative transcriptome. Add a clustering step using pfam ids. Default true' ) do
+		options[:high_clustering] = FALSE
 	end
-	options[:ident] = 45.00
-	opts.on( '-i', '--identity_percent IDENTITY', 'identity percent threshold to consider as reliable the sequence similarity. Default=45.00' ) do |ident|
-		options[:ident] = ident.to_f
+	options[:subject_coverage] = 0.25
+	opts.on( '-j', '--subject_coverage_percent FLOAT', "Subject coverage percentage threshold" ) do |j|
+		options[:subject_coverage] = j.to_f/100
+	end
+	options[:min_nucleotides] = 100
+	opts.on( '-n', '--min_nucleotides minLONG', "min nucleotides to consider a part of chimera like putative unigene. Default=100\n\n" ) do |min_nucleotides|
+		options[:min_nucleotides] = min_nucleotides.to_i
 	end
 	options[:distance] = 15
 	opts.on( '-m', '--max_distance maxDIST', "maximal distance between query and subject gene boundaries to be qualified as putative, the less distance the more strict. Default=15\n\n" ) do |distance|
 		options[:distance] = distance.to_i
 	end
-	options[:chimera] = nil
-	opts.on( '-q', '--chimera_detection', "apply chimera detection mode\n\n" ) do |chimera|
+	options[:port] = 0 #50000
+	opts.on( '-p', '--port PORT', "Server port\n\n" ) do |port|
+		options[:port] = port.to_i
+	end
+	options[:chimera] = 'rc'
+	opts.on( '-q', '--chimera_detection STRING', "d for deactivate chimera detection mode, s for search chimeras only, r for revise it and c for cut it. Default = rcs \n\n" ) do |chimera|
+		chimera.downcase!
 		options[:chimera] = chimera
 	end
+	options[:reptrans] = nil
+	opts.on( '-r', '--representative_transcriptome', "Generates a fasta file with the minime transcriptome\n" ) do |reptrans|
+		options[:reptrans] = reptrans
+	end
+	options[:server_ip] = '0.0.0.0'
+	opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
+		# get list of available ips
+		ip_list = Socket.ip_address_list.select{|e| e.ipv4?}.map{|e| e.ip_address}
+		ip=ip_list.select{|ip| ip.index(server_ip)==0}.first
+		if !ip
+			ip='0.0.0.0'
+			# $LOG.info("No available ip matching #{server_ip}")
+		end
+		# $ .info("Using ip #{ip}")
+		options[:server_ip] = ip
+	end
+	# Minimum identity used by the chimera analysis (see the option description below).
+	options[:ident_thresold] = 55.0
+	opts.on( '-t', '--identity_thresold FLOAT', "For chimeras only. Min identity to consider that two proteins are the same. Default=55.0\n\n" ) do |ident_thresold|
+		# The option is declared as FLOAT and defaults to 55.0, so keep the
+		# fractional part instead of truncating it with to_i.
+		options[:ident_thresold] = ident_thresold.to_f
+	end
+	options[:user_db] = nil
+	opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
+		options[:user_db] = db
+		if !File.exists?(File.expand_path(options[:user_db])+'.psq')
+			puts "user database: #{options[:user_db]} was not found"
+			exit
+		end
+	end
+	options[:verbose] = 0
+	opts.on( '-v', '--verbose INTEGER', 'Show extra info' ) do |verbose|
+		options[:verbose] = verbose.to_i
+	end
 	options[:workers] = 2
 	opts.on( '-w', '--workers INTEGER/FILE', 'Number of CPUs, or a file containing machine names to launch workers with ssh' ) do |workers|
-    if File.exists?(workers)
-       # use workers file
-       options[:workers] = File.read(workers).split("\n").map{|w| w.chomp}
-       options[:workers].shift
-    elsif (workers.to_i > 0)
-       options[:workers] = workers.to_i
-		else
-			options[:workers] = 2
+             if File.exists?(workers)
+               # use workers file
+               options[:workers] = File.read(workers).split("\n").map{|w| w.chomp}
+               options[:workers].shift
+             elsif (workers.to_i > 0)
+               options[:workers] = workers.to_i
+             else
+               options[:workers] = 2
+             end
     end
-  end
-	options[:chunk_size] = 200
-	opts.on( '-c', '--chunk_size SIZE', "Number of sequences processed in each block when parallelization is used. Default=200" ) do |s|
-		options[:chunk_size] = s.to_i
+	# Identity percent threshold applied when choosing sequences for Transdecoder
+	# training (see the option description below).
+	options[:training_ident] = 45.00
+	opts.on( '-x', '--training_identity_percent IDENTITY', 'identity percent threshold to use a complete sure sequence for Transdecoder training. Default=45.00' ) do |training_ident|
+		# Use this handler's own parameter. The previous code read `ident`, which
+		# is the parameter of the separate -i handler and is not in scope here.
+		options[:training_ident] = training_ident.to_f
+	end
-	options[:server_ip] = '0.0.0.0'
-  opts.on( '-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface' ) do |server_ip|
-    # get list of available ips
-    ip_list = Socket.ip_address_list.select{|e| e.ipv4?}.map{|e| e.ip_address}
-    ip=ip_list.select{|ip| ip.index(server_ip)==0}.first
-    if !ip
-      ip='0.0.0.0'
-      # $LOG.info("No available ip matching #{server_ip}")
-    end
-    # $ .info("Using ip #{ip}")
-    options[:server_ip] = ip
-  end
-  options[:port] = 0 #50000
-  opts.on( '-p', '--port PORT', "Server port\n\n" ) do |port|
-    options[:port] = port.to_i
-  end
+	options[:hdd] = FALSE
+	opts.on( '-z', '--hdd', 'Write/use blast report on HDD' ) do |hdd|
+		options[:hdd] = TRUE
+	end
 	# Set a banner, displayed at the top of the help screen.
 	opts.banner = "\nUsage: full_lengther_next -f input.fasta -g [fungi|human|invertebrates|mammals|plants|rodents|vertebrates] [options]\n\n"
@@ -108,35 +172,25 @@ end
 # parse options and remove from ARGV
 optparse.parse!
-# @verbose = options[:verbose]
-# if (!@verbose.nil?)
-# 	puts "You have chosen the verbose mode:\n\nInput File:\t#{options[:fasta]}\nTaxon Group:\t#{options[:tax_group]}\nOwn Database:\t#{options[:user_db]}\nCPU Number:\t#{options[:workers]}"
-# end
-#----------------------------------------------------------------------- testing errors in parameters entry
 if (options[:fasta].nil?) || (options[:tax_group].nil?)
-		puts "incorrect number of arguments, you need a fasta file and a taxonomical group:\n\n\t"
-		puts optparse.help
-		exit
+	puts "incorrect number of arguments, you need a fasta file and a taxonomical group:\n\n\t"
+	puts optparse.help
+	exit
 end
-#----------------------------------------------------------------------- loading classes and gems
-ROOT_PATH=File.dirname(__FILE__)
-# $: << File.expand_path(File.join(ROOT_PATH, "classes"))
-# load gem path, only to test locally
-# $: << File.expand_path('~/progs/ruby/gems/full_lengther_next/lib')
-require 'full_lengther_next'
+###################################################################################################
+# PREPARE ENVIROMENT
+###################################################################################################
 if ENV['FULL_LENGTHER_NEXT_INIT'] && File.exists?(ENV['FULL_LENGTHER_NEXT_INIT'])
   FULL_LENGTHER_NEXT_INIT=File.expand_path(ENV['FULL_LENGTHER_NEXT_INIT'])
 else
   FULL_LENGTHER_NEXT_INIT=File.join(ROOT_PATH,'init_env')
 end
+if !File.exists?('temp')
+	Dir.mkdir('temp')
+end
 if ENV['BLASTDB'] && File.exists?(ENV['BLASTDB'])
   formatted_db_path = ENV['BLASTDB']
@@ -147,50 +201,53 @@ end
 ENV['BLASTDB']=formatted_db_path
 puts "Using databases at: #{ENV['BLASTDB']}"
-ncrna_path = File.join(ENV['BLASTDB'],'nc_rna_db','ncrna_fln_100.fasta.nhr')
-if !File.exists?(ncrna_path)
-  puts "DB File #{ncrna_path} doesn't exists"
+ncrna_path = File.join(ENV['BLASTDB'],'nc_rna_db','ncrna.nhr')
+if !File.exists?(ncrna_path) && options[:acess_db].include?('c')
+  	puts "DB File #{ncrna_path} doesn't exists"
 	puts optparse.help
 	exit
 end
-sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.fasta.psq")
-if !File.exists?(sp_path)
-  puts "DB File #{sp_path} doesn't exists, or"
-	puts "incorrect taxon group name: #{options[:tax_group]} choose:"
-	puts optparse.help
-	exit
+if options[:acess_db].include?('s') || options[:acess_db].include?('t')
+	sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.psq")
+	if !File.exists?(sp_path)
+		puts "DB File #{sp_path} doesn't exists, or"
+		puts "incorrect taxon group name: #{options[:tax_group]} choose:"
+		puts optparse.help
+		exit
+	end
 end
-require 'scbi_blast' # is a gem
-require 'scbi_mapreduce'
-# puts $:
-require 'fl_string_utils'
-require "une_los_hit"
-require "lcs" # like the class simliar of seqtrim, return the longest common sequence
-require "test_code"
-##########################################################  MAIN  #################################################################
+##################################################################################################
+# MAIN
+###################################################################################################
-require 'my_worker_manager'
+require 'scbi_mapreduce'
+require 'my_worker_manager_fln' #First server
+require 'reptrans'
 $LOG = Logger.new(STDOUT)
 $LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
-# puts "ROOT_PATH: #{ROOT_PATH}"
-custom_worker_file = File.join(File.dirname(ROOT_PATH),'lib','full_lengther_next','classes','my_worker.rb')
+main_path = File.dirname(ROOT_PATH)
+custom_worker_file = File.join(main_path, 'lib','full_lengther_next','classes','my_worker.rb')
-	$LOG.info 'Starting server'
+$LOG.info 'Starting server'
 	# initialize work manager (open files, etc)
-	MyWorkerManager.init_work_manager(options, options[:chunk_size])
+	MyWorkerManagerFln.init_work_manager(options)
 	# Create server
-	server = ScbiMapreduce::Manager.new(options[:server_ip],options[:port], options[:workers], MyWorkerManager,custom_worker_file, STDOUT,FULL_LENGTHER_NEXT_INIT)
-	server.chunk_size=options[:chunk_size]
+	server = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManagerFln, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
+	server.chunk_size = options[:chunk_size]
 	# launch server
 	server.start_server
-	$LOG.info 'Closing server'
+$LOG.info 'Closing server'
+if !options[:reptrans].nil?
+	seqs_annotation_prot, seqs_some_coding ,seqs_unknown= MyWorkerManagerFln.get_annotations()
+	reptrans(seqs_annotation_prot, seqs_some_coding ,seqs_unknown, options)
+end
+puts "\nGracias por utilizar Full-LengtherNEXT"
-	puts "\nGracias por utilizar Full-LengtherNEXT"

data/bin/make_test_dataset.rb ADDED

@@ -0,0 +1,236 @@
+#!/usr/bin/env ruby
+require 'scbi_fasta'
+require 'optparse'
+##########################################################################################
+## FUNCTIONS
+##########################################################################################
+# Reads every record of a FASTA file into memory.
+#
+# fasta - path of the FASTA file; it is handed to FastaQualFile.new.
+#
+# Returns an Array of [name, sequence] pairs, in the order the reader yields them.
+def load_fasta(fasta)
+  seqs = []
+  # Keep the path and the reader in separate variables instead of rebinding the parameter.
+  fasta_file = FastaQualFile.new(fasta)
+  begin
+    fasta_file.each do |name, seq|
+      seqs << [name, seq]
+    end
+  ensure
+    # Close the reader even when iteration raises, so the handle is not leaked.
+    fasta_file.close
+  end
+  seqs
+end
+# Duplicates a list of sequence records so the copies can be mutated freely.
+#
+# seqs - Array of records; the first and last element of each record are dup'ed.
+#
+# Returns a new Array of two-element [first, last] records that share no
+# String objects with the input.
+def copy_seqs(seqs)
+  seqs.map { |record| [record.first.dup, record.last.dup] }
+end
+# Opens, for writing, one output file per dataset enabled in +options+.
+#
+# options - Hash read for :file (input path; only its basename is used as the
+#           output prefix) and the switches :indel, :trim, :pair and :chim.
+#
+# Returns a Hash mapping dataset keys to the open File objects. The caller is
+# responsible for closing them.
+def output_files(options)
+  file = File.basename(options[:file])
+  # [dataset key, file name suffix, option that enables it], in creation order.
+  targets = [
+    [:insertions,         '_insertions',   :indel],
+    [:delections,         '_deletions',    :indel],
+    [:mix,                '_mix',          :indel],
+    [:cut_100_pb,         '_trimmed',      :trim],
+    [:paired,             '_paired',       :pair],
+    [:fullChim,           '_fullChim',     :chim],
+    [:fusionChim,         '_fusionChim',   :chim],
+    [:fusionChimTruncate, '_truncateChim', :chim]
+  ]
+  output_files = {}
+  targets.each do |key, suffix, switch|
+    output_files[key] = File.open(file+suffix, 'w') if options[switch]
+  end
+  return output_files
+end
+# Picks one lowercase nucleotide letter ('a', 'c', 'g' or 't') using Kernel#rand.
+def random_nt
+  alphabet = %w[a c g t]
+  alphabet[rand(alphabet.length)]
+end
+# Inserts random nucleotides into +seq+, modifying the string in place.
+#
+# seq      - String to modify.
+# position - Integer; position % 3 selects the variant:
+#            0 -> one base at a third of the length,
+#            1 -> one base at two thirds of the length,
+#            2 -> one base at each of those two offsets.
+#
+# Returns [seq, name], where name is the suffix tag describing the variant.
+def insertions(seq, position)
+  one_third = seq.length / 3
+  two_thirds = one_third * 2
+  name = case position % 3
+         when 0
+           seq.insert(one_third, random_nt)
+           '_I__'
+         when 1
+           seq.insert(two_thirds, random_nt)
+           '__I_'
+         when 2
+           seq.insert(one_third, random_nt)
+           seq.insert(two_thirds, random_nt)
+           '_I_I_'
+         end
+  [seq, name]
+end
+# Deletes single characters from +seq+, modifying the string in place.
+#
+# seq      - String to modify.
+# position - Integer; position % 3 selects the variant:
+#            0 -> delete at a third of the length,
+#            1 -> delete at two thirds of the length,
+#            2 -> delete at both offsets, one after the other.
+#
+# Returns [seq, name], where name is the suffix tag describing the variant.
+def delections(seq, position)
+  one_third = seq.length / 3
+  two_thirds = one_third * 2
+  cuts, name = case position % 3
+               when 0 then [[one_third], '_D__']
+               when 1 then [[two_thirds], '__D_']
+               when 2 then [[one_third, two_thirds], '_D_D_']
+               end
+  # Offsets are applied in order against the already shortened string.
+  Array(cuts).each { |cut| seq.slice!(cut) }
+  [seq, name]
+end
+# Applies one insertion and one deletion to +seq+, modifying the string in place.
+#
+# seq      - String to modify.
+# position - Integer; an even value inserts at a third of the length and then
+#            deletes at two thirds, an odd value deletes first and inserts second.
+#
+# Returns [seq, name], where name is the suffix tag describing the variant.
+def mix(seq, position)
+  one_third = seq.length / 3
+  two_thirds = one_third * 2
+  name = nil
+  if position % 2 == 0
+    seq.insert(one_third, random_nt)
+    seq.slice!(two_thirds)
+    name = '_I_D_'
+  elsif position % 2 == 1
+    seq.slice!(one_third)
+    seq.insert(two_thirds, random_nt)
+    name = '_D_I_'
+  end
+  [seq, name]
+end
+# Loads per-sequence coordinates from a tab-separated file.
+#
+# utr_file - path of the file. Each non-empty line holds a sequence name
+#            followed by tab-separated values, which are converted with to_i.
+#
+# Returns a Hash mapping each sequence name to its Array of Integers.
+def load_utrs(utr_file)
+  utrs = {}
+  # File.foreach closes the file when the block finishes; the previous
+  # File.open(...).each left the handle open.
+  File.foreach(utr_file) do |line|
+    line.chomp!
+    # A blank line would otherwise store a nil key and make the table look non-empty.
+    next if line.empty?
+    fields = line.split("\t")
+    seq_name = fields.shift
+    utrs[seq_name] = fields.map(&:to_i)
+  end
+  utrs
+end
+##########################################################################################
+## OPTIONS
+##########################################################################################
+# Command-line options. Defaults are stored first; each handler overrides one.
+# The boolean literals replace the TRUE/FALSE constants, which are deprecated
+# and no longer defined by current Ruby releases.
+options = {}
+optparse = OptionParser.new do |opts|
+  options[:file]='samples'
+  opts.on( '-f', '--file FILE', 'FASTA file') do |file|
+    options[:file]=file
+  end
+  options[:duplicate]= 1
+  opts.on( '-d', '--duplicate INTEGER', 'Duplicate sequences to dataset') do |duplicate|
+    options[:duplicate] = duplicate.to_i
+  end
+  # NOTE(review): --split never sets options[:split]; it only forces three
+  # copies of the input. Behavior kept as is -- confirm whether that is intended.
+  options[:split]= false
+  opts.on( '-s', '--split', 'Split sequences in each case') do
+    options[:duplicate] = 3
+  end
+  # NOTE(review): the four dataset switches below default to true and passing
+  # the flag turns the dataset OFF, although the help text reads as if the
+  # flag enabled it. Behavior and texts kept as is -- confirm the intent.
+  options[:chim]= true
+  opts.on( '-c', '--chim', 'Make sequence set of chimeras') do
+    options[:chim] = false
+  end
+  options[:indel]= true
+  opts.on( '-i', '--indel', 'Make sequence set of indels') do
+    options[:indel] = false
+  end
+  options[:pair]= true
+  opts.on( '-p', '--pair', 'Make sequence set of paired') do
+    options[:pair] = false
+  end
+  options[:trim]= true
+  opts.on( '-t', '--trim', 'Make sequence set of trimmed') do
+    options[:trim] = false
+  end
+  # Set a banner, displayed at the top of the help screen.
+  opts.banner = "Usage: #{File.basename($0)} -f FILE  \n\n"
+  # This displays the help screen
+  opts.on( '-h', '--help', 'Display this screen' ) do
+    puts opts
+    exit
+  end
+end # End opts
+# parse options and remove from ARGV
+optparse.parse!
+##########################################################################################
+## MAIN
+##########################################################################################
+# Abort early when the input FASTA is missing. File.exist? replaces the
+# File.exists? alias, which was removed in Ruby 3.2.
+if !File.exist?(options[:file])
+  puts 'File not exists'
+  Process.exit
+end
+seqs = load_fasta(options[:file])
+output_files = output_files(options)
+# Coordinates for the trimmed and chimera datasets come from a companion file:
+# the input path with its extension replaced by '.utr'. The table stays empty
+# when that file does not exist.
+utrs = {}
+if options[:trim] || options[:chim]
+  file_ext = File.extname(options[:file])
+  # Strip the extension only at the end of the path; gsub removed every
+  # occurrence of the extension text, including inside directory names.
+  utr_file = options[:file].chomp(file_ext)+'.utr'
+  utrs = load_utrs(utr_file) if File.exist?(utr_file)
+end
+# Walk the sequences once, writing the chimera, trimmed and paired datasets.
+seqs.each_with_index do |(name, seq), index|
+  # Chimeras are built from consecutive pairs: each even-indexed sequence is
+  # joined with the one that follows it.
+  if index % 2 == 0 && !seqs[index+1].nil? && options[:chim]
+    second_seq, second_seq_fasta = seqs[index+1]
+    # Full chimera: the first sequence followed by the second one. Previously
+    # second_seq_fasta already had seq prepended, so the first sequence was
+    # written twice and the second sequence's coordinates below pointed into
+    # the wrong string.
+    output_files[:fullChim].puts ">#{name+'_'+second_seq}\n#{seq+second_seq_fasta}"
+    utr_coord = utrs[name]
+    utr_coord_second = utrs[second_seq]
+    # Fusion chimeras need coordinates for both members; pairs without them
+    # are skipped instead of raising on nil.
+    if [utr_coord, utr_coord_second].none? { |coord| coord.nil? || coord.empty? }
+      chim5 = seq[0..utr_coord.last]
+      chim3 = second_seq_fasta[utr_coord_second.first..second_seq_fasta.length-1]
+      if !chim5.nil? && !chim3.nil?
+        output_files[:fusionChim].puts ">#{name+'_'+second_seq}\n#{chim5+chim3}"
+        # NOTE(review): 0..length-100 drops 99 trailing characters while
+        # 100..length drops 100 leading ones -- confirm the intended amounts.
+        chim5_trunc = chim5[0..chim5.length-100]
+        chim3_trunc = chim3[100..chim3.length]
+        output_files[:fusionChimTruncate].puts ">#{name+'_'+second_seq}\n#{chim5_trunc+chim3_trunc}" if !chim5_trunc.nil? && !chim3_trunc.nil?
+      end
+    end
+  end
+  if options[:trim]
+    if utrs.empty?
+      # NOTE(review): 99..length-101 drops 99 leading but 100 trailing
+      # characters -- confirm whether both ends should lose 100.
+      output_files[:cut_100_pb].puts ">#{name}\n#{seq[99..seq.length-101]}"
+    else
+      utr_coord = utrs[name]
+      # Sequences without an entry in the coordinate table are skipped.
+      if utr_coord && !utr_coord.empty?
+        trim_seq = seq[utr_coord.first+100..utr_coord.last-100]
+        output_files[:cut_100_pb].puts ">#{name}\n#{trim_seq}" if !trim_seq.nil? && !trim_seq.empty?
+      end
+    end
+  end
+  if options[:pair]
+    # Insert a run of 5 to 50 'N' characters around the middle of the sequence.
+    n_number = rand(5..50)
+    position = seq.length/2 - n_number/2
+    output_files[:paired].puts ">#{name}\n#{seq[0..position] + 'N'*n_number + seq[position+1..seq.length-1]}"
+  end
+end
+# Indel datasets: the input is copied options[:duplicate] times and the copies
+# are split by index into insertion, deletion and mixed variants.
+if options[:indel]
+  all_seqs = options[:duplicate].times.flat_map { copy_seqs(seqs) }
+  length = all_seqs.length
+  all_seqs.each_with_index do |s, i|
+    # First third -> insertions, second third -> deletions, remainder -> mix.
+    if i < length/3
+      seq, type = insertions(s.last, i)
+      file = :insertions
+    elsif i < 2*length/3
+      seq, type = delections(s.last, i)
+      file = :delections
+    else
+      seq, type = mix(s.last, i)
+      file = :mix
+    end
+    output_files[file].puts ">#{s.first}#{type}\n#{seq}"
+  end
+end
+# Flush and release every output file opened for this run.
+output_files.each_value { |handle| handle.close }