RubyGems - full_lengther_next - Versions diffs - 0.0.8 → 0.5.6 - Mend

full_lengther_next 0.0.8 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

data/.gemtest +0 -0
data/History.txt +2 -2
data/Manifest.txt +33 -18
data/Rakefile +4 -2
data/bin/download_fln_dbs.rb +310 -158
data/bin/full_lengther_next +160 -103
data/bin/make_test_dataset.rb +236 -0
data/bin/make_user_db.rb +101 -117
data/bin/plot_fln.rb +270 -0
data/bin/plot_taxonomy.rb +70 -0
data/lib/expresscanvas.zip +0 -0
data/lib/full_lengther_next.rb +3 -3
data/lib/full_lengther_next/classes/artifacts.rb +66 -0
data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
data/lib/full_lengther_next/classes/cdhit.rb +154 -0
data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
data/lib/full_lengther_next/classes/common_functions.rb +105 -63
data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
data/lib/full_lengther_next/classes/handle_db.rb +30 -0
data/lib/full_lengther_next/classes/my_worker.rb +308 -138
data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
data/lib/full_lengther_next/classes/reptrans.rb +210 -0
data/lib/full_lengther_next/classes/sequence.rb +439 -80
data/lib/full_lengther_next/classes/test_code.rb +15 -16
data/lib/full_lengther_next/classes/types.rb +12 -0
data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
data/lib/full_lengther_next/classes/warnings.rb +40 -0
metadata +207 -93
data/lib/full_lengther_next/classes/lcs.rb +0 -33
data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240

data/lib/full_lengther_next/classes/common_functions.rb CHANGED

@@ -1,78 +1,116 @@
 module CommonFunctions
-	def contenidos_en_prot(hit, full_prot, q)
-		is_ok = false
-		q_index_start = 9999
-		fr_index_start = 0
-		min_index_start = 9999
-		aas_parecidos = 0
-		masked_x = 0
-		suma_fragments = 0
-		masked_x = hit.q_seq.count('X')
-		masked_x = masked_x + hit.q_seq.count('-')
-		full_prot = full_prot.gsub(/[\-Xx]+/,'')
-		compare_prot = hit.q_seq.gsub(/[\-Xx]+/,'-')
-		fragments_array = compare_prot.split(/\-+/)
-		fragments_array.each do |seq|
-			# puts "seq: #{seq}\nfull_prot: #{full_prot}"
-			simliar_fragment = full_prot.lcs(seq)
-			suma_fragments += simliar_fragment.length
-			fr_index_start = full_prot.index(simliar_fragment)
-			if (q_index_start == 9999)
-				q_index_start = fr_index_start
+	def contenidos_en_prot(key_seq, full_prot)
+		full_prot = full_prot.gsub(/[\-Xx]/,'-')
+		compare_prot = key_seq.gsub(/[\-Xx]/,'-')
+		q_index_start = full_prot.index(compare_prot) #Full match between hit.q_seq and full_prot (unigene)
+		if q_index_start.nil? #There is gaps that unables the full match
+			q_index_start = match_with_ungapped_reference(full_prot, compare_prot)
+			if q_index_start.nil? && full_prot.include?('-')
+				diff = full_prot.length - compare_prot.length
+				if scan_sequences(full_prot.split(''), compare_prot.split('')) == compare_prot.length
+					q_index_start = 0
+				end
+				if diff >0 && scan_sequences(full_prot.split(''), compare_prot.split(''), diff) == compare_prot.length
+					q_index_start = diff
+				end
+				if q_index_start.nil?
+					q_index_start = match_with_gapped_reference(full_prot, compare_prot)
+				end
+			end
+			if q_index_start.nil?
+				q_index_start = 0
 			end
-			full_prot = full_prot[(fr_index_start + simliar_fragment.length)..full_prot.length]
-		end
-		simliar_fragment = full_prot.lcs(compare_prot)
-		# if ($verbose)
-			# puts "#{q.query_def}-------------------------------------#{suma_fragments} de #{compare_prot.length}"
-			# puts "#{q.query_def}-------------------------------------#{suma_fragments + masked_x} >= #{compare_prot.length * 0.7}"
-			# puts "\nfull: #{full_prot}\ncomp: #{compare_prot}\nsimliar_fragment: #{simliar_fragment}"
-		# end
-		if (suma_fragments + masked_x >= compare_prot.length * 0.7)
-			is_ok = true
-			# puts "OK -- encontramos suficiente similitud entre query y subject -- OK"
-		else
-			is_ok = false
-			# puts "\nfull: #{full_prot}\ncomp: #{compare_prot}"
-			# puts "Warning!: no match comparing proteins"
-		end
-		min_index_start = [min_index_start, q_index_start].min
-		if (min_index_start == 9999)
-			min_index_start = 0
 		end
-		return [is_ok, min_index_start]
+		return q_index_start
 	end
+	def match_with_gapped_reference(full_prot, compare_prot) # Uses the gap-free fragments of full_prot as seeds to locate compare_prot; returns an index into full_prot, or nil when no fragment matches
+		q_index_start = nil
+		fragments_array = full_prot.split(/\-+/) # pieces of full_prot between runs of '-'
+		fragments_array.each_with_index do |seq, i|
+			if seq.length > 4 # fragments of 4 characters or fewer are never used as seeds
+				compare_prot_index = compare_prot.index(seq)
+				if compare_prot_index.nil? # Whole fragment not found in compare_prot: retry with only its first 5 characters
+					seq =seq[0..4]
+					compare_prot_index = compare_prot.index(seq)
+				end
+				if !compare_prot_index.nil?
+					q_index_start = full_prot.index(seq) # first occurrence of the seed inside full_prot
+					if i > 0 # seed is not the first fragment: move both indexes back over the compatible characters that precede it
+						q_index_start, compare_prot_index = extend_match(full_prot, compare_prot, q_index_start, compare_prot_index)
+					end
+					break # only the first fragment that matches is used
+				end
+			end
+		end
+		return q_index_start
+	end
+	def extend_match(full_prot, compare_prot, q_index_start, compare_prot_index) # Moves both start indexes back by the number of compatible characters found immediately before them; returns [q_index_start, compare_prot_index]
+		full_prot_substring = full_prot[0..q_index_start-1].reverse.split('') # prefix before the match, reversed so it is scanned outwards from the match; NOTE(review): for an index of 0 the range is 0..-1, i.e. the whole string - confirm callers never pass 0
+		compare_prot_substring = compare_prot[0..compare_prot_index-1].reverse.split('') # same prefix extraction for compare_prot (same 0 caveat)
+		extend_match = scan_sequences(full_prot_substring, compare_prot_substring) # local variable that shadows the method name; holds the number of compatible characters
+		q_index_start -= extend_match
+		compare_prot_index -= extend_match
+		return q_index_start, compare_prot_index
+	end
-	def reverse_seq(query_fasta, h_qframe, h_qstart, h_qend)
-		q_frame = -h_qframe.to_i
-		q_beg = query_fasta.length - h_qend - 1
-		q_end = query_fasta.length - h_qstart - 1
+	def scan_sequences(ref_seq, compare_seq, diff = 0) # Counts the consecutive compatible positions between ref_seq (read from index diff) and compare_seq (read from its start); the callers in this module pass arrays of single characters
+		extend_match = 0 # number of compatible positions so far, also the read position in compare_seq
+		ref_seq.each_with_index do |char,i|
+			if i >= diff # the first diff elements of ref_seq are skipped
+				compare_char = compare_seq[extend_match]
+				if compare_char.nil? || char != compare_char && char != '-' && compare_char != '-' # stop when compare_seq is exhausted or on a mismatch where neither side is '-'
+					break
+				end
+				extend_match += 1
+			end
+		end
+		return extend_match
+	end
-		query_fasta = query_fasta.complementary_dna
+	def match_with_ungapped_reference(full_prot, compare_prot) # Searches full_prot for the first gap-free fragment of compare_prot longer than 4 characters; returns the derived start index in full_prot, or nil
+		q_index_start = nil
+		fragments_array = compare_prot.split(/\-+/) # pieces of compare_prot between runs of '-'
+		fragments_array.each_with_index do |seq, i|
+			if q_index_start.nil? && seq.length > 4
+				q_index_start = full_prot.index(seq)
+				if i > 0 && !q_index_start.nil?
+					q_index_start = refine_match(seq, compare_prot, q_index_start) # Correction applied when the fragment used is not the first one (earlier fragments were too short)
+				end
+				break # the loop ends after the first fragment longer than 4, whether or not it was found in full_prot
+			end
+		end
+		return q_index_start
+	end
-		# el qend y el qstart estan al reves porque cuando la seq tiene frame negativo el blast los pone al reves
-		return [query_fasta, q_frame, q_beg, q_end]
+	def refine_match(subseq, seq, q_index_start) # Shifts q_index_start back by the number of non-gap characters that come before subseq inside seq
+		location_seq = seq.index(subseq) # position of subseq inside seq
+		gaps_on_location = seq[0..location_seq].count('-') # number of '-' in seq up to and including that position
+		q_index_start -=  location_seq - gaps_on_location # Correction needed because the matched fragment is not at the start of seq
+		return q_index_start
 	end
+	def reverse_seq(query_fasta, hit) # Re-expresses hit on the opposite strand (hit is modified in place) and returns the result of query_fasta.complementary_dna
+		hit.q_frame = -hit.q_frame
+		hit.q_end = query_fasta.length - 1 - hit.q_end # both coordinates are mirrored against the last index of the sequence
+		hit.q_beg = query_fasta.length - 1 - hit.q_beg
+		hit.reversed = TRUE # NOTE(review): TRUE is a legacy alias of true that was deprecated in Ruby 2.4 - confirm the supported Ruby versions
+		query_fasta = query_fasta.complementary_dna # THIS ACTUALLY PRODUCES THE REVERSE COMPLEMENT.
+		if hit.class.to_s == 'ExoBlastHit' # only ExoBlastHit objects get their q_frameshift list converted
+			hit.q_frameshift.map!{|position, num_nts|
+				reversed_position = query_fasta.length - 1 - position # frameshift positions are mirrored the same way as q_beg and q_end
+				[reversed_position, num_nts]
+			}
+		end
+		return query_fasta
+	end
 	def corrige_frame(ref_frame,ref_start,ref_end)
@@ -89,6 +127,10 @@ module CommonFunctions
 	end
-end
+	def check_frame_shift(hit) # Returns the remainder (0, 1 or 2) of dividing the query span of the hit, in nucleotides, by 3
+		fs = 0
+		prot_length_in_nts = hit.q_end-hit.q_beg+1 # inclusive length between q_beg and q_end
+		fs = prot_length_in_nts%3 # a non-zero value means the span is not a whole number of codons
+		return fs
+	end
+end

data/lib/full_lengther_next/classes/exonerate_result.rb ADDED

@@ -0,0 +1,258 @@
+# Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# 'Software'), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+require 'blast_query'
+require 'blast_hit'
+require 'fl_string_utils.rb'
+OPERATION = 0 # index of the operation letter (M, F, G, 5, 3 or S) inside a parsed alignment tag
+QUERY = 1 # index of the number by which the query counter advances for that tag
+TARGET = 2 # index of the number by which the target counter advances for that tag
+class ExoBlastHit < BlastHit # BlastHit subclass that additionally carries two frameshift lists
+	attr_accessor :q_frameshift, :s_frameshift
+	def initialize(start_target, ends_target, start_query, ends_query) # the four coordinates are forwarded unchanged to BlastHit#initialize
+		super(start_target, ends_target, start_query, ends_query)
+		@s_frameshift=[] # both lists start empty; ExonerateResult#hiting assigns them after parsing the alignment tags
+		@q_frameshift=[]
+	end
+end
+# Extracts results from blast table's file and uses it to create instances of "BlastQuery" and "BlastHit"
+class ExonerateResult
+  # Parser initialization
+  def initialize(input, seqs= nil, query_seqs = nil, all = TRUE) # input is one path or an Array of paths; every file is read and parsed immediately. all is passed on to parse_file
+    @querys = [] # BlastQuery objects created while parsing
+    @seqs = seqs # unigenes
+    @prot_seqs = query_seqs # proteins
+    if input.is_a?(Array)
+    	input.each do |file|
+	      	parse_file(File.open(file).readlines, all) # NOTE(review): the File object is never closed explicitly
+     	end
+    else
+      parse_file(File.open(input).readlines, all)
+    end
+    query_name='' # not used anywhere else in this method
+  end
+  	def parse_file(lines, all) # Collects the 'vulgar:' lines into feature hashes and hands them to convert_parsed_lines; with all false only the best-scoring line per target_id is kept
+	  	lines_parsed=[] # Array of feature hashes when all is true
+	    lines_parsed={} if !all # Hash keyed by target_id when all is false
+	    lines.each do |line|
+			if line=~ /^vulgar:/ # every other line of the file is ignored
+				line.chomp!
+				fields=line.split(' ', 11) # the 11th field keeps the rest of the line (the alignment tags) unsplit
+				features={'query_id'=> fields[1], 'query_start_align'=> fields[2].to_i, 'query_end_align'=> fields[3].to_i, 'query_strand'=> fields[4],'target_id'=> fields[5], 'target_start_align'=> fields[6].to_i, 'target_end_align'=> fields[7].to_i, 'target_strand'=> fields[8], 'score'=> fields[9].to_i, 'align_data'=> fields[10]}
+				if all
+					lines_parsed << features
+				else
+				  if !lines_parsed.key?(features['target_id']) # Add the entry if the target is not stored yet
+				    lines_parsed[features['target_id']]=features
+				  else
+				    if features['score']>lines_parsed[features['target_id']]['score'] # The target is already stored: replace the old entry only if the new one has a higher score
+				    	lines_parsed[features['target_id']]=features
+				    end
+				  end
+				end
+			end
+		end
+		convert_parsed_lines(lines_parsed)
+  	end
+	def convert_parsed_lines(lines_parsed) # Turns every parsed feature set into a hit, creating a new BlastQuery in @querys each time the target_id differs from the previous entry
+		last_query = nil
+		query = nil
+		lines_parsed.each_with_index do |line| # the block takes a single parameter, so the index is not used
+			begin
+				if lines_parsed.class.to_s=='Array'
+					align_data=line['align_data']
+					features=line
+				else #hash: line is a [target_id, features] pair
+					align_data=line[1]['align_data']
+					features=line[1]
+				end
+				tags = align_data.scan(/([MFG53S]) ([0-9]+) ([0-9]+)/) # one [letter, number, number] triplet per alignment operation
+				tags.map!{|tag| [tag[0], tag[1].to_i, tag[2].to_i]}
+				if features['target_id'] != last_query # consecutive entries with the same target_id share one BlastQuery
+					last_query = features['target_id']
+					query = BlastQuery.new(features['target_id'])
+					@querys << query
+				end
+			  	hiting(features,tags, query)
+			rescue # bare rescue: any StandardError raised above is reported on stdout and parsing continues with the next entry
+				puts "Result: #{features['target_id']} => #{features['query_id']} hasn't been parsed\n#{line}"
+			end
+		end
+	end
+   #this method only works fine with --model protein2dna parameter of exonerate
+ 	def hiting(features, tags, query) # Converts exonerate's relative coordinates into absolute BLAST-like ones, defining only the hits
+		do_align = FALSE
+		do_align = TRUE if !@prot_seqs.nil? && !@seqs.nil? # alignment strings are rebuilt only when both sequence sets were given to the constructor
+		start_target = features['target_start_align'] # unigene
+		start_query = features['query_start_align'] # protein
+		ends_target = features['target_end_align']
+		ends_query = features['query_end_align']-1 # -> Exonerate does not give the ends of target and query as 0-based positions
+		if features['target_strand'] == '-' #-> Exonerate does not give the ends of target and query as 0-based positions
+			start_target -= 1 # Start target is the end of the target when the match is on the reverse-complementary strand
+		else
+			ends_target -= 1
+		end
+		hit = ExoBlastHit.new(start_target+1, ends_target+1, start_query+1, ends_query+1)
+		define_hit_parameters(hit, features, tags)
+		query.add_hit(hit)
+		#Define alignment and blast like parameters
+		target_alignment = ''
+		query_alignment = ''
+		counter_target = start_target # running positions, advanced by every tag
+		counter_query = start_query
+		if do_align #get seqs
+			query_seq = @prot_seqs[features['query_id']]
+			target_seq = @seqs[features['target_id']]
+		end
+		counter_target, target_seq = do_reverso_complementary(counter_target, target_seq) if features['target_strand'] == '-' # NOTE(review): target_seq is nil here when do_align is false, so do_reverso_complementary would fail on nil.length - confirm this path is never taken
+		query_frameshift = []
+		target_frameshift = []
+		gap_shift = 0 # counts true frameshifts seen since the last operation that was neither F nor G
+		#puts features['query_id']+ ' ' +features['target_strand'], '-----------------------'
+		tags.each_with_index do |tag, n_operation|
+			#puts tag.inspect
+			if do_align
+				gap_shift = 0 if tag[OPERATION] != 'G'
+				query_alignment << query_seq[counter_query, tag[QUERY]]
+				target_alignment << target_seq[counter_target, tag[TARGET]].translate
+			end
+			if tag[OPERATION] == 'F'
+				if tag[TARGET] > 0 && tag[TARGET] < 3 #TRUE FRAMESHIFT
+					gap_shift += 1
+					if tags[n_operation+1][OPERATION] != 'G' # some frameshifts are not followed by a gap operation, so a gap is inserted here; NOTE(review): tags[n_operation+1] is nil when F is the last tag - the caller's rescue would swallow the resulting error
+						query_alignment <<  '-' if do_align
+					end
+				else
+					query_alignment <<  '-' * (tag[TARGET]/3.0).ceil if do_align
+				end
+				query_frameshift << counter_query
+				fs_counter_target = counter_target
+				fs_counter_target = target_seq.length - counter_target if features['target_strand'] == '-' # This is a workaround; reverse-strand parsing should be reworked to decrement the exonerate counter instead of incrementing it
+				if tag[TARGET] > 3
+					real_fs = tag[TARGET]%3 # nucleotides left over after removing whole codons
+					real_gap = tag[TARGET] - real_fs # whole-codon part of the operation
+					fs = [fs_counter_target + real_gap, real_fs]
+				else
+					fs = [fs_counter_target, tag[TARGET]]
+				end
+				target_frameshift << fs # [position, number of nucleotides]
+			elsif tag[OPERATION] == 'G'
+				query_alignment <<  '-' * (tag[TARGET]/3.0).ceil if do_align
+				diff = tag[QUERY] - gap_shift
+				target_alignment << '-' * diff if do_align && diff > 0
+				gap_shift = 0
+			end
+			counter_query += tag[QUERY]
+			counter_target += tag[TARGET]
+		end
+		hit.s_frameshift = query_frameshift # exonerate's query (the protein) is stored on the subject side of the hit
+		hit.q_frameshift = target_frameshift # exonerate's target (the unigene) is stored on the query side of the hit
+		#puts "\e[33m#{target_alignment}\e[0m", "\e[36m#{query_alignment}\e[0m"
+		if do_align
+			hit.q_seq = target_alignment
+			hit.s_seq = query_alignment
+			hit.align_len = query_alignment.length # replaces the value set by define_hit_parameters
+			hit.ident = set_ident(target_alignment,query_alignment)
+		end
+	end #def
+	def do_reverso_complementary(counter_target, target_seq) # Mirrors counter_target against the last index of target_seq and returns it together with target_seq.complementary_dna
+		counter_target = target_seq.length - 1 - counter_target
+		target_seq = target_seq.complementary_dna # complementary_dna is not defined in this file; presumably provided by fl_string_utils - verify
+		return counter_target, target_seq
+	end
+	def set_ident(target_alignment, query_alignment) # Returns the percentage of positions of target_alignment whose character equals the one at the same position of query_alignment, rounded to two decimals
+		matchs = 0
+		position = 0
+		target_alignment.each_char do |char|
+			matchs +=1 if char == query_alignment[position] # positions beyond the end of query_alignment compare against nil and never count
+			position +=1
+		end
+		perc_ident = ('%.2f' % (matchs*100.0/target_alignment.length)).to_f # formatted to two decimals, then converted back to Float
+		return perc_ident
+	end
+	def define_hit_parameters(hit, features, tags) # Fills the BLAST-style attributes of hit from the exonerate features; values exonerate does not supply are set to 0, nil or ''
+		hit.gaps = 0
+		tags.map{|aln| hit.gaps += 1 if aln[0] == 'G'} # counts the G operations; the array returned by map is discarded
+		hit.reversed = FALSE
+		hit.align_len =(features['query_end_align'] - features['query_start_align']).abs+1
+		hit.mismatches=0
+		hit.e_val=0
+		hit.bit_score=0
+		hit.score = features['score']
+		hit.s_frame = nil
+		strand = 1
+		strand = -1 if features['target_strand'] == '-'
+		hit.q_frame = (((features['target_start_align']) % 3) +1) *strand # frame 1..3 from the target start, negative on the '-' strand
+		hit.subject_id = features['query_id'] # exonerate's query is stored as the subject of the hit
+		hit.full_subject_length=0
+		hit.definition=''
+		hit.acc=features['query_id']
+		hit.q_seq=''
+		hit.s_seq=''
+	end
+  # inspect results
+  def inspect # Builds a text report: a header, the number of queries and the inspect output of every query
+    res = "Exonerate results:\n"
+    res+= '-'*20
+    res+= "\nQuerys: #{@querys.count}\n"
+    @querys.each{|q| res+=q.inspect+"\n"} # one line per stored query
+    return res
+  end
+  # find query by name
+  def find_query(querys,name_q) # Returns the first element of querys whose query_id equals name_q, or nil; searches the list passed in, not @querys
+    #  newq = querys.find{|q| ( q.find{|h| (h.subject_id)})}
+    new_q=nil
+    if !querys.empty?
+      new_q=querys.find{|q| (q.query_id==name_q)}
+    end
+    return new_q
+  end
+  # check if there are querys
+  def empty? # true when no query has been stored in @querys
+    return @querys.empty?
+  end
+  # get query count
+  def size # number of queries stored in @querys
+    @querys.size
+  end
+  attr_accessor :querys
+end

data/lib/full_lengther_next/classes/fl_analysis.rb CHANGED

@@ -1,688 +1,297 @@
+require 'types'
 require 'une_los_hit'
 module FlAnalysis
-	def analiza_orf_y_fl(seq, blast_query, options, db_name)
-		aas_n_end = options[:distance]
-		pident_threshold = options[:ident]
-		evalue_threshold = options[:evalue]
-		# @verbose = options[:verbose]
-		# test_blast_hits(blast_query)
-		# used to detect if the sequence and the blast are from different query
-		if seq.seq_name != blast_query.query_def
-			raise "BLAST query name and sequence are different"
+	$global_warnings = []
+	def analiza_orf_y_fl(seq, hit, options, db_name)
+		query_fasta = seq.seq_fasta.upcase.dup # Upcase for prevents complications with masked sequences, dup for discard changes
+		if hit.count > 1 # if the sequence has more than one hit, the frames are checked and fixed to get a single hit
+				seq_unida = UneLosHit.new(hit, query_fasta)
+				full_prot =		seq_unida.full_prot
+				query_fasta =	seq_unida.output_seq	# repaired fasta
+				final_hit =		seq_unida.final_hit		# single hit
+				$global_warnings +=	seq_unida.msgs		# warning messages
+		else
+			query_fasta = reverse_seq(query_fasta, hit.first) if hit.first.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
+			final_hit = hit.first # single hit
 		end
-		q=blast_query
-		msgs = ''
-		atg_status = ''
-		end_status = ''
-		final_status = ''
-		# the fasta sequence is saved
-		query_fasta = seq.seq_fasta
+		query_fasta = exonerate_fix_frame_shift(query_fasta, hit) if options[:exonerate]
-		if q.hits[0].nil? # There is no match in blast, the seq go to the next DB
-			# puts "#{db_name} -- #{q.query_def} --> NO BLASTX match"
-			# If the DB is trembl and the seq has annotations from other DB the annotations must be printed
-			if (db_name =~ /^tr_/)
-				if (seq.get_annotations(:tmp_annotation).empty?)
-					if (seq.sec_desc.empty?)
-						seq.annotate(:apply_tcode,'')
-					else
-						seq.annotate(:tmp_annotation,[seq.sec_desc, '','',''],true)
-					end
-				else
-					save_last_db_annotations(seq)
-				end
-			end
-			return
-		end
-#----------------------------------------------------------------------------------------------------------
-		warnings = ''
-		errors = ''
-		wrong_seq = false
+		full_prot = query_fasta[final_hit.q_frame-1, query_fasta.length+1].translate
+		original_query_coordinates = [final_hit.q_beg, final_hit.q_end] ## VERBOSE
+		seq.show_alignment(final_hit, query_fasta, show_nts) if  $verbose > 2 ## VERBOSE
+		atg_status, tmp_prot = set_start_codon(final_hit, options[:distance], full_prot, query_fasta)
+		end_status, final_prot = find_end(final_hit, options[:distance], tmp_prot, query_fasta)
-		# if the sequence has more than one hit, the frames are checked and fixed to get an single hit
-		if (q.hits.count > 1)
-			seq_unida = UneLosHit.new(q, query_fasta, pident_threshold)
-			wrong_seq = seq_unida.wrong_seq
-			is_ok = seq_unida.is_ok
-			q_index_start = seq_unida.q_index_start
-			full_prot = seq_unida.full_prot
-			query_fasta = seq_unida.output_seq # repaired fasta
-			final_hit = seq_unida.final_hit # single hit
-			msgs = seq_unida.msgs # warning messages
-			x_number = seq_unida.number_x # number of nucleotides used to fix frame errors
-		else # if there is only one hit
+		puts "\n------------------- POST EXTENSION---------------------" if $verbose > 1 ## VERBOSE
+		seq.show_alignment(final_hit, query_fasta, show_nts, original_query_coordinates) if  $verbose > 1 ## VERBOSE
+		puts "ATG: #{atg_status}  STOP: #{end_status}" if  $verbose > 2 ## VERBOSE
-			if (q.hits[0].q_frame.to_i < 0) # si la secuencia esta al reves le damos la vuelta
-				(query_fasta, q.hits[0].q_frame, q.hits[0].q_beg, q.hits[0].q_end) = reverse_seq(query_fasta, q.hits[0].q_frame, q.hits[0].q_beg, q.hits[0].q_end)
-				q.hits[0].reversed = true
-			end
-			final_hit = q.hits[0] # single hit
-			x_number = 0 # number of nucleotides used to fix frame errors
-			full_prot = query_fasta[final_hit.q_frame-1, query_fasta.length+1].translate
-			(is_ok, q_index_start) = contenidos_en_prot(final_hit, full_prot, q)
-		end
-		# test_final_hit(final_hit, query_fasta)
-#----------------------------------------------------------------------------------------------------------
-		if wrong_seq
-			warnings = "ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence, " + warnings
-			# puts "ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence"
-			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#1\tcontains sense and antisense hits!!!, putative chimeric sequence, "
-			error_log(q, seq, warnings, db_name)
-			return
-		end
-		#----------------------------------------------------------------------------------------------------------
-		warnings += msgs
-		msgs = ''
-		#----------------------------------------------------------------------------------------------------------
-		if (x_number < 0)
-			warnings = "ERROR#2, unexpected negative index in x_number, " + warnings
-			# puts "ERROR#2, unexpected negative index in x_number"
-			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#2\tunexpected negative index in x_number, "
-			error_log(q, seq, warnings, db_name)
-			return
-		end
-		#----------------------------------------------------------------------------------------------------------
-		if (!is_ok)
-			warnings = "ERROR#3, very serious frame error, " + warnings
-			# puts "#{q.query_def} ERROR#3, hit was NOT found in the protein"
-			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#3\thit was NOT found in the protein, "
-			# error_log(q, seq, warnings, db_name)
-			# return
-		end
-#----------------------------------------------------------------------------------------------------------
-		fiable = false
-		if ((final_hit.ident >= pident_threshold) && (final_hit.e_val <= evalue_threshold))
-			fiable = true
+		# decide the sequence status (Complete, Putative Complete, Internal, N-terminus, Putative N-terminus, C-terminus)
+		type, status = determine_status(atg_status, end_status)
+		status = compare_seq_length_with_subject(final_prot, options[:distance], final_hit, type, status)
+		if final_prot.length >= 25 && final_prot.length.to_f/final_hit.full_subject_length >= options[:subject_coverage] # Prot length min of 25 aa and subject coverage by generated prot of 25%
+			save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name)
 		end
-		# if the query protein is large enough at the start of the sequence should have the start codon
-		if (final_hit.q_beg/3 + aas_n_end >= final_hit.s_beg.to_i)
-			substring = full_prot[0, q_index_start + 10]
-			resto_substring = full_prot[q_index_start + 10, full_prot.length - q_index_start - 10]
+	end
-			# to look for the beginning of the protein
-			(m_substring, atg_status, msgs) = find_start(final_hit.s_beg, substring, fiable, aas_n_end)
-			# pasting the substring sequence with the rest of the sequence
-			tmp_prot = "#{m_substring}#{resto_substring}"
-			# to get the value of the start_ORF index
-			final_hit.q_beg = final_hit.q_beg.to_i - ((m_substring.length - 10) * 3)
+	def set_start_codon(final_hit, distance, full_prot, query_fasta)
+		q_index_start = contenidos_en_prot(final_hit.q_seq, full_prot)
+		atg_status = nil
+		_5prima = q_index_start + distance
+		if  final_hit.s_beg == 0 && final_hit.q_seq[0] == 'M' && final_hit.s_seq[0] == 'M' #there is M in query and subject at first position of alignment and subject's M is in first position
+			atg_status = 'complete'
+			tmp_prot = full_prot[q_index_start..full_prot.length]
+		elsif _5prima >= final_hit.s_beg
+			amine_seq = full_prot[0, _5prima] #Contiene parte amino de la proteina
+			carboxile_seq = full_prot[_5prima, full_prot.length - _5prima] #Contiene parte carboxilo de la proteina hasta el fin de la secuencia
+			length_before_cut = amine_seq.length
+			amine_seq, atg_status = find_start(final_hit.s_beg, amine_seq, distance) # to look for the beginning of the protein
+			tmp_prot = "#{amine_seq}#{carboxile_seq}" # merge seqs in prot
+			new_q_beg = final_hit.q_frame-1 + (length_before_cut - amine_seq.length) * 3
+			modify_5p_align(new_q_beg, final_hit, query_fasta)	if  $verbose > 1 ## VERBOSE, Modify query align
+			final_hit.q_beg = new_q_beg # to get the value of the start_ORF index
 		else
-			# if (@verbose)
-				# puts "beginning too short!"
-			# end
+			$global_warnings << 'UnexpStopBegSeq' if full_prot[0, q_index_start].rindex('*')
 			atg_status = 'incomplete'
-			substring = full_prot[0, q_index_start]
-			distance_s_atg = (final_hit.s_beg.to_i - final_hit.q_beg/3) + 1
-			if (substring.rindex('*'))
-				warnings += "Unexpected stop codon in the beginning of your sequence, "
-				# if (@verbose)
-					# puts "#{db_name} -- #{q.query_def} --> Unexpected stop codon in the beginning of your sequence"
-				# end
-			end
-			final_hit.q_beg = final_hit.q_beg.to_i - (substring.length * 3)
 			tmp_prot = full_prot
 		end
-#----------------------------------------------------------------------------------------------------------
-		# look for the end of the protein
-		(resto_substring, end_substring, end_status, warnings, putative_end) = find_end(final_hit, q, full_prot, tmp_prot, end_status, warnings, aas_n_end)
-#----------------------------------------------------------------------------------------------------------
-		final_prot = "#{resto_substring}#{end_substring}"
-		warnings += msgs
-		# to get the value of the end_ORF index
-		if (atg_status == 'complete')
-			final_hit.q_end = final_hit.q_beg - 3 + (final_prot.length * 3)
-		else
-			if (putative_end)
-				final_hit.q_end = final_hit.q_end - 45 + (putative_end*3)
-			end
-		end
-#--------------------------------------------------------------------------------------------------------------
-		# decide the sequence status (Complete, Putative Complete, Internal, N-terminus, Putative N-terminus, C-terminus)
-		final_status = determine_status(atg_status,end_status)
-		#----------------------------------------------------------------------------------------------------------
-		if (final_prot.length - 2*aas_n_end > final_hit.full_subject_length)
-			warnings += " your sequence is longer than subject: #{final_prot.length} - #{final_hit.full_subject_length}"
-		elsif (final_prot.length + aas_n_end < final_hit.full_subject_length)
-			warnings += " your sequence is shorter than subject: #{final_prot.length} - #{final_hit.full_subject_length}"
-			if (final_prot.length + 100 < final_hit.full_subject_length) || (final_prot.length*2 < final_hit.full_subject_length)
-				if (final_status == 'Complete')
-					final_status = 'Putative Complete'
-					warnings += ". Was predicted as Complete, but is very much shorter than de subject"
-					# if (@verbose)
-					# 	puts "#{db_name} -- #{q.query_def} --> your sequence is 100 aas shorter than the subject or shorter than the half length of the subject"
-					# end
-				end
-			end
-		end
-		# test_final_hit(final_hit, query_fasta)
-		print_annotations(seq, q, final_hit, final_status, final_prot, warnings, query_fasta, db_name)
-	end
-	def test_blast_hits(q)
-			puts "query_def: #{q.query_def} full_query_length: #{q.full_query_length} ------------------------------------------------"
-			q.hits.each do |h|
-				puts "\t subject_id: #{h.acc}"
-				puts "\t acc: #{h.acc}"
-				puts "\t full_subject_length: #{h.full_subject_length}"
-				puts "\t q_beg: #{h.q_beg + 1}"
-				puts "\t q_end: #{h.q_end + 1}"
-				puts "\t q_frame: #{h.q_frame}"
-				puts "\t s_beg: #{h.s_beg + 1}"
-				puts "\t s_end: #{h.s_end + 1}"
-				puts "\t s_frame: #{h.s_frame}"
-				puts "\t align_len: #{h.align_len}"
-				puts "\t gaps: #{h.gaps}"
-				puts "\t mismatches: #{h.mismatches}"
-				puts "\t reversed: #{h.reversed}"
-				puts "\t score: #{h.score}"
-				puts "\t bit_score: #{h.bit_score}"
-				puts "\t ident: #{h.ident}"
-				puts "\t e_val: #{h.e_val}"
-				puts "\t definition: #{h.definition}"
-				puts "\t q_seq: #{h.q_seq}"
-				puts "\t s_seq: #{h.s_seq}"
-			end
-	end
-	def test_final_hit(final_hit, query_fasta)
-		puts "\t acc: #{final_hit.acc}"
-		puts "\t full_subject_length: #{final_hit.full_subject_length}"
-		puts "\n\t q_frame: #{final_hit.q_frame}"
-		puts "\t reversed: #{final_hit.reversed}"
-		puts "\n\t q_beg-q_end: #{final_hit.q_beg + 1} - #{final_hit.q_end + 1}"
-		puts "\t s_beg - s_end: #{final_hit.s_beg + 1} - #{final_hit.s_end + 1}"
-		puts "\n\t score: #{final_hit.score}, bit_score: #{final_hit.bit_score}, ident: #{final_hit.ident}, e_val: #{final_hit.e_val}"
-		puts "\n\t definition: #{final_hit.definition}"
-		puts "\t q_seq: #{final_hit.q_seq}"
-		puts "\t s_seq: #{final_hit.s_seq}"
-		puts "\nnt q_beg-q_end\n#{query_fasta[final_hit.q_beg..final_hit.q_end]}"
-		puts "\n\nprot q_beg-q_end\n#{query_fasta[final_hit.q_beg..final_hit.q_end].translate}"
+		return atg_status, tmp_prot
 	end
-	def error_log(q, seq, warnings, db_name)
-		# seq.annotate(:error,"#{q.query_def}\t#{warnings}\t#{q.hits[0].definition}")
-		if (db_name =~ /^tr_/)
-			if (seq.get_annotations(:tmp_annotation).empty?)
-				if (seq.sec_desc.empty?)
-					if (!q.hits[0].definition.nil?)
-						warnings = "Coding sequence with some errors, #{warnings}"
-						seq.sec_desc = "#{q.query_def}\t#{seq.fasta_length}\t#{q.hits[0].acc}\t#{db_name}\tMisassembled\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
-						seq.annotate(:tmp_annotation,[seq.sec_desc, '','',''],true)
-					else
-						seq.annotate(:apply_tcode,'')
-					end
-				else
-					warnings = "Coding sequence with some errors, #{warnings}"
-					tmp_annot = seq.sec_desc.sub('my_warning',"#{warnings}")
-					seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
-				end
-			else
-				save_last_db_annotations(seq)
-			end
-		else
-			if (seq.sec_desc.empty?)
-				if (!q.hits[0].definition.nil?)
-					warnings = "Coding sequence with some errors, #{warnings}"
-					seq.sec_desc = "#{q.query_def}\t#{seq.fasta_length}\t#{q.hits[0].acc}\t#{db_name}\tMisassembled\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
-				end
-			end
-		end
-	end
-	def save_last_db_annotations(seq)
-		# puts "sequence not complete! recovering annotations from previous database! sldba!!"
-		(q, final_hit, final_prot, query_fasta, final_status) = seq.get_annotations(:tmp_annotation).first[:message][3]
-		print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
-		(name,fasta_length,acc,db_name,final_status,testcode,e_val,ident,my_length,subject_length,warnings,q_frame,q_beg,q_end,s_beg,s_end,description,final_prot) = seq.get_annotations(:tmp_annotation).first[:message][0].split("\t")
-		if (final_hit.reversed)
-			(kk, q_frame, q_end, q_beg) = reverse_seq(query_fasta, q_frame.to_i, q_beg.to_i, q_end.to_i)
-		end
-		seq.annotate(:protein,seq.get_annotations(:tmp_annotation).first[:message][1])
-		seq.annotate(:alignment,seq.get_annotations(:tmp_annotation).first[:message][2])
-		tmp_annot = "#{name}\t#{fasta_length}\t#{acc}\t#{db_name}\t#{final_status}\t\t#{e_val}\t#{ident}\t#{my_length}\t#{subject_length}\t#{warnings}\t#{q_frame}\t#{q_beg}\t#{q_end}\t#{s_beg}\t#{s_end}\t#{description}\t#{final_prot}"
-		seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
-	end
-	def find_start(subject_start, substring, fiable, aas_n_end)
-		tmp_prot = ''
-		msgs = ''
-		atg_status = 'incomplete' # complete, incomplete or putative
-		# puts "\nsubstring (#{substring.length} aas):\n#{substring}"
-		stop_codon = substring.rindex('*')
-		# marcamos la distancia al s_beg desde el principio del substring
-		# s_beg_distance = (substring.length) - subject_start
-		s_beg_distance = (substring.length - 10) - subject_start
-		# marcamos la distancia al s_beg desde el final del substring
-		atg_distance = (subject_start + 1) - (substring.length - 10)
-		if (atg_distance <= 0)
-			atg_distance = 0
-		else
-			# puts "expected atg_distance = 0, your sequence atg_distance = #{atg_distance}; limit (1-15)"
-			msgs = "atg_distance in limit (1-15): atg_distance = #{atg_distance}, "
-		end
-		# puts "s_beg_distance:#{s_beg_distance}, stop_codon: #{stop_codon}, subject_start: #{subject_start + 1}, atg_distance: #{atg_distance}"
-		#----------------------------------------------------------------------------------------------------------
-		# tenemos un codon de parada en el substring 5 prima
-		if (stop_codon)
-			stop_codon += 1
-			# ahora vamos a ver si el stop esta antes o despues del s_beg
-			if (stop_codon <= s_beg_distance) # esta antes
-				substring = substring[stop_codon, substring.length - stop_codon]
-				# puts "\nhay un codon de parada en el substring (#{substring.length} aas)\tstop_codon:#{stop_codon +1}\n#{substring}\n\n"
-				first_m = substring.index('M')
-				if (first_m) # tenemos M y stop ---------------------------------------------------------------------------
-					substring = substring[first_m, substring.length - first_m]
+	def find_start(subject_start, amine_seq, distance)
+		atg_status = 'putative' # complete, incomplete or putative
+		stop_codon = amine_seq.rindex('*')
+		if !stop_codon.nil? # tenemos un codon de parada en el amine_seq 5 prima
+			_5prime_UTR = amine_seq.length - 10 - subject_start # marcamos la distancia al s_beg desde el principio del amine_seq
+			amine_seq = amine_seq[stop_codon + 1 .. amine_seq.length - 1]
+			first_m = amine_seq.index('M')
+			if stop_codon <= _5prime_UTR # Ver si stop está en zona 5 prima UTR
+				if first_m # tenemos M
+					amine_seq = amine_seq[first_m .. amine_seq.length - 1]
 					atg_status = 'complete'
-				else # con STOP pero sin M --------------------------------------------------------------------------------
-					atg_status = 'putative'
-					# puts "there is not a start codon near the expected beginning of your sequence, distance to subject ATG= #{atg_distance} aas --> good simil: #{fiable}"
-					msgs += "W1: There is no M at the beginning, "
+				else # con STOP pero sin M
+					$global_warnings << 'noM1'
 				end
-				#----------------------------------------------------------------------------------------------------------
 			else # esta despues, un cambio de fase impide analizar el principio
-				substring = substring[stop_codon, substring.length - stop_codon] # comentar?
-				first_m = substring.index('M') # comentar?
-				if (first_m) # tenemos M y unexpected stop # comentar?
-					substring = substring[first_m, substring.length - first_m] # comentar?
-				end # comentar?
-				# TODO esto se puede cambiar!
-				atg_status = 'putative'
-				msgs += " Unexpected STOP codon in 5 prime region, "
-				# puts "\nhay un codon de parada inesperado en el substring (#{substring.length} aas)\tstop_codon:#{stop_codon}, s_beg_distance: #{s_beg_distance +1}, atg_distance: #{atg_distance}"
+				$global_warnings << 'UnexpSTOP5p'
+ 				amine_seq = amine_seq[first_m .. amine_seq.length - 1] if first_m # tenemos M
 			end
-			#---------------------------------------------------------------------------------------------------------------
 		else # no hay stop codon
-			first_m = substring.index('M')
-			if (first_m) # tenemos M, sin stop
-				m_distance = subject_start - (substring.length - 10 - first_m)
-				substring = substring[first_m, substring.length - first_m]
-				# m_distance = [first_m+1,s_beg_distance].max - [first_m+1,s_beg_distance].min
-				if (m_distance > aas_n_end*2) # sin STOP, con atg pero muy lejos del inicio que marca el subject ---------------
-					# puts "No stop codon before M and M found is too far from subject M, distance to subject ATG= #{m_distance} aas --> good simil: #{fiable}"
-					msgs += "No stop codon before M and M found is too far from subject M, "
+			first_m = amine_seq.index('M')
+			if first_m # tenemos M
+				amine_seq = amine_seq[first_m .. amine_seq.length - 1]
+				m_distance = (subject_start - amine_seq.length).abs - 10
+				if m_distance.abs > distance*2 # con atg pero muy lejos del inicio que marca el subject
+					$global_warnings << 'NoStopMfar'
 					atg_status = 'incomplete'
-				else
-					if (fiable) # Tenemos M y aunque no hay STOP condon el ortologo es fiable ----------------------------------
-						# msgs += "No stop codon before M but high homology subject, "
-						atg_status = 'complete'
-					else # Tenemos M pero no tenemos stop y el ortologo no es fiable -------------------------------------------
-						# puts "No stop codon before M and low homology subject, distance to subject ATG= #{m_distance} aas --> good simil: #{fiable}"
-						msgs += "No stop codon before M and low homology subject, "
-						atg_status = 'putative'
-					end
+				else # Tenemos M
+					atg_status = 'complete'
 				end
-			else # sin M ni STOP -------------------------------------------------------------------------------------------
-				atg_status = 'putative'
-				# puts "your sequence has the subject beginning but there is not start codon at the beginning, distance to subject ATG= #{atg_distance} aas --> good simil: #{fiable}"
-				msgs += "W2: There is no M at the beginning, "
+			else # sin M
+				$global_warnings << 'noM2'
 			end
 		end
-		return [substring, atg_status, msgs]
+		return amine_seq, atg_status
 	end
-	def find_end(final_hit, q, full_prot, tmp_prot, end_status, warnings, aas_n_end)
-		# aqui vemos lo que queda sin similitud hasta el final
-		s_end_resto = (final_hit.full_subject_length - (final_hit.s_end.to_i + 1)) # en el subject, numero de aas que necesito cubrir
-		q_end_resto = (q.full_query_length.to_i - final_hit.q_end.to_i)/3 # en el query, numero de aas que tengo
-		sq_end_distance = q_end_resto - s_end_resto
-		cut_in_5p = full_prot.length - tmp_prot.length
-		resto_substring = tmp_prot[0..final_hit.q_end/3 - cut_in_5p - 16]
-		end_substring  =  tmp_prot[final_hit.q_end/3 - cut_in_5p - 15..tmp_prot.length]
-		putative_end = end_substring.index('*')
-		# si no tenemos suficiente secuencia para tener el stop (nos faltan 15 aas o mas)
-		if (sq_end_distance + aas_n_end < 0)
+	def find_end(final_hit, max_distance, tmp_prot, query_fasta)
+		frame_shift = check_frame_shift(final_hit)
+		beg_end_string =(final_hit.q_end-final_hit.q_beg)/3 - max_distance # Begin of terminal region (Coordinate) in tmp_prot
+		atg_substring = tmp_prot[0..beg_end_string] # prot without terminal region
+		end_substring = tmp_prot[beg_end_string + 1 ..tmp_prot.length-1] #Take 3' of unigen
+		#puts "\e[32m\nfinal_hit.q_end-final_hit.q_beg: #{final_hit.q_end-final_hit.q_beg} /3  - max_distance: #{max_distance}\e[0m"
+		#puts "\e[33mbeg_end_string: #{beg_end_string}\e[0m"
+		#puts "\e[35mtmp_prot.length: #{tmp_prot.length}\e[0m"
+		if beg_end_string < 0 || end_substring.nil? #Sequences whose homology is at end of it and dont't exits the 3' part of unigene
+			atg_substring = tmp_prot
+			end_substring = ''
 			end_status = 'incomplete'
-			if (putative_end)
-				warnings += " Unexpected STOP codon at 3' end. Distance to subject end: #{sq_end_distance.abs} aas, "
-				end_substring = end_substring[0, putative_end+1] # comentar?
-				# if (@verbose)
-				# 	puts "#{db_name} -- #{q.query_def} --> Unexpected STOP codon at 3' end. Distance to subject end: #{sq_end_distance.abs} aas"
-				# end
-			else
-				warnings += "Distance to subject end: #{sq_end_distance.abs} aas, "
-				# if (@verbose)
-				# 	puts "#{db_name} -- #{q.query_def} --> Distance to subject end: #{sq_end_distance.abs} aas"
-				# end
-			end
+		else
+			end_status = 'putative'
+			putative_end = end_substring.index('*')
+			end_substring = end_substring[0 .. putative_end] if putative_end
+			s_end_resto = final_hit.s_len - (final_hit.s_end + 1) # en el subject, numero de aas que necesito cubrir
+			q_end_resto = (query_fasta.length - final_hit.q_end)/3 # en el query, numero de aas que tengo
+			sq_end_distance = q_end_resto - s_end_resto # La diferencia se hace a partir del final del hit para que el calculo no quede sesgado en caso de que la secuecia este truncada por 5'
-		else # tenemos suficiente secuencia
-			if (putative_end) # tenemos un stop
-				q_stop_resto = (putative_end - 15) # distancia entre el stop y el q_end, si es negativo el stop esta antes del q_end
-				qs_stop_distance = q_stop_resto - s_end_resto # distancia entre los stops del q y el s
-				# puts "putative_end: #{putative_end}, q_stop_resto: #{q_stop_resto}, qs_stop_distance: #{qs_stop_distance}"
-				if (qs_stop_distance + aas_n_end >= 0) # si q_end esta a menos de 15 aas antes o esta despues del s_end; complete
-					end_status = 'complete'
-				elsif (qs_stop_distance + 2*aas_n_end < 0) # si q_end es mas de 30 aas menor que el s_end; putative/Putative chimeric seq
-					end_status = 'putative'
-					warnings += " query STOP codon too far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas, putative chimeric sequence, "
-					# if (@verbose)
-					# 	puts "#{db_name} -- #{q.query_def} --> query STOP too far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas, putative chimeric sequence"
-					# end
-				elsif (qs_stop_distance + aas_n_end < 0) # si q_end es mas de 15 aas menor pero menos de 30 que el s_end; putative
-					end_status = 'putative'
-					warnings += " query STOP codon is far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas, "
-					# if (@verbose)
-					# 	puts "#{db_name} -- #{q.query_def} --> query STOP far from subject stop. Distance to subject end: #{qs_stop_distance.abs} aas"
-					# end
+			if (final_hit.align_len == final_hit.s_len && putative_end)||(sq_end_distance.abs  <= max_distance && putative_end && putative_end <= max_distance*2) #Stop in a Full-length. max_distance *2 is set by de margin of +-15aa at the end of aligment
+				end_status = 'complete'
+			elsif sq_end_distance  < max_distance # si no tenemos suficiente secuencia para tener el stop (nos faltan 15 aas o mas)
+				end_status = 'incomplete'
+				if putative_end
+					$global_warnings << ['UnexpSTOP3pDist', sq_end_distance.abs]
+				else
+					$global_warnings << ['DistSubj', sq_end_distance.abs]
+				end
+			else # tenemos suficiente secuencia
+				if putative_end # tenemos un stop
+					#beg_end_string indica en que punto del unigen se encuentra el area de busqueda del codon stop
+					stop_q_s = beg_end_string + putative_end - final_hit.s_len # Space between query's stop and subject's stop
+					if stop_q_s.abs <= max_distance #Stop codon is in search region
+						end_status = 'complete'
+					elsif stop_q_s < 0
+						$global_warnings << 'UnexpSTOP3p'
+					elsif stop_q_s > 0
+						end_status = 'complete'
+						$global_warnings << 'QueryTooLong'
+					end
+				else # no tenemos codon de parada pero tenemos suficiente secuencia
+					end_status = 'incomplete'
+					$global_warnings << 'ProtFusion'
 				end
-				end_substring = end_substring[0, putative_end+1]
-			else # no tenemos codon de parada pero tenemos suficiente secuencia
-				end_status = 'putative'
-				warnings += " STOP codon was not found. Distance to subject end: #{sq_end_distance.abs} aas, "
-				# if (@verbose)
-				# 	puts "#{db_name} -- #{q.query_def} --> STOP codon was not found. Distance to subject end: #{sq_end_distance.abs} aas"
-				# end
 			end
 		end
-		return [resto_substring, end_substring, end_status, warnings, putative_end]
+		final_prot = atg_substring + end_substring
+		end_status = 'complete' if final_prot.length == final_hit.s_len+1 && final_prot[final_prot.length-1] == '*'
+		new_q_end = final_hit.q_beg-1 + final_prot.length * 3 + frame_shift
+		modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) if  $verbose > 1
+		final_hit.q_end = new_q_end
+		return end_status, final_prot
 	end
-	def determine_status(atg_status,end_status)
-		if (atg_status == 'complete') && (end_status == 'complete') # proteina completa
-			final_status = 'Complete'
-		elsif (atg_status == 'putative' && end_status == 'complete') || (atg_status == 'complete' && end_status == 'putative') || (atg_status == 'putative' && end_status == 'putative') # comienzo y/o final putative
-			final_status = 'Putative Complete'
-		elsif (atg_status == 'incomplete') && (end_status == 'incomplete') # region intermedia
-			final_status = 'Internal'
-		elsif (atg_status == 'complete') && (end_status == 'incomplete') # tenemos el principio de la proteina
-			final_status = 'N-terminus'
-		elsif (atg_status == 'putative') && (end_status == 'incomplete') # puede que tengamos el principio de la proteina
-			final_status = 'Putative N-terminus'
-		elsif (atg_status == 'incomplete') && (end_status == 'complete') # tenemos el final de la proteina
-			final_status = 'C-terminus'
-		elsif (atg_status == 'incomplete') && (end_status == 'putative') # puede que tengamos el final de la proteina
-			final_status = 'Putative C-terminus'
+	def determine_status(atg_status, end_status)
+		if atg_status != 'incomplete' && end_status != 'incomplete' # proteina completa
+			type = COMPLETE
+		elsif atg_status == 'incomplete' && end_status == 'incomplete' # region intermedia
+			type = INTERNAL
+		elsif atg_status != 'incomplete' && end_status == 'incomplete' # tenemos el principio de la proteina
+			type = N_TERMINAL
+		elsif atg_status == 'incomplete' && end_status != 'incomplete' # tenemos el final de la proteina
+			type = C_TERMINAL
 		end
-		return final_status
-	end
-	def print_annotations(seq, q, final_hit, final_status, final_prot, warnings, query_fasta, db_name)
-		name_diff = q.query_def.length - final_hit.acc.length
-		if (name_diff > 0)
-			spnum = ' '*name_diff.to_i
+		if atg_status == 'putative' || end_status == 'putative'
+			status = FALSE # Putative
 		else
-			spnum = ''
+			status = TRUE # Sure
 		end
-#-------------------------------------------------------------------------------------------------------------------------------------
-		# if the sequence is Complete will be printed                 --------------------------------------------------------------------
-		if (final_status == 'Complete')
-			seq.annotate(:protein,">#{q.query_def}\n#{final_prot}")
-			print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
-			if (final_hit.reversed)
-				(kk, final_hit.q_frame, final_hit.q_end, final_hit.q_beg) = reverse_seq(seq.seq_fasta, final_hit.q_frame.to_i, final_hit.q_beg.to_i, final_hit.q_end.to_i)
-			end
-			seq.annotate(:complete,"#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}")
-			seq.annotate(:alignment,"#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n")
-#-------------------------------------------------------------------------------------------------------------------------------------
-		else # la proteina no esta completa                      -------------------------------------------------------------------------
-			if (!seq.get_annotations(:tmp_annotation).empty?) && (!seq.get_annotations(:tmp_annotation).nil?) # ---> trae informacion de una bd anterior
-				if (db_name =~/^tr_/) #                                          --->  estamos usando el trembl, se dejan las anotaciones que trae
-					# puts "#{db_name} -- #{q.query_def} --> print_annotations: sequence not complete! recovering annotations from previous database!"
-					(kk1, final_hit, final_prot, query_fasta, final_status) = seq.get_annotations(:tmp_annotation).first[:message][3]
-					print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
-					(name,fasta_length,acc,db_name,final_status,testcode,e_val,ident,my_length,subject_length,warnings,q_frame,q_beg,q_end,s_beg,s_end,description,final_prot) = seq.get_annotations(:tmp_annotation).first[:message][0].split("\t")
-					if (final_hit.reversed)
-						(kk, q_frame, q_end, q_beg) = reverse_seq(query_fasta, q_frame.to_i, q_beg.to_i, q_end.to_i)
-					end
-					my_prot = seq.get_annotations(:tmp_annotation).first[:message][1]
-					seq.annotate(:protein,my_prot)
-					my_align = seq.get_annotations(:tmp_annotation).first[:message][2]
-					seq.annotate(:alignment,my_align)
-					tmp_annot = "#{name}\t#{query_fasta.length}\t#{acc}\t#{db_name}\t#{final_status}\t\t#{e_val}\t#{ident}\t#{my_length}\t#{subject_length}\t#{warnings}\t#{q_frame}\t#{q_beg}\t#{q_end}\t#{s_beg}\t#{s_end}\t#{description}\t#{final_prot}"
-					seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
-				#-----------------------------------------------------------------------------------------------------------------------------
-				# elsif (db_name =~ /^sp_/) #                                       ---> estamos usando el sp, se dejan las anotaciones que trae
-					# puts "#{db_name} -- #{q.query_def} --> print_annotations: Mantenemos las anotaciones de la BD de usuario y pasamos la secuencia al trembl"
-				end
-#-------------------------------------------------------------------------------------------------------------------------------------
-			elsif (seq.get_annotations(:tmp_annotation).empty?) #                                ---> NO trae informacion de una bd anterior
-				if (db_name =~ /^tr_/) #                                                                         ---> estamos usando el trembl
-					# puts "#{db_name} -- #{q.query_def} --> print_annotations: #{q.query_def} is not complete!! se anota con trembl"
-					print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
+		return type, status
+	end
-					if (final_hit.reversed)
-						(kk, final_hit.q_frame, final_hit.q_end, final_hit.q_beg) = reverse_seq(seq.seq_fasta, final_hit.q_frame.to_i, final_hit.q_beg.to_i, final_hit.q_end.to_i)
-					end
-					seq.annotate(:alignment,"#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n")
-					seq.annotate(:protein,">#{q.query_def}\n#{final_prot}")
-					tmp_annot = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}"
-					seq.annotate(:tmp_annotation,[tmp_annot, '','',''])
-#-------------------------------------------------------------------------------------------------------------------------------------
-				else #                                                                               cargamos anotaciones para la siguiente BD
-					tmp_prot = ">#{q.query_def}\n#{final_prot}"
-					tmp_align = "#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n"
-					tmp_annot = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}"
-					seq.sec_desc = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\tMisassembled\t\t#{final_hit.e_val}\t#{final_hit.ident}\t\t#{final_hit.full_subject_length}\t#{warnings}\t\t\t\t\t\t#{final_hit.definition}\t"
-					seq.annotate(:tmp_annotation,[tmp_annot, tmp_prot,tmp_align,[q, final_hit, final_prot, query_fasta, final_status]])
-					# puts "\n\n\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.---#{q.query_def}\t#{final_status}\n#{tmp_prot}"
-					# puts "#{db_name} -- #{q.query_def} --> print_annotations: cargamos anotaciones para utilizarlas en la siguiente BD"
+	def compare_seq_length_with_subject(final_prot, distance, final_hit, type, status)
+		if final_prot.length - 2 * distance > final_hit.s_len
+			$global_warnings << ['SeqLonger', final_prot.length, final_hit.s_len]
+		elsif final_prot.length + 2 * distance < final_hit.s_len
+			$global_warnings << ['SeqShorter', final_prot.length, final_hit.s_len]
+			if final_prot.length + 100 < final_hit.s_len || final_prot.length*2 < final_hit.s_len
+				if type == COMPLETE
+					status = FALSE
+					$global_warnings << 'VeryShorter'
 				end
 			end
 		end
+		return status
 	end
-	def print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status)
-		bad_atg = false
-#-------------------------------------------------------------------------------------------------------------  ATG
-		if (final_status == 'Complete') || (final_status == 'Putative Complete') || (final_status == 'Putative N-terminus') || (final_status == 'N-terminus')
-			# puts "entra aqui, final_status: #{final_status}"
-			my_seq_n = query_fasta[final_hit.q_beg - 5..final_hit.q_beg + 5]
-			beg5 = false
-			# -------------------------------------   si my_seq_n = nil puede ser porque q_beg sea < 5
-			if (final_hit.q_beg < 6)
-				my_seq_n = query_fasta[0..10]
-				beg5 = true
-				# puts "empieza en el borde de la seq"
+	def save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name)
+		# if the sequence is Complete or it hasn't previous info will be saved
+		if seq.type == UNKNOWN || (type == COMPLETE && seq.type != COMPLETE)
+			seq.type = type
+			seq.status = status
+			seq.db_name = db_name
+			seq.seq_fasta = query_fasta
+			seq.seq_aa = final_prot
+			seq.hit = final_hit
+			seq.warnings($global_warnings)
+			$global_warnings = [] # Clean all warnings for current sequence
+			seq.seq_nt = mark_nt_seqs(final_hit, query_fasta)
+			if type == COMPLETE
+				seq.ignore = TRUE
 			end
-			atg_found = my_seq_n.index(/ATG/i)
-			atg_found_rv = my_seq_n.rindex(/ATG/i)
-			my_atg_index = nil
 		end
-		if (!atg_found.nil?)
-			if (beg5)
-				my_seq_n.sub!(/ATG/i,'_-_ATG')
-				my_atg_index = atg_found
-				my_seq = my_seq_n + query_fasta[11..query_fasta.length + 1]
-			elsif (atg_found == atg_found_rv)
-				my_seq_n.sub!(/ATG/i,'_-_ATG')
-				my_atg_index = final_hit.q_beg - 5 + atg_found
-				my_seq = query_fasta[0..final_hit.q_beg - 6] + my_seq_n + query_fasta[final_hit.q_beg + 6..query_fasta.length + 1]
-				# puts "my_seq despues de encontrar el atg: #{my_seq}"
-			elsif (atg_found == 5) || (atg_found_rv == 5)
-				my_seq_n = my_seq_n[0..4]+'_-_'+my_seq_n[5..10]
-				my_atg_index = final_hit.q_beg - 5 + atg_found
-				my_seq = query_fasta[0..final_hit.q_beg - 6] + my_seq_n + query_fasta[final_hit.q_beg + 6..query_fasta.length + 1]
-			else
-				# puts "#{q.query_def}  tiene mas de un ATG  my_seq_n: #{my_seq_n}"
-				bad_atg = true
-				my_seq = query_fasta
-			end
-		else
-			bad_atg = true
-			# puts "#{q.query_def}  NO TIENE ATG  my_seq_n: #{my_seq_n}"
-			my_seq = query_fasta
+		if  $verbose > 2
+			puts "\e[1mStruct annot: #{seq.prot_annot_calification}\e[0m"
 		end
-#-------------------------------------------------------------------------------------------------------------  STOP
-	stop_c = nil
-		if (final_status == 'Complete') || (final_status == 'Putative Complete') || (final_status == 'C-terminus') || (final_status == 'Putative C-terminus')
+	end
-			if (bad_atg == true)
-				stop_c = my_seq[final_hit.q_end - 2..final_hit.q_end]
-				stop_c_longer = my_seq[final_hit.q_end - 7..final_hit.q_end + 5]
-			else
-				stop_c = my_seq[final_hit.q_end + 3..final_hit.q_end + 5]
-				stop_c_longer = my_seq[final_hit.q_end - 2..final_hit.q_end + 10]
-			end
+	def mark_nt_seqs(final_hit, query_fasta)
+		atg = query_fasta[final_hit.q_beg..final_hit.q_beg + 2]
+		mark_atg = nil
+		if atg == 'ATG'
+			mark_atg = '_-_'
 		end
+		stop = query_fasta[final_hit.q_end - 2..final_hit.q_end]
+		mark_stop = nil
+		if stop == 'TAG' || stop == 'TGA' || stop == 'TAA'
+			mark_stop = '___'
+		end
+		seq5p = query_fasta[0..final_hit.q_beg-1]
+		orf = query_fasta[final_hit.q_beg..final_hit.q_end]
+		seq3p = query_fasta[final_hit.q_end..query_fasta.length]
+		nt_seq = "#{seq5p}#{mark_atg}#{orf}#{mark_stop}#{seq3p}"
+		return nt_seq
+	end
-		if (!stop_c.nil?)
-			# puts stop_c
-			# puts stop_c_longer
-			if (stop_c.translate == '*')
-				if (bad_atg == true)
-					my_seq = my_seq[0..final_hit.q_end] +'___'+ my_seq[final_hit.q_end + 1..my_seq.length + 1]
-					seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG\t\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
-				else
-					my_seq = my_seq[0..final_hit.q_end + 5] +'___'+ my_seq[final_hit.q_end + 6..my_seq.length + 1]
-					my_prot = my_seq.sub(/\w+_\-_/,'')
-					my_prot = my_prot.sub(/___\w+/,'')
-					my_prot = my_prot.translate
-					my_prot = my_prot.sub(/x$/,'')
-					simliar_fragment = final_prot.lcs(my_prot)
-					if (simliar_fragment.length == final_prot.length) && (simliar_fragment.length == my_prot.length)
-						seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\t\t\t\t\t\t#{my_seq}")
-					else
-						seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tthe nucleotide sequence contain a lot of errors\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
-						# puts "nt seq: was no possible to find stop codon, the nucleotide sequence contain a lot of errors"
+	def exonerate_fix_frame_shift(query_fasta, hit)
+		frame_shifts = []
+		added_nts = 0
+		hit.each_with_index do |hsp, num|
+			if hsp.class.to_s == 'ExoBlastHit' #Only this type of class of BlastHit has frameshift attributes
+				if !hsp.q_frameshift.empty? #There is frameshift
+					hsp.q_frameshift.each do |position, num_nts|
+						local_add = 3 - num_nts
+						fs_final_position = position + num_nts
+						$global_warnings << ['ExFrameS', fs_final_position]
+						frame_shifts << [fs_final_position, local_add]
+						added_nts += local_add
 					end
 				end
+			end
+			hsp.q_beg += added_nts if num > 0
+			hsp.q_end += added_nts
+		end
+		add = 0
+		frame_shifts.each do |position, num_nts|
+			query_fasta = query_fasta.insert(position+add, 'n'*num_nts)
+			add += num_nts
+		end
+		return query_fasta
+	end
-			else
-				if (final_status == 'Putative Complete') || (final_status == 'C-terminus') || (final_status == 'Putative C-terminus')
-					if (bad_atg == true)
-						stop_c = my_seq[final_hit.q_end+1..final_hit.q_end+3]
-						stop_c_longer = my_seq[final_hit.q_end - 4..final_hit.q_end + 8]
-					else
-						stop_c = my_seq[final_hit.q_end + 7..final_hit.q_end + 9]
-						stop_c_longer = my_seq[final_hit.q_end..final_hit.q_end + 13]
-					end
-					if (!stop_c.nil?)
-						if (stop_c.translate == '*')
-							final_hit.q_end = final_hit.q_end + 3
-							if (bad_atg == true)
-								my_seq = my_seq[0..final_hit.q_end] +'___'+ my_seq[final_hit.q_end + 1..my_seq.length + 1]
-								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG\t\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
-							else
-								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
-							end
-						else
-							if (bad_atg == true)
-								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG NO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
-								# puts "find nt end: NO ATG, NO exact STOP"
-							else
-								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
-								# puts "find nt end: GOOD ATG, NO exact STOP"
-							end
-						end
-					end
-				end
+## VERBOSE METHODS
+	def show_nts
+		show = FALSE
+		show = TRUE if $verbose && $verbose > 3
+		return show
+	end
-			end
-		else
+	def modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) ## For visual report
+		if new_q_end > final_hit.q_end #There is an align extension
+			extend_align = query_fasta[final_hit.q_end+1 .. new_q_end].translate
+			final_hit.q_seq = final_hit.q_seq + extend_align
+		elsif new_q_end < final_hit.q_end #The align is cutted
+			upper_limit = final_prot.length - 1 + final_hit.q_seq.count('-')
+			final_hit.q_seq = final_hit.q_seq[0 .. upper_limit]
+		end
+	end
-			if (bad_atg == true)
-				seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG NO STOP\t\t\t\t\t#{my_seq}")
-			else
-				seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP\t\t\t\t\t#{my_seq}")
-			end
+	def modify_5p_align(new_q_beg, final_hit, query_fasta) ## For visual report
+		if new_q_beg < final_hit.q_beg #There is an align extension
+			extend_align = query_fasta[new_q_beg .. final_hit.q_beg-1].translate
+			final_hit.q_seq = extend_align + final_hit.q_seq
+		elsif new_q_beg > final_hit.q_beg #The align is cut
+			seq_cut = (new_q_beg - final_hit.q_beg)/3
+			gaps = final_hit.q_seq[0..seq_cut].count('-')
+			seq_cut += gaps
+			final_hit.q_seq = final_hit.q_seq[seq_cut .. final_hit.q_seq.length-1]
 		end
 	end
 end