full_lengther_next 0.0.8 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
 - data/History.txt +2 -2
 - data/Manifest.txt +33 -18
 - data/Rakefile +4 -2
 - data/bin/download_fln_dbs.rb +310 -158
 - data/bin/full_lengther_next +160 -103
 - data/bin/make_test_dataset.rb +236 -0
 - data/bin/make_user_db.rb +101 -117
 - data/bin/plot_fln.rb +270 -0
 - data/bin/plot_taxonomy.rb +70 -0
 - data/lib/expresscanvas.zip +0 -0
 - data/lib/full_lengther_next.rb +3 -3
 - data/lib/full_lengther_next/classes/artifacts.rb +66 -0
 - data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
 - data/lib/full_lengther_next/classes/cdhit.rb +154 -0
 - data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
 - data/lib/full_lengther_next/classes/common_functions.rb +105 -63
 - data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
 - data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
 - data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
 - data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
 - data/lib/full_lengther_next/classes/handle_db.rb +30 -0
 - data/lib/full_lengther_next/classes/my_worker.rb +308 -138
 - data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
 - data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
 - data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
 - data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
 - data/lib/full_lengther_next/classes/reptrans.rb +210 -0
 - data/lib/full_lengther_next/classes/sequence.rb +439 -80
 - data/lib/full_lengther_next/classes/test_code.rb +15 -16
 - data/lib/full_lengther_next/classes/types.rb +12 -0
 - data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
 - data/lib/full_lengther_next/classes/warnings.rb +40 -0
 - metadata +207 -93
 - data/lib/full_lengther_next/classes/lcs.rb +0 -33
 - data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
 
| 
         @@ -1,78 +1,116 @@ 
     | 
|
| 
       1 
1 
     | 
    
         | 
| 
       2 
2 
     | 
    
         
             
            module CommonFunctions
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
     | 
    
         
            -
            	def contenidos_en_prot( 
     | 
| 
       5 
     | 
    
         
            -
            		
         
     | 
| 
       6 
     | 
    
         
            -
            		 
     | 
| 
       7 
     | 
    
         
            -
            		q_index_start =  
     | 
| 
       8 
     | 
    
         
            -
            		 
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
            			 
     | 
| 
       24 
     | 
    
         
            -
            			 
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
            			fr_index_start = full_prot.index(simliar_fragment)
         
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
            			if (q_index_start == 9999)
         
     | 
| 
       29 
     | 
    
         
            -
            				q_index_start = fr_index_start
         
     | 
| 
      
 4 
     | 
    
         
            +
            	# Finds the position in +full_prot+ at which +key_seq+ starts.
            	#
            	# Both sequences are normalised first: every 'X'/'x' is turned into the gap
            	# character '-'. The strategies below are tried in order until one yields a
            	# position:
            	#   1. exact substring match;
            	#   2. match_with_ungapped_reference (anchors on a fragment of +key_seq+);
            	#   3. only when +full_prot+ contains gaps: a gap-tolerant comparison of
            	#      +key_seq+ against the start of +full_prot+ and, when +full_prot+ is
            	#      longer, against its tail (the tail match wins if both succeed);
            	#   4. match_with_gapped_reference (anchors on a fragment of +full_prot+).
            	#
            	# key_seq   - String, sequence to locate.
            	# full_prot - String, sequence searched in.
            	#
            	# Returns an Integer; 0 when no strategy finds a position.
            	def contenidos_en_prot(key_seq, full_prot)
            		# tr is the character-for-character equivalent of gsub(/[\-Xx]/, '-').
            		full_prot = full_prot.tr('Xx', '-')
            		compare_prot = key_seq.tr('Xx', '-')

            		q_index_start = full_prot.index(compare_prot) # full match between both sequences
            		return q_index_start unless q_index_start.nil?

            		# Gaps prevent the full match: fall back to fragment based strategies.
            		q_index_start = match_with_ungapped_reference(full_prot, compare_prot)
            		if q_index_start.nil? && full_prot.include?('-')
            			# Split once and reuse; scan_sequences only reads the arrays.
            			full_chars = full_prot.split('')
            			compare_chars = compare_prot.split('')
            			diff = full_prot.length - compare_prot.length

            			q_index_start = 0 if scan_sequences(full_chars, compare_chars) == compare_prot.length
            			if diff > 0 && scan_sequences(full_chars, compare_chars, diff) == compare_prot.length
            				q_index_start = diff
            			end

            			q_index_start = match_with_gapped_reference(full_prot, compare_prot) if q_index_start.nil?
            		end

            		q_index_start || 0
            	end
         
     | 
| 
       59 
31 
     | 
    
         | 
| 
       60 
32 
     | 
    
         | 
| 
      
 33 
     | 
    
         
            +
            	# Locates +compare_prot+ inside a gapped +full_prot+ by anchoring on the
            	# first gap-free fragment of +full_prot+ (longer than 4 residues) that can be
            	# found in +compare_prot+, either whole or by its first 5 residues.
            	#
            	# The returned position is the offset of that fragment in +full_prot+; when
            	# the fragment is not at the very start of +full_prot+ the position is moved
            	# upstream with extend_match.
            	#
            	# The fragment offset is taken from the scan itself instead of searching the
            	# fragment text again with String#index: a repeated fragment (or repeated
            	# 5-residue prefix) would otherwise be reported at its first occurrence,
            	# which is not necessarily where the anchoring fragment is.
            	#
            	# full_prot    - String with '-' as gap character.
            	# compare_prot - String to locate.
            	#
            	# Returns an Integer position in +full_prot+, or nil when no fragment matches.
            	def match_with_gapped_reference(full_prot, compare_prot)
            		q_index_start = nil
            		full_prot.scan(/[^-]+/) do |fragment|
            			next if fragment.length <= 4

            			fragment_start = Regexp.last_match.begin(0)
            			# Whole fragment first; if gaps prevent that, try its first 5 residues.
            			compare_prot_index = compare_prot.index(fragment) || compare_prot.index(fragment[0..4])
            			next if compare_prot_index.nil?

            			q_index_start = fragment_start
            			if fragment_start > 0 # there is sequence upstream of the anchor
            				q_index_start, _compare_index = extend_match(full_prot, compare_prot, q_index_start, compare_prot_index)
            			end
            			break
            		end
            		q_index_start
            	end
         
     | 
| 
       61 
54 
     | 
    
         | 
| 
      
 55 
     | 
    
         
            +
            	# Moves a pair of matching positions upstream for as long as the residues
            	# preceding them keep matching (gaps match anything, see scan_sequences).
            	#
            	# full_prot          - String, reference sequence.
            	# compare_prot       - String, sequence being located.
            	# q_index_start      - Integer, anchor position in +full_prot+.
            	# compare_prot_index - Integer, anchor position in +compare_prot+.
            	#
            	# Returns [q_index_start, compare_prot_index], both decreased by the number
            	# of upstream residues that matched.
            	def extend_match(full_prot, compare_prot, q_index_start, compare_prot_index)
            		# Exclusive ranges: with an anchor at 0 there is nothing upstream. The
            		# inclusive form 0..(index - 1) would become 0..-1, i.e. the whole string.
            		full_prot_upstream = full_prot[0...q_index_start].reverse.split('')
            		compare_prot_upstream = compare_prot[0...compare_prot_index].reverse.split('')
            		extension = scan_sequences(full_prot_upstream, compare_prot_upstream)
            		return q_index_start - extension, compare_prot_index - extension
            	end
         
     | 
| 
       62 
63 
     | 
    
         | 
| 
       63 
     | 
    
         
            -
            	def  
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
            		 
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
      
 64 
     | 
    
         
            +
            	# Counts how many consecutive positions of +compare_seq+ (from its start)
            	# agree with +ref_seq+ (from position +diff+). Two positions agree when the
            	# characters are equal or when either of them is the gap character '-'.
            	# Counting stops at the first disagreement or when +compare_seq+ runs out.
            	#
            	# ref_seq     - Array of one-character Strings, or a String.
            	# compare_seq - Array of one-character Strings, or a String.
            	# diff        - Integer, number of leading +ref_seq+ positions to skip.
            	#
            	# Returns the Integer number of agreeing positions.
            	def scan_sequences(ref_seq, compare_seq, diff = 0)
            		# Strings are accepted too, so callers need not split them beforehand.
            		ref_chars = ref_seq.respond_to?(:chars) ? ref_seq.chars : ref_seq
            		matched = 0
            		ref_chars.each_with_index do |char, i|
            			next if i < diff

            			compare_char = compare_seq[matched]
            			break if compare_char.nil? # compare_seq exhausted
            			break if char != compare_char && char != '-' && compare_char != '-'

            			matched += 1
            		end
            		matched
            	end
         
     | 
| 
       69 
77 
     | 
    
         | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
      
 78 
     | 
    
         
            +
            	# Locates +compare_prot+ inside +full_prot+ by anchoring on the first
            	# gap-free fragment of +compare_prot+ that is longer than 4 residues.
            	#
            	# Only that first long fragment is tried; later fragments are never
            	# examined, even when the anchor is absent from +full_prot+.
            	# NOTE(review): confirm that not falling through to later fragments is
            	# intended.
            	#
            	# When the anchor is not the first element produced by splitting
            	# +compare_prot+ on gaps, the position is corrected with refine_match.
            	#
            	# full_prot    - String searched in.
            	# compare_prot - String with '-' as gap character.
            	#
            	# Returns an Integer position, or nil when there is no long fragment or the
            	# anchor does not occur in +full_prot+.
            	def match_with_ungapped_reference(full_prot, compare_prot)
            		fragments = compare_prot.split(/\-+/)
            		anchor_index = fragments.index { |fragment| fragment.length > 4 }
            		return nil if anchor_index.nil?

            		anchor = fragments[anchor_index]
            		q_index_start = full_prot.index(anchor)
            		if anchor_index > 0 && !q_index_start.nil?
            			# Correction for the part of compare_prot that precedes the anchor.
            			q_index_start = refine_match(anchor, compare_prot, q_index_start)
            		end
            		q_index_start
            	end
         
     | 
| 
       71 
92 
     | 
    
         | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
            		 
     | 
| 
      
 93 
     | 
    
         
            +
            	# Shifts +q_index_start+ back by the number of non-gap characters that
            	# precede +subseq+ inside +seq+.
            	#
            	# subseq        - String, fragment of +seq+ used as anchor.
            	# seq           - String with '-' as gap character.
            	# q_index_start - Integer position to correct.
            	#
            	# Returns the corrected Integer position. +q_index_start+ is returned
            	# unchanged when +subseq+ does not occur in +seq+.
            	def refine_match(subseq, seq, q_index_start)
            		location_seq = seq.index(subseq)
            		# Without this guard a missing subseq ends in nil arithmetic.
            		return q_index_start if location_seq.nil?

            		gaps_on_location = seq[0..location_seq].count('-')
            		q_index_start - (location_seq - gaps_on_location)
            	end
         
     | 
| 
       75 
99 
     | 
    
         | 
| 
      
 100 
     | 
    
         
            +
            	# Flips +hit+ onto the opposite strand of +query_fasta+ and returns the
            	# sequence produced by +query_fasta.complementary_dna+.
            	#
            	# The hit is modified in place: its frame sign is inverted, q_beg and q_end
            	# are mirrored against the last position of the sequence, and +reversed+ is
            	# set to true. For hits whose class is named 'ExoBlastHit' the positions in
            	# q_frameshift (pairs of [position, num_nts]) are mirrored as well.
            	#
            	# query_fasta - sequence String responding to #complementary_dna.
            	# hit         - hit object with q_frame, q_beg, q_end and reversed accessors.
            	#
            	# Returns the converted sequence.
            	def reverse_seq(query_fasta, hit)
            		last_position = query_fasta.length - 1
            		hit.q_frame = -hit.q_frame
            		hit.q_end = last_position - hit.q_end
            		hit.q_beg = last_position - hit.q_beg
            		hit.reversed = true # the TRUE constant no longer exists in Ruby >= 3.2
            		query_fasta = query_fasta.complementary_dna # original author's note: this really does the reverse complement
            		if hit.class.to_s == 'ExoBlastHit'
            			hit.q_frameshift.map! do |position, num_nts|
            				[query_fasta.length - 1 - position, num_nts]
            			end
            		end
            		query_fasta
            	end
         
     | 
| 
       76 
114 
     | 
    
         | 
| 
       77 
115 
     | 
    
         | 
| 
       78 
116 
     | 
    
         
             
            	def corrige_frame(ref_frame,ref_start,ref_end)
         
     | 
| 
         @@ -89,6 +127,10 @@ module CommonFunctions 
     | 
|
| 
       89 
127 
     | 
    
         | 
| 
       90 
128 
     | 
    
         
             
            	end
         
     | 
| 
       91 
129 
     | 
    
         | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
             
     | 
| 
      
 130 
     | 
    
         
            +
            	# Computes the remainder, modulo 3, of the length in nucleotides spanned by
            	# +hit+ on the query (q_beg..q_end, both inclusive).
            	#
            	# hit - object responding to q_beg and q_end (Integers).
            	#
            	# Returns 0, 1 or 2; 0 means the span is a whole number of codons.
            	def check_frame_shift(hit)
            		prot_length_in_nts = hit.q_end - hit.q_beg + 1
            		prot_length_in_nts % 3
            	end
         
     | 
| 
      
 136 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,258 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Copyright (c) 2010 Dario Guerrero & Almudena Bocinos
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # Permission is hereby granted, free of charge, to any person obtaining
         
     | 
| 
      
 4 
     | 
    
         
            +
            # a copy of this software and associated documentation files (the
         
     | 
| 
      
 5 
     | 
    
         
            +
            # 'Software'), to deal in the Software without restriction, including
         
     | 
| 
      
 6 
     | 
    
         
            +
            # without limitation the rights to use, copy, modify, merge, publish,
         
     | 
| 
      
 7 
     | 
    
         
            +
            # distribute, sublicense, and/or sell copies of the Software, and to
         
     | 
| 
      
 8 
     | 
    
         
            +
            # permit persons to whom the Software is furnished to do so, subject to
         
     | 
| 
      
 9 
     | 
    
         
            +
            # the following conditions:
         
     | 
| 
      
 10 
     | 
    
         
            +
            #
         
     | 
| 
      
 11 
     | 
    
         
            +
            # The above copyright notice and this permission notice shall be
         
     | 
| 
      
 12 
     | 
    
         
            +
            # included in all copies or substantial portions of the Software.
         
     | 
| 
      
 13 
     | 
    
         
            +
            #
         
     | 
| 
      
 14 
     | 
    
         
            +
            # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 15 
     | 
    
         
            +
            # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 16 
     | 
    
         
            +
            # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
         
     | 
| 
      
 17 
     | 
    
         
            +
            # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
         
     | 
| 
      
 18 
     | 
    
         
            +
            # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
         
     | 
| 
      
 19 
     | 
    
         
            +
            # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
         
     | 
| 
      
 20 
     | 
    
         
            +
            # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            require 'blast_query'
         
     | 
| 
      
 23 
     | 
    
         
            +
            require 'blast_hit'
         
     | 
| 
      
 24 
     | 
    
         
            +
            require 'fl_string_utils.rb'
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            OPERATION = 0
         
     | 
| 
      
 27 
     | 
    
         
            +
            QUERY = 1
         
     | 
| 
      
 28 
     | 
    
         
            +
            TARGET = 2
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            # A BlastHit that additionally carries two lists, q_frameshift and
            # s_frameshift, both empty on creation.
            class ExoBlastHit < BlastHit
            	attr_accessor :q_frameshift
            	attr_accessor :s_frameshift

            	# Takes the same four coordinates as BlastHit and forwards them untouched.
            	def initialize(start_target, ends_target, start_query, ends_query)
            		super
            		@s_frameshift = []
            		@q_frameshift = []
            	end
            end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            # Extracts alignments from exonerate output files ("vulgar:" lines) and uses them to create instances of "BlastQuery" and their hits
         
     | 
| 
      
 40 
     | 
    
         
            +
            class ExonerateResult
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
              # Parser initialization
         
     | 
| 
      
 43 
     | 
    
         
            +
              # Reads one or several exonerate output files and parses them.
              #
              # input      - a file path, or an Array of file paths.
              # seqs       - stored in @seqs (unigenes, per the original comment).
              # query_seqs - stored in @prot_seqs (proteins, per the original comment).
              # all        - passed on to parse_file: true keeps every alignment, false
              #              keeps only the best scoring alignment per target.
              def initialize(input, seqs= nil, query_seqs = nil, all = true)
                @querys = []
                @seqs = seqs # unigenes
                @prot_seqs = query_seqs # prot

                files = input.is_a?(Array) ? input : [input]
                files.each do |file|
                  # File.readlines closes the file; File.open(file).readlines left it open.
                  parse_file(File.readlines(file), all)
                end
              end
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
              	# Parses the "vulgar:" lines of an exonerate output and hands the parsed
              	# records to convert_parsed_lines.
              	#
              	# Each vulgar line is split on whitespace into 11 fields; the last field
              	# keeps the rest of the line (the alignment description).
              	#
              	# lines - Array of Strings (left untouched, so frozen strings are fine).
              	# all   - when true every record is kept, in input order (Array); otherwise
              	#         only the highest scoring record per target_id is kept (Hash keyed
              	#         by target_id; on a tie the first record wins).
              	#
              	# Returns whatever convert_parsed_lines returns.
              	def parse_file(lines, all)
              		lines_parsed = all ? [] : {}
              		lines.each do |raw_line|
              			next unless raw_line.start_with?('vulgar:')

              			fields = raw_line.chomp.split(' ', 11)
              			features = {
              				'query_id' => fields[1],
              				'query_start_align' => fields[2].to_i,
              				'query_end_align' => fields[3].to_i,
              				'query_strand' => fields[4],
              				'target_id' => fields[5],
              				'target_start_align' => fields[6].to_i,
              				'target_end_align' => fields[7].to_i,
              				'target_strand' => fields[8],
              				'score' => fields[9].to_i,
              				'align_data' => fields[10]
              			}

              			if all
              				lines_parsed << features
              			else
              				# Store the record when the target is new or the score improves.
              				target_id = features['target_id']
              				best = lines_parsed[target_id]
              				lines_parsed[target_id] = features if best.nil? || features['score'] > best['score']
              			end
              		end
              		convert_parsed_lines(lines_parsed)
              	end
         
     | 
| 
      
 81 
     | 
    
         
            +
              
         
     | 
| 
      
 82 
     | 
    
         
            +
            	# Turns parsed exonerate records into BlastQuery objects stored in @querys.
            	#
            	# lines_parsed is either an Array of feature hashes, or a Hash whose values
            	# are feature hashes (iterating a Hash yields [key, value] pairs, so the
            	# features are taken from position 1 of each pair).
            	# Every feature hash must provide 'align_data' (the run of
            	# "<operation> <length> <length>" triples) plus 'target_id' and 'query_id'.
            	#
            	# A new BlastQuery is opened every time 'target_id' changes with respect to
            	# the previous record; each record is then handed to #hiting to build its hit.
            	# Records that fail to convert are reported on stdout and skipped, so a
            	# single malformed record never aborts the whole conversion.
            	#
            	# Returns lines_parsed (the receiver of the iteration).
            	def convert_parsed_lines(lines_parsed)
            		last_query = nil
            		query = nil
            		from_array = lines_parsed.is_a?(Array)
            		lines_parsed.each do |line|
            			features = nil # kept outside begin so the rescue handler can inspect it safely
            			begin
            				features = from_array ? line : line[1]
            				align_data = features['align_data']
            				tags = align_data.scan(/([MFG53S]) ([0-9]+) ([0-9]+)/)
            				tags.map!{|tag| [tag[0], tag[1].to_i, tag[2].to_i]}
            				if features['target_id'] != last_query
            					last_query = features['target_id']
            					query = BlastQuery.new(features['target_id'])
            					@querys << query
            				end
            				hiting(features, tags, query)
            			rescue
            				# features may still be nil (or not a Hash) when the record itself is
            				# malformed; never let the error report raise a second exception.
            				target_id = features.is_a?(Hash) ? features['target_id'] : nil
            				query_id = features.is_a?(Hash) ? features['query_id'] : nil
            				puts "Result: #{target_id} => #{query_id} hasn't been parsed\n#{line}"
            			end
            		end
            	end
         
     | 
| 
      
 107 
     | 
    
         
            +
               
         
     | 
| 
      
 108 
     | 
    
         
            +
               # This method only works correctly when exonerate is run with the --model protein2dna option
         
     | 
| 
      
 109 
     | 
    
         
            +
             	# Converts exonerate's relative coordinates into absolute, BLAST-like ones,
             	# builds a single ExoBlastHit for the record and adds it to +query+.
             	#
             	# features:: hash for one exonerate record; reads 'target_start_align',
             	#            'target_end_align', 'query_start_align', 'query_end_align',
             	#            'target_strand', 'query_id' and 'target_id'.
             	# tags::     array of [operation, query_length, target_length] triples,
             	#            indexed through the OPERATION, QUERY and TARGET constants.
             	# query::    object receiving the hit through #add_hit.
             	#
             	# When both @prot_seqs and @seqs are loaded, the aligned protein strings are
             	# rebuilt too and stored in the hit together with the alignment length and
             	# the identity percentage.
             	#
             	# Naming note: the exonerate "query" is the protein and the exonerate
             	# "target" is the unigene, whereas in the hit the protein is the subject
             	# (s_*) and the unigene is the query (q_*); hence the crossed assignments
             	# at the end of the method.
             	def hiting(features, tags, query)
             		do_align = !@prot_seqs.nil? && !@seqs.nil?
             		start_target = features['target_start_align'] # unigene
             		start_query = features['query_start_align'] # protein
             		ends_target = features['target_end_align']
             		ends_query = features['query_end_align']-1 # exonerate does not report end positions 0-based
             		if features['target_strand'] == '-' # on the reverse-complement strand the start of the target is really its end
             			start_target -= 1
             		else
             			ends_target -= 1
             		end
             		hit = ExoBlastHit.new(start_target+1, ends_target+1, start_query+1, ends_query+1)
             		define_hit_parameters(hit, features, tags)
             		query.add_hit(hit)

             		# Define alignment and BLAST-like parameters
             		target_alignment = ''
             		query_alignment = ''
             		counter_target = start_target
             		counter_query = start_query
             		if do_align # get seqs
             			query_seq = @prot_seqs[features['query_id']]
             			target_seq = @seqs[features['target_id']]
             		end
             		# NOTE(review): target_seq is nil when the sequences are not loaded, so a
             		# '-' strand record raises here in that case (do_reverso_complementary
             		# needs the sequence length) -- confirm that callers always load them.
             		counter_target, target_seq = do_reverso_complementary(counter_target, target_seq) if features['target_strand'] == '-'
             		query_frameshift = []
             		target_frameshift = []
             		gap_shift = 0
             		tags.each_with_index do |tag, n_operation|
             			if do_align
             				gap_shift = 0 if tag[OPERATION] != 'G'
             				query_alignment << query_seq[counter_query, tag[QUERY]]
             				target_alignment << target_seq[counter_target, tag[TARGET]].translate
             			end
             			if tag[OPERATION] == 'F'
             				if tag[TARGET] > 0 && tag[TARGET] < 3 # true frameshift
             					gap_shift += 1
             					# Some frameshifts are not followed by a gap operation; insert the gap ourselves.
             					# A frameshift that is the last operation has no following tag at all.
             					next_tag = tags[n_operation+1]
             					if next_tag.nil? || next_tag[OPERATION] != 'G'
             						query_alignment << '-' if do_align
             					end
             				else
             					query_alignment << '-' * (tag[TARGET]/3.0).ceil if do_align
             				end
             				query_frameshift << counter_query
             				fs_counter_target = counter_target
             				# Workaround: reverse-strand parsing should really be modelled as decrementing
             				# the exonerate-format counter instead of incrementing it.
             				fs_counter_target = target_seq.length - counter_target if features['target_strand'] == '-'
             				if tag[TARGET] > 3
             					# Split a long frameshift into whole codons (gap) plus the real shift.
             					real_fs = tag[TARGET]%3
             					real_gap = tag[TARGET] - real_fs
             					fs = [fs_counter_target + real_gap, real_fs]
             				else
             					fs = [fs_counter_target, tag[TARGET]]
             				end
             				target_frameshift << fs
             			elsif tag[OPERATION] == 'G'
             				query_alignment << '-' * (tag[TARGET]/3.0).ceil if do_align
             				diff = tag[QUERY] - gap_shift
             				target_alignment << '-' * diff if do_align && diff > 0
             				gap_shift = 0
             			end
             			counter_query += tag[QUERY]
             			counter_target += tag[TARGET]
             		end
             		hit.s_frameshift = query_frameshift
             		hit.q_frameshift = target_frameshift

             		if do_align
             			hit.q_seq = target_alignment
             			hit.s_seq = query_alignment
             			hit.align_len = query_alignment.length
             			hit.ident = set_ident(target_alignment,query_alignment)
             		end
             	end #def
         
     | 
| 
      
 186 
     | 
    
         
            +
             
     | 
| 
      
 187 
     | 
    
         
            +
            	# Mirrors a 0-based position onto the opposite strand and returns it
            	# together with the result of String#complementary_dna on the sequence.
            	#
            	# Returns a two-element array: [mirrored_position, converted_sequence].
            	def do_reverso_complementary(counter_target, target_seq)
            		mirrored_position = target_seq.length - 1 - counter_target
            		[mirrored_position, target_seq.complementary_dna]
            	end
         
     | 
| 
      
 192 
     | 
    
         
            +
             
     | 
| 
      
 193 
     | 
    
         
            +
            	# Percentage of columns of target_alignment whose character equals the
            	# character at the same position in query_alignment, formatted to two
            	# decimals and returned as a Float.
            	def set_ident(target_alignment, query_alignment)
            		identical_columns = target_alignment.each_char.each_with_index.count do |residue, column|
            			residue == query_alignment[column]
            		end
            		percentage = identical_columns*100.0/target_alignment.length
            		('%.2f' % percentage).to_f
            	end
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
            	# Fills the BLAST-like attributes of +hit+ from one exonerate record.
            	#
            	# hit::      hit object exposing writers for every attribute set below.
            	# features:: hash for the record; reads 'query_start_align',
            	#            'query_end_align', 'target_start_align', 'target_strand',
            	#            'score' and 'query_id'.
            	# tags::     array of alignment operations; element 0 of each is the
            	#            operation letter, and 'G' operations are counted as gaps.
            	#
            	# Values exonerate does not provide (mismatches, e-value, bit score,
            	# full subject length, definition) are set to 0 or '' as placeholders.
            	def define_hit_parameters(hit, features, tags)
            		hit.gaps = tags.count{|aln| aln[0] == 'G'}
            		hit.reversed = false
            		hit.align_len = (features['query_end_align'] - features['query_start_align']).abs+1
            		hit.mismatches = 0
            		hit.e_val = 0
            		hit.bit_score = 0
            		hit.score = features['score']
            		hit.s_frame = nil
            		# The frame is 1..3 from the target start; its sign carries the strand.
            		strand = features['target_strand'] == '-' ? -1 : 1
            		hit.q_frame = ((features['target_start_align'] % 3) + 1) * strand
            		# The exonerate query (protein) plays the role of the BLAST subject.
            		hit.subject_id = features['query_id']
            		hit.full_subject_length = 0
            		hit.definition = ''
            		hit.acc = features['query_id']
            		hit.q_seq = ''
            		hit.s_seq = ''
            	end
         
     | 
| 
      
 224 
     | 
    
         
            +
             
     | 
| 
      
 225 
     | 
    
         
            +
              # inspect results
         
     | 
| 
      
 226 
     | 
    
         
            +
              # Human-readable summary: a title, a 20-dash rule, the number of
              # querys, and then the #inspect output of every query on its own line.
              def inspect
                header = "Exonerate results:\n" + ('-'*20) + "\nQuerys: #{@querys.count}\n"
                @querys.inject(header) { |text, q| text + q.inspect + "\n" }
              end
         
     | 
| 
      
 233 
     | 
    
         
            +
             
     | 
| 
      
 234 
     | 
    
         
            +
              # find query by name
         
     | 
| 
      
 235 
     | 
    
         
            +
              # Returns the first element of +querys+ whose query_id equals +name_q+,
              # or nil when the collection is empty or nothing matches.
              def find_query(querys,name_q)
                return nil if querys.empty?

                querys.find { |candidate| candidate.query_id == name_q }
              end
         
     | 
| 
      
 245 
     | 
    
         
            +
             
     | 
| 
      
 246 
     | 
    
         
            +
              # check if there are querys
         
     | 
| 
      
 247 
     | 
    
         
            +
              # True when no query has been stored in @querys.
              def empty?
                @querys.empty?
              end
         
     | 
| 
      
 251 
     | 
    
         
            +
             
     | 
| 
      
 252 
     | 
    
         
            +
              # get query count
         
     | 
| 
      
 253 
     | 
    
         
            +
              # Number of querys currently stored in @querys.
              def size
                return @querys.length
              end
         
     | 
| 
      
 256 
     | 
    
         
            +
             
     | 
| 
      
 257 
     | 
    
         
            +
              # Reader/writer for @querys, the collection convert_parsed_lines appends BlastQuery objects to.
              attr_accessor :querys
         
     | 
| 
      
 258 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,688 +1,297 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require 'types'
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'une_los_hit'
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            module FlAnalysis
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
     | 
    
         
            -
            	 
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
            		 
     | 
| 
       10 
     | 
    
         
            -
            		#  
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
            		 
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
            		#  
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
            	$global_warnings = []
         
     | 
| 
      
 7 
     | 
    
         
            +
             
         
     | 
| 
      
 8 
     | 
    
         
            +
            	def analiza_orf_y_fl(seq, hit, options, db_name)
         
     | 
| 
      
 9 
     | 
    
         
            +
            		query_fasta = seq.seq_fasta.upcase.dup # Upcase for prevents complications with masked sequences, dup for discard changes
         
     | 
| 
      
 10 
     | 
    
         
            +
            		if hit.count > 1 # if the sequence has more than one hit, the frames are checked and fixed to get a single hit
         
     | 
| 
      
 11 
     | 
    
         
            +
            				seq_unida = UneLosHit.new(hit, query_fasta)
         
     | 
| 
      
 12 
     | 
    
         
            +
            				full_prot =		seq_unida.full_prot	
         
     | 
| 
      
 13 
     | 
    
         
            +
            				query_fasta =	seq_unida.output_seq	# repaired fasta
         
     | 
| 
      
 14 
     | 
    
         
            +
            				final_hit =		seq_unida.final_hit		# single hit
         
     | 
| 
      
 15 
     | 
    
         
            +
            				$global_warnings +=	seq_unida.msgs		# warning messages
         
     | 
| 
      
 16 
     | 
    
         
            +
            		else
         
     | 
| 
      
 17 
     | 
    
         
            +
            			query_fasta = reverse_seq(query_fasta, hit.first) if hit.first.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
         
     | 
| 
      
 18 
     | 
    
         
            +
            			final_hit = hit.first # single hit
         
     | 
| 
       17 
19 
     | 
    
         
             
            		end
         
     | 
| 
       18 
     | 
    
         
            -
            		
         
     | 
| 
       19 
     | 
    
         
            -
            		q=blast_query
         
     | 
| 
       20 
     | 
    
         
            -
            		msgs = ''
         
     | 
| 
       21 
     | 
    
         
            -
            		atg_status = ''
         
     | 
| 
       22 
     | 
    
         
            -
            		end_status = ''
         
     | 
| 
       23 
     | 
    
         
            -
            		final_status = ''
         
     | 
| 
       24 
     | 
    
         
            -
            		
         
     | 
| 
       25 
     | 
    
         
            -
            		# the fasta sequence is saved
         
     | 
| 
       26 
     | 
    
         
            -
            		query_fasta = seq.seq_fasta
         
     | 
| 
      
 20 
     | 
    
         
            +
            		query_fasta = exonerate_fix_frame_shift(query_fasta, hit) if options[:exonerate]	
         
     | 
| 
       27 
21 
     | 
    
         | 
| 
       28 
     | 
    
         
            -
            		 
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
            				if (seq.get_annotations(:tmp_annotation).empty?)
         
     | 
| 
       34 
     | 
    
         
            -
            					if (seq.sec_desc.empty?)
         
     | 
| 
       35 
     | 
    
         
            -
            						seq.annotate(:apply_tcode,'')
         
     | 
| 
       36 
     | 
    
         
            -
            					else
         
     | 
| 
       37 
     | 
    
         
            -
            						seq.annotate(:tmp_annotation,[seq.sec_desc, '','',''],true)
         
     | 
| 
       38 
     | 
    
         
            -
            					end
         
     | 
| 
       39 
     | 
    
         
            -
            				else
         
     | 
| 
       40 
     | 
    
         
            -
            					save_last_db_annotations(seq)
         
     | 
| 
       41 
     | 
    
         
            -
            				end
         
     | 
| 
       42 
     | 
    
         
            -
            			end
         
     | 
| 
       43 
     | 
    
         
            -
            			
         
     | 
| 
       44 
     | 
    
         
            -
            			return
         
     | 
| 
       45 
     | 
    
         
            -
            		end
         
     | 
| 
       46 
     | 
    
         
            -
            #----------------------------------------------------------------------------------------------------------
         
     | 
| 
       47 
     | 
    
         
            -
            		warnings = ''
         
     | 
| 
       48 
     | 
    
         
            -
            		errors = ''
         
     | 
| 
       49 
     | 
    
         
            -
            		wrong_seq = false
         
     | 
| 
      
 22 
     | 
    
         
            +
            		full_prot = query_fasta[final_hit.q_frame-1, query_fasta.length+1].translate
         
     | 
| 
      
 23 
     | 
    
         
            +
            		original_query_coordinates = [final_hit.q_beg, final_hit.q_end] ## VERBOSE
         
     | 
| 
      
 24 
     | 
    
         
            +
            		seq.show_alignment(final_hit, query_fasta, show_nts) if  $verbose > 2 ## VERBOSE
         
     | 
| 
      
 25 
     | 
    
         
            +
            		atg_status, tmp_prot = set_start_codon(final_hit, options[:distance], full_prot, query_fasta)
         
     | 
| 
      
 26 
     | 
    
         
            +
            		end_status, final_prot = find_end(final_hit, options[:distance], tmp_prot, query_fasta)
         
     | 
| 
       50 
27 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
            		 
     | 
| 
       52 
     | 
    
         
            -
            		if  
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
            			seq_unida = UneLosHit.new(q, query_fasta, pident_threshold)
         
     | 
| 
       55 
     | 
    
         
            -
            			
         
     | 
| 
       56 
     | 
    
         
            -
            			wrong_seq = seq_unida.wrong_seq
         
     | 
| 
       57 
     | 
    
         
            -
            			is_ok = seq_unida.is_ok
         
     | 
| 
       58 
     | 
    
         
            -
            			q_index_start = seq_unida.q_index_start
         
     | 
| 
       59 
     | 
    
         
            -
            			full_prot = seq_unida.full_prot
         
     | 
| 
       60 
     | 
    
         
            -
            			
         
     | 
| 
       61 
     | 
    
         
            -
            			query_fasta = seq_unida.output_seq # repaired fasta
         
     | 
| 
       62 
     | 
    
         
            -
            			
         
     | 
| 
       63 
     | 
    
         
            -
            			final_hit = seq_unida.final_hit # single hit
         
     | 
| 
       64 
     | 
    
         
            -
            			msgs = seq_unida.msgs # warning messages
         
     | 
| 
       65 
     | 
    
         
            -
            			x_number = seq_unida.number_x # number of nucleotides used to fix frame errors
         
     | 
| 
       66 
     | 
    
         
            -
            			
         
     | 
| 
       67 
     | 
    
         
            -
            		else # if there is only one hit
         
     | 
| 
      
 28 
     | 
    
         
            +
            		puts "\n------------------- POST EXTENSION---------------------" if $verbose > 1 ## VERBOSE
         
     | 
| 
      
 29 
     | 
    
         
            +
            		seq.show_alignment(final_hit, query_fasta, show_nts, original_query_coordinates) if  $verbose > 1 ## VERBOSE
         
     | 
| 
      
 30 
     | 
    
         
            +
            		puts "ATG: #{atg_status}  STOP: #{end_status}" if  $verbose > 2 ## VERBOSE
         
     | 
| 
       68 
31 
     | 
    
         | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
            			
         
     | 
| 
       74 
     | 
    
         
            -
            			final_hit = q.hits[0] # single hit
         
     | 
| 
       75 
     | 
    
         
            -
            			x_number = 0 # number of nucleotides used to fix frame errors
         
     | 
| 
       76 
     | 
    
         
            -
            			
         
     | 
| 
       77 
     | 
    
         
            -
            			full_prot = query_fasta[final_hit.q_frame-1, query_fasta.length+1].translate
         
     | 
| 
       78 
     | 
    
         
            -
            			(is_ok, q_index_start) = contenidos_en_prot(final_hit, full_prot, q)
         
     | 
| 
       79 
     | 
    
         
            -
            		end
         
     | 
| 
       80 
     | 
    
         
            -
            		# test_final_hit(final_hit, query_fasta)
         
     | 
| 
       81 
     | 
    
         
            -
            #----------------------------------------------------------------------------------------------------------
         
     | 
| 
       82 
     | 
    
         
            -
            		if wrong_seq
         
     | 
| 
       83 
     | 
    
         
            -
            			warnings = "ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence, " + warnings
         
     | 
| 
       84 
     | 
    
         
            -
            			# puts "ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence"
         
     | 
| 
       85 
     | 
    
         
            -
            			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#1\tcontains sense and antisense hits!!!, putative chimeric sequence, "
         
     | 
| 
       86 
     | 
    
         
            -
            			error_log(q, seq, warnings, db_name)
         
     | 
| 
       87 
     | 
    
         
            -
            			return
         
     | 
| 
       88 
     | 
    
         
            -
            		end
         
     | 
| 
       89 
     | 
    
         
            -
            		#----------------------------------------------------------------------------------------------------------
         
     | 
| 
       90 
     | 
    
         
            -
            		warnings += msgs
         
     | 
| 
       91 
     | 
    
         
            -
            		msgs = ''
         
     | 
| 
       92 
     | 
    
         
            -
            		#----------------------------------------------------------------------------------------------------------
         
     | 
| 
       93 
     | 
    
         
            -
            		if (x_number < 0)
         
     | 
| 
       94 
     | 
    
         
            -
            			warnings = "ERROR#2, unexpected negative index in x_number, " + warnings
         
     | 
| 
       95 
     | 
    
         
            -
            			# puts "ERROR#2, unexpected negative index in x_number"
         
     | 
| 
       96 
     | 
    
         
            -
            			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#2\tunexpected negative index in x_number, "
         
     | 
| 
       97 
     | 
    
         
            -
            			error_log(q, seq, warnings, db_name)
         
     | 
| 
       98 
     | 
    
         
            -
            			return
         
     | 
| 
       99 
     | 
    
         
            -
            		end
         
     | 
| 
       100 
     | 
    
         
            -
            		#----------------------------------------------------------------------------------------------------------
         
     | 
| 
       101 
     | 
    
         
            -
            		if (!is_ok)
         
     | 
| 
       102 
     | 
    
         
            -
            			warnings = "ERROR#3, very serious frame error, " + warnings
         
     | 
| 
       103 
     | 
    
         
            -
            			# puts "#{q.query_def} ERROR#3, hit was NOT found in the protein"
         
     | 
| 
       104 
     | 
    
         
            -
            			errors = "#{db_name}\t#{q.hits[0].acc}\tERROR#3\thit was NOT found in the protein, "
         
     | 
| 
       105 
     | 
    
         
            -
            			# error_log(q, seq, warnings, db_name)
         
     | 
| 
       106 
     | 
    
         
            -
            			# return
         
     | 
| 
       107 
     | 
    
         
            -
            		end
         
     | 
| 
       108 
     | 
    
         
            -
            #----------------------------------------------------------------------------------------------------------
         
     | 
| 
       109 
     | 
    
         
            -
            		fiable = false
         
     | 
| 
       110 
     | 
    
         
            -
            		if ((final_hit.ident >= pident_threshold) && (final_hit.e_val <= evalue_threshold))
         
     | 
| 
       111 
     | 
    
         
            -
            			fiable = true
         
     | 
| 
      
 32 
     | 
    
         
            +
            		# decide the sequence status (Complete, Putative Complete, Internal, N-terminus, Putative N-terminus, C-terminus)
         
     | 
| 
      
 33 
     | 
    
         
            +
            		type, status = determine_status(atg_status, end_status)
         
     | 
| 
      
 34 
     | 
    
         
            +
            		status = compare_seq_length_with_subject(final_prot, options[:distance], final_hit, type, status)
         
     | 
| 
      
 35 
     | 
    
         
            +
            		if final_prot.length >= 25 && final_prot.length.to_f/final_hit.full_subject_length >= options[:subject_coverage] # Prot length min of 25 aa and subject coverage by generated prot of 25%
         
     | 
| 
      
 36 
     | 
    
         
            +
            			save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name)
         
     | 
| 
       112 
37 
     | 
    
         
             
            		end
         
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
            		if (final_hit.q_beg/3 + aas_n_end >= final_hit.s_beg.to_i) 
         
     | 
| 
       115 
     | 
    
         
            -
            			substring = full_prot[0, q_index_start + 10]
         
     | 
| 
       116 
     | 
    
         
            -
            			resto_substring = full_prot[q_index_start + 10, full_prot.length - q_index_start - 10]
         
     | 
| 
      
 38 
     | 
    
         
            +
            	end
         
     | 
| 
       117 
39 
     | 
    
         | 
| 
       118 
     | 
    
         
            -
            			# to look for the beginning of the protein
         
     | 
| 
       119 
     | 
    
         
            -
            			(m_substring, atg_status, msgs) = find_start(final_hit.s_beg, substring, fiable, aas_n_end)
         
     | 
| 
       120 
40 
     | 
    
         | 
| 
       121 
     | 
    
         
            -
             
     | 
| 
       122 
     | 
    
         
            -
             
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
     | 
    
         
            -
             
     | 
| 
      
 41 
     | 
    
         
            +
            	def set_start_codon(final_hit, distance, full_prot, query_fasta)
         
     | 
| 
      
 42 
     | 
    
         
            +
            		q_index_start = contenidos_en_prot(final_hit.q_seq, full_prot) 
         
     | 
| 
      
 43 
     | 
    
         
            +
            		atg_status = nil
         
     | 
| 
      
 44 
     | 
    
         
            +
            		_5prima = q_index_start + distance
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            		if  final_hit.s_beg == 0 && final_hit.q_seq[0] == 'M' && final_hit.s_seq[0] == 'M' #there is M in query and subject at first position of alignment and subject's M is in first position
         
     | 
| 
      
 47 
     | 
    
         
            +
            			atg_status = 'complete'
         
     | 
| 
      
 48 
     | 
    
         
            +
            			tmp_prot = full_prot[q_index_start..full_prot.length]
         
     | 
| 
      
 49 
     | 
    
         
            +
            		elsif _5prima >= final_hit.s_beg
         
     | 
| 
      
 50 
     | 
    
         
            +
            			amine_seq = full_prot[0, _5prima] #Contiene parte amino de la proteina
         
     | 
| 
      
 51 
     | 
    
         
            +
            			carboxile_seq = full_prot[_5prima, full_prot.length - _5prima] #Contiene parte carboxilo de la proteina hasta el fin de la secuencia
         
     | 
| 
      
 52 
     | 
    
         
            +
            			length_before_cut = amine_seq.length
         
     | 
| 
      
 53 
     | 
    
         
            +
            			amine_seq, atg_status = find_start(final_hit.s_beg, amine_seq, distance) # to look for the beginning of the protein
         
     | 
| 
      
 54 
     | 
    
         
            +
            			tmp_prot = "#{amine_seq}#{carboxile_seq}" # merge seqs in prot
         
     | 
| 
      
 55 
     | 
    
         
            +
            			new_q_beg = final_hit.q_frame-1 + (length_before_cut - amine_seq.length) * 3
         
     | 
| 
      
 56 
     | 
    
         
            +
            			modify_5p_align(new_q_beg, final_hit, query_fasta)	if  $verbose > 1 ## VERBOSE, Modify query align
         
     | 
| 
      
 57 
     | 
    
         
            +
            			final_hit.q_beg = new_q_beg # to get the value of the start_ORF index
         
     | 
| 
       125 
58 
     | 
    
         
             
            		else
         
     | 
| 
       126 
     | 
    
         
            -
            			 
     | 
| 
       127 
     | 
    
         
            -
            				# puts "beginning too short!"
         
     | 
| 
       128 
     | 
    
         
            -
            			# end
         
     | 
| 
       129 
     | 
    
         
            -
             
     | 
| 
      
 59 
     | 
    
         
            +
            			$global_warnings << 'UnexpStopBegSeq' if full_prot[0, q_index_start].rindex('*')
         
     | 
| 
       130 
60 
     | 
    
         
             
            			atg_status = 'incomplete'
         
     | 
| 
       131 
     | 
    
         
            -
            			substring = full_prot[0, q_index_start]
         
     | 
| 
       132 
     | 
    
         
            -
            			distance_s_atg = (final_hit.s_beg.to_i - final_hit.q_beg/3) + 1
         
     | 
| 
       133 
     | 
    
         
            -
             
     | 
| 
       134 
     | 
    
         
            -
            			if (substring.rindex('*'))
         
     | 
| 
       135 
     | 
    
         
            -
            				warnings += "Unexpected stop codon in the beginning of your sequence, "
         
     | 
| 
       136 
     | 
    
         
            -
            				# if (@verbose)
         
     | 
| 
       137 
     | 
    
         
            -
            					# puts "#{db_name} -- #{q.query_def} --> Unexpected stop codon in the beginning of your sequence"
         
     | 
| 
       138 
     | 
    
         
            -
            				# end
         
     | 
| 
       139 
     | 
    
         
            -
            			end
         
     | 
| 
       140 
     | 
    
         
            -
             
     | 
| 
       141 
     | 
    
         
            -
            			final_hit.q_beg = final_hit.q_beg.to_i - (substring.length * 3)
         
     | 
| 
       142 
61 
     | 
    
         
             
            			tmp_prot = full_prot
         
     | 
| 
       143 
62 
     | 
    
         
             
            		end
         
     | 
| 
       144 
     | 
    
         
            -
            #----------------------------------------------------------------------------------------------------------
         
     | 
| 
       145 
     | 
    
         
            -
            		# look for the end of the protein
         
     | 
| 
       146 
     | 
    
         
            -
            		(resto_substring, end_substring, end_status, warnings, putative_end) = find_end(final_hit, q, full_prot, tmp_prot, end_status, warnings, aas_n_end)
         
     | 
| 
       147 
     | 
    
         
            -
            #----------------------------------------------------------------------------------------------------------
         
     | 
| 
       148 
     | 
    
         
            -
            		final_prot = "#{resto_substring}#{end_substring}"
         
     | 
| 
       149 
     | 
    
         
            -
            		
         
     | 
| 
       150 
     | 
    
         
            -
            		warnings += msgs
         
     | 
| 
       151 
     | 
    
         
            -
            		
         
     | 
| 
       152 
     | 
    
         
            -
            		# to get the value of the end_ORF index
         
     | 
| 
       153 
     | 
    
         
            -
            		if (atg_status == 'complete')
         
     | 
| 
       154 
     | 
    
         
            -
            			final_hit.q_end = final_hit.q_beg - 3 + (final_prot.length * 3)
         
     | 
| 
       155 
     | 
    
         
            -
            		else
         
     | 
| 
       156 
     | 
    
         
            -
            			if (putative_end)
         
     | 
| 
       157 
     | 
    
         
            -
            				final_hit.q_end = final_hit.q_end - 45 + (putative_end*3)
         
     | 
| 
       158 
     | 
    
         
            -
            			end
         
     | 
| 
       159 
     | 
    
         
            -
            		end
         
     | 
| 
       160 
     | 
    
         
            -
            		
         
     | 
| 
       161 
     | 
    
         
            -
            #--------------------------------------------------------------------------------------------------------------
         
     | 
| 
       162 
     | 
    
         
            -
            		# decide the sequence status (Complete, Putative Complete, Internal, N-terminus, Putative N-terminus, C-terminus)
         
     | 
| 
       163 
     | 
    
         
            -
            		final_status = determine_status(atg_status,end_status)
         
     | 
| 
       164 
     | 
    
         
            -
            		#----------------------------------------------------------------------------------------------------------
         
     | 
| 
       165 
     | 
    
         
            -
            		if (final_prot.length - 2*aas_n_end > final_hit.full_subject_length)
         
     | 
| 
       166 
     | 
    
         
            -
            			warnings += " your sequence is longer than subject: #{final_prot.length} - #{final_hit.full_subject_length}"
         
     | 
| 
       167 
63 
     | 
    
         | 
| 
       168 
     | 
    
         
            -
            		 
     | 
| 
       169 
     | 
    
         
            -
            			warnings += " your sequence is shorter than subject: #{final_prot.length} - #{final_hit.full_subject_length}"
         
     | 
| 
       170 
     | 
    
         
            -
            			if (final_prot.length + 100 < final_hit.full_subject_length) || (final_prot.length*2 < final_hit.full_subject_length)
         
     | 
| 
       171 
     | 
    
         
            -
            				
         
     | 
| 
       172 
     | 
    
         
            -
            				if (final_status == 'Complete')
         
     | 
| 
       173 
     | 
    
         
            -
            					final_status = 'Putative Complete'
         
     | 
| 
       174 
     | 
    
         
            -
            					warnings += ". Was predicted as Complete, but is very much shorter than de subject"
         
     | 
| 
       175 
     | 
    
         
            -
            					# if (@verbose)
         
     | 
| 
       176 
     | 
    
         
            -
            					# 	puts "#{db_name} -- #{q.query_def} --> your sequence is 100 aas shorter than the subject or shorter than the half length of the subject"
         
     | 
| 
       177 
     | 
    
         
            -
            					# end
         
     | 
| 
       178 
     | 
    
         
            -
            				end
         
     | 
| 
       179 
     | 
    
         
            -
            			end
         
     | 
| 
       180 
     | 
    
         
            -
            		end
         
     | 
| 
       181 
     | 
    
         
            -
            		
         
     | 
| 
       182 
     | 
    
         
            -
            		# test_final_hit(final_hit, query_fasta)
         
     | 
| 
       183 
     | 
    
         
            -
            		print_annotations(seq, q, final_hit, final_status, final_prot, warnings, query_fasta, db_name)
         
     | 
| 
       184 
     | 
    
         
            -
            		
         
     | 
| 
       185 
     | 
    
         
            -
            	end
         
     | 
| 
       186 
     | 
    
         
            -
             
     | 
| 
       187 
     | 
    
         
            -
             
     | 
| 
       188 
     | 
    
         
            -
            	def test_blast_hits(q)
         
     | 
| 
       189 
     | 
    
         
            -
            		
         
     | 
| 
       190 
     | 
    
         
            -
            			puts "query_def: #{q.query_def} full_query_length: #{q.full_query_length} ------------------------------------------------"
         
     | 
| 
       191 
     | 
    
         
            -
            			
         
     | 
| 
       192 
     | 
    
         
            -
            			q.hits.each do |h|
         
     | 
| 
       193 
     | 
    
         
            -
            				puts "\t subject_id: #{h.acc}"
         
     | 
| 
       194 
     | 
    
         
            -
            				puts "\t acc: #{h.acc}"
         
     | 
| 
       195 
     | 
    
         
            -
            				puts "\t full_subject_length: #{h.full_subject_length}"
         
     | 
| 
       196 
     | 
    
         
            -
            				puts "\t q_beg: #{h.q_beg + 1}"
         
     | 
| 
       197 
     | 
    
         
            -
            				puts "\t q_end: #{h.q_end + 1}"
         
     | 
| 
       198 
     | 
    
         
            -
            				puts "\t q_frame: #{h.q_frame}"
         
     | 
| 
       199 
     | 
    
         
            -
            				puts "\t s_beg: #{h.s_beg + 1}"
         
     | 
| 
       200 
     | 
    
         
            -
            				puts "\t s_end: #{h.s_end + 1}"
         
     | 
| 
       201 
     | 
    
         
            -
            				puts "\t s_frame: #{h.s_frame}"
         
     | 
| 
       202 
     | 
    
         
            -
            				puts "\t align_len: #{h.align_len}"
         
     | 
| 
       203 
     | 
    
         
            -
            				puts "\t gaps: #{h.gaps}"
         
     | 
| 
       204 
     | 
    
         
            -
            				puts "\t mismatches: #{h.mismatches}"
         
     | 
| 
       205 
     | 
    
         
            -
            				puts "\t reversed: #{h.reversed}"
         
     | 
| 
       206 
     | 
    
         
            -
            				puts "\t score: #{h.score}"
         
     | 
| 
       207 
     | 
    
         
            -
            				puts "\t bit_score: #{h.bit_score}"
         
     | 
| 
       208 
     | 
    
         
            -
            				puts "\t ident: #{h.ident}"
         
     | 
| 
       209 
     | 
    
         
            -
            				puts "\t e_val: #{h.e_val}"
         
     | 
| 
       210 
     | 
    
         
            -
            				puts "\t definition: #{h.definition}"
         
     | 
| 
       211 
     | 
    
         
            -
            				puts "\t q_seq: #{h.q_seq}"
         
     | 
| 
       212 
     | 
    
         
            -
            				puts "\t s_seq: #{h.s_seq}"
         
     | 
| 
       213 
     | 
    
         
            -
            				
         
     | 
| 
       214 
     | 
    
         
            -
            			end
         
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
       216 
     | 
    
         
            -
            	end
         
     | 
| 
       217 
     | 
    
         
            -
             
     | 
| 
       218 
     | 
    
         
            -
             
     | 
| 
       219 
     | 
    
         
            -
            	def test_final_hit(final_hit, query_fasta)
         
     | 
| 
       220 
     | 
    
         
            -
            		
         
     | 
| 
       221 
     | 
    
         
            -
            		puts "\t acc: #{final_hit.acc}"
         
     | 
| 
       222 
     | 
    
         
            -
            		puts "\t full_subject_length: #{final_hit.full_subject_length}"
         
     | 
| 
       223 
     | 
    
         
            -
             
     | 
| 
       224 
     | 
    
         
            -
            		puts "\n\t q_frame: #{final_hit.q_frame}"
         
     | 
| 
       225 
     | 
    
         
            -
            		puts "\t reversed: #{final_hit.reversed}"
         
     | 
| 
       226 
     | 
    
         
            -
            		
         
     | 
| 
       227 
     | 
    
         
            -
            		puts "\n\t q_beg-q_end: #{final_hit.q_beg + 1} - #{final_hit.q_end + 1}"
         
     | 
| 
       228 
     | 
    
         
            -
            		puts "\t s_beg - s_end: #{final_hit.s_beg + 1} - #{final_hit.s_end + 1}"
         
     | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
       230 
     | 
    
         
            -
            		puts "\n\t score: #{final_hit.score}, bit_score: #{final_hit.bit_score}, ident: #{final_hit.ident}, e_val: #{final_hit.e_val}"
         
     | 
| 
       231 
     | 
    
         
            -
             
     | 
| 
       232 
     | 
    
         
            -
            		puts "\n\t definition: #{final_hit.definition}"
         
     | 
| 
       233 
     | 
    
         
            -
            		puts "\t q_seq: #{final_hit.q_seq}"
         
     | 
| 
       234 
     | 
    
         
            -
            		puts "\t s_seq: #{final_hit.s_seq}"
         
     | 
| 
       235 
     | 
    
         
            -
            		
         
     | 
| 
       236 
     | 
    
         
            -
            		puts "\nnt q_beg-q_end\n#{query_fasta[final_hit.q_beg..final_hit.q_end]}"
         
     | 
| 
       237 
     | 
    
         
            -
            		puts "\n\nprot q_beg-q_end\n#{query_fasta[final_hit.q_beg..final_hit.q_end].translate}"
         
     | 
| 
       238 
     | 
    
         
            -
            		
         
     | 
| 
      
 64 
     | 
    
         
            +
            		return atg_status, tmp_prot
         
     | 
| 
       239 
65 
     | 
    
         
             
            	end
         
     | 
| 
       240 
66 
     | 
    
         | 
| 
       241 
67 
     | 
    
         | 
| 
       242 
     | 
    
         
            -
            	def  
     | 
| 
       243 
     | 
    
         
            -
            		#  
     | 
| 
       244 
     | 
    
         
            -
            		
         
     | 
| 
       245 
     | 
    
         
            -
            		if  
     | 
| 
       246 
     | 
    
         
            -
            			 
     | 
| 
       247 
     | 
    
         
            -
             
     | 
| 
       248 
     | 
    
         
            -
             
     | 
| 
       249 
     | 
    
         
            -
             
     | 
| 
       250 
     | 
    
         
            -
             
     | 
| 
       251 
     | 
    
         
            -
             
     | 
| 
       252 
     | 
    
         
            -
            					else
         
     | 
| 
       253 
     | 
    
         
            -
            						seq.annotate(:apply_tcode,'')
         
     | 
| 
       254 
     | 
    
         
            -
            					end
         
     | 
| 
       255 
     | 
    
         
            -
            				else
         
     | 
| 
       256 
     | 
    
         
            -
            					warnings = "Coding sequence with some errors, #{warnings}"
         
     | 
| 
       257 
     | 
    
         
            -
            					tmp_annot = seq.sec_desc.sub('my_warning',"#{warnings}")
         
     | 
| 
       258 
     | 
    
         
            -
            					seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
         
     | 
| 
       259 
     | 
    
         
            -
            				end
         
     | 
| 
       260 
     | 
    
         
            -
            			else
         
     | 
| 
       261 
     | 
    
         
            -
            				save_last_db_annotations(seq)
         
     | 
| 
       262 
     | 
    
         
            -
            			end
         
     | 
| 
       263 
     | 
    
         
            -
            		else
         
     | 
| 
       264 
     | 
    
         
            -
            			if (seq.sec_desc.empty?)
         
     | 
| 
       265 
     | 
    
         
            -
            				if (!q.hits[0].definition.nil?)
         
     | 
| 
       266 
     | 
    
         
            -
            					warnings = "Coding sequence with some errors, #{warnings}"
         
     | 
| 
       267 
     | 
    
         
            -
            					seq.sec_desc = "#{q.query_def}\t#{seq.fasta_length}\t#{q.hits[0].acc}\t#{db_name}\tMisassembled\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
         
     | 
| 
       268 
     | 
    
         
            -
            				end
         
     | 
| 
       269 
     | 
    
         
            -
            			end
         
     | 
| 
       270 
     | 
    
         
            -
            		end
         
     | 
| 
       271 
     | 
    
         
            -
            		
         
     | 
| 
       272 
     | 
    
         
            -
            	end
         
     | 
| 
       273 
     | 
    
         
            -
             
     | 
| 
       274 
     | 
    
         
            -
             
     | 
| 
       275 
     | 
    
         
            -
            	def save_last_db_annotations(seq)
         
     | 
| 
       276 
     | 
    
         
            -
            	
         
     | 
| 
       277 
     | 
    
         
            -
            		# puts "sequence not complete! recovering annotations from previous database! sldba!!"
         
     | 
| 
       278 
     | 
    
         
            -
            		(q, final_hit, final_prot, query_fasta, final_status) = seq.get_annotations(:tmp_annotation).first[:message][3]
         
     | 
| 
       279 
     | 
    
         
            -
            		print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
         
     | 
| 
       280 
     | 
    
         
            -
             
     | 
| 
       281 
     | 
    
         
            -
            		(name,fasta_length,acc,db_name,final_status,testcode,e_val,ident,my_length,subject_length,warnings,q_frame,q_beg,q_end,s_beg,s_end,description,final_prot) = seq.get_annotations(:tmp_annotation).first[:message][0].split("\t")
         
     | 
| 
       282 
     | 
    
         
            -
            		if (final_hit.reversed) 
         
     | 
| 
       283 
     | 
    
         
            -
            			(kk, q_frame, q_end, q_beg) = reverse_seq(query_fasta, q_frame.to_i, q_beg.to_i, q_end.to_i)
         
     | 
| 
       284 
     | 
    
         
            -
            		end
         
     | 
| 
       285 
     | 
    
         
            -
            	
         
     | 
| 
       286 
     | 
    
         
            -
            		seq.annotate(:protein,seq.get_annotations(:tmp_annotation).first[:message][1])
         
     | 
| 
       287 
     | 
    
         
            -
            		seq.annotate(:alignment,seq.get_annotations(:tmp_annotation).first[:message][2])
         
     | 
| 
       288 
     | 
    
         
            -
            		tmp_annot = "#{name}\t#{fasta_length}\t#{acc}\t#{db_name}\t#{final_status}\t\t#{e_val}\t#{ident}\t#{my_length}\t#{subject_length}\t#{warnings}\t#{q_frame}\t#{q_beg}\t#{q_end}\t#{s_beg}\t#{s_end}\t#{description}\t#{final_prot}"
         
     | 
| 
       289 
     | 
    
         
            -
            		seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
         
     | 
| 
       290 
     | 
    
         
            -
            	
         
     | 
| 
       291 
     | 
    
         
            -
            	end
         
     | 
| 
       292 
     | 
    
         
            -
             
     | 
| 
       293 
     | 
    
         
            -
             
     | 
| 
       294 
     | 
    
         
            -
            	def find_start(subject_start, substring, fiable, aas_n_end)
         
     | 
| 
       295 
     | 
    
         
            -
            		
         
     | 
| 
       296 
     | 
    
         
            -
            		tmp_prot = ''
         
     | 
| 
       297 
     | 
    
         
            -
            		msgs = ''
         
     | 
| 
       298 
     | 
    
         
            -
            		atg_status = 'incomplete' # complete, incomplete or putative
         
     | 
| 
       299 
     | 
    
         
            -
             
     | 
| 
       300 
     | 
    
         
            -
            		# puts "\nsubstring (#{substring.length} aas):\n#{substring}"
         
     | 
| 
       301 
     | 
    
         
            -
            		stop_codon = substring.rindex('*')
         
     | 
| 
       302 
     | 
    
         
            -
            		
         
     | 
| 
       303 
     | 
    
         
            -
            		# marcamos la distancia al s_beg desde el principio del substring
         
     | 
| 
       304 
     | 
    
         
            -
            		# s_beg_distance = (substring.length) - subject_start
         
     | 
| 
       305 
     | 
    
         
            -
            		s_beg_distance = (substring.length - 10) - subject_start
         
     | 
| 
       306 
     | 
    
         
            -
            		# marcamos la distancia al s_beg desde el final del substring
         
     | 
| 
       307 
     | 
    
         
            -
            		atg_distance = (subject_start + 1) - (substring.length - 10)
         
     | 
| 
       308 
     | 
    
         
            -
            		if (atg_distance <= 0) 
         
     | 
| 
       309 
     | 
    
         
            -
            			atg_distance = 0
         
     | 
| 
       310 
     | 
    
         
            -
            		else
         
     | 
| 
       311 
     | 
    
         
            -
            			# puts "expected atg_distance = 0, your sequence atg_distance = #{atg_distance}; limit (1-15)"
         
     | 
| 
       312 
     | 
    
         
            -
            			msgs = "atg_distance in limit (1-15): atg_distance = #{atg_distance}, "
         
     | 
| 
       313 
     | 
    
         
            -
            		end
         
     | 
| 
       314 
     | 
    
         
            -
            		
         
     | 
| 
       315 
     | 
    
         
            -
            		# puts "s_beg_distance:#{s_beg_distance}, stop_codon: #{stop_codon}, subject_start: #{subject_start + 1}, atg_distance: #{atg_distance}"
         
     | 
| 
       316 
     | 
    
         
            -
            		#----------------------------------------------------------------------------------------------------------
         
     | 
| 
       317 
     | 
    
         
            -
            		# tenemos un codon de parada en el substring 5 prima
         
     | 
| 
       318 
     | 
    
         
            -
            		if (stop_codon)
         
     | 
| 
       319 
     | 
    
         
            -
            			stop_codon += 1
         
     | 
| 
       320 
     | 
    
         
            -
            			# ahora vamos a ver si el stop esta antes o despues del s_beg
         
     | 
| 
       321 
     | 
    
         
            -
            			if (stop_codon <= s_beg_distance) # esta antes
         
     | 
| 
       322 
     | 
    
         
            -
            				substring = substring[stop_codon, substring.length - stop_codon]
         
     | 
| 
       323 
     | 
    
         
            -
            				# puts "\nhay un codon de parada en el substring (#{substring.length} aas)\tstop_codon:#{stop_codon +1}\n#{substring}\n\n"
         
     | 
| 
       324 
     | 
    
         
            -
            				
         
     | 
| 
       325 
     | 
    
         
            -
            				first_m = substring.index('M')
         
     | 
| 
       326 
     | 
    
         
            -
            				
         
     | 
| 
       327 
     | 
    
         
            -
            				if (first_m) # tenemos M y stop ---------------------------------------------------------------------------
         
     | 
| 
       328 
     | 
    
         
            -
            					substring = substring[first_m, substring.length - first_m]
         
     | 
| 
       329 
     | 
    
         
            -
            					
         
     | 
| 
      
 68 
     | 
    
         
            +
            	def find_start(subject_start, amine_seq, distance)		
         
     | 
| 
      
 69 
     | 
    
         
            +
            		atg_status = 'putative' # complete, incomplete or putative
         
     | 
| 
      
 70 
     | 
    
         
            +
            		stop_codon = amine_seq.rindex('*')
         
     | 
| 
      
 71 
     | 
    
         
            +
            		if !stop_codon.nil? # tenemos un codon de parada en el amine_seq 5 prima
         
     | 
| 
      
 72 
     | 
    
         
            +
            			_5prime_UTR = amine_seq.length - 10 - subject_start # marcamos la distancia al s_beg desde el principio del amine_seq
         
     | 
| 
      
 73 
     | 
    
         
            +
            			amine_seq = amine_seq[stop_codon + 1 .. amine_seq.length - 1]
         
     | 
| 
      
 74 
     | 
    
         
            +
            			first_m = amine_seq.index('M')
         
     | 
| 
      
 75 
     | 
    
         
            +
            			if stop_codon <= _5prime_UTR # Ver si stop está en zona 5 prima UTR
         
     | 
| 
      
 76 
     | 
    
         
            +
            				if first_m # tenemos M 
         
     | 
| 
      
 77 
     | 
    
         
            +
            					amine_seq = amine_seq[first_m .. amine_seq.length - 1]					
         
     | 
| 
       330 
78 
     | 
    
         
             
            					atg_status = 'complete'
         
     | 
| 
       331 
     | 
    
         
            -
            				else # con STOP pero sin M  
     | 
| 
       332 
     | 
    
         
            -
            					 
     | 
| 
       333 
     | 
    
         
            -
            					# puts "there is not a start codon near the expected beginning of your sequence, distance to subject ATG= #{atg_distance} aas --> good simil: #{fiable}"
         
     | 
| 
       334 
     | 
    
         
            -
            					msgs += "W1: There is no M at the beginning, "
         
     | 
| 
      
 79 
     | 
    
         
            +
            				else # con STOP pero sin M 
         
     | 
| 
      
 80 
     | 
    
         
            +
            					$global_warnings << 'noM1'
         
     | 
| 
       335 
81 
     | 
    
         
             
            				end
         
     | 
| 
       336 
     | 
    
         
            -
            				#----------------------------------------------------------------------------------------------------------
         
     | 
| 
       337 
82 
     | 
    
         
             
            			else # esta despues, un cambio de fase impide analizar el principio
         
     | 
| 
       338 
     | 
    
         
            -
            				 
     | 
| 
       339 
     | 
    
         
            -
             
     | 
| 
       340 
     | 
    
         
            -
            				if (first_m) # tenemos M y unexpected stop # comentar?
         
     | 
| 
       341 
     | 
    
         
            -
            					substring = substring[first_m, substring.length - first_m] # comentar?
         
     | 
| 
       342 
     | 
    
         
            -
            				end # comentar?
         
     | 
| 
       343 
     | 
    
         
            -
            				# TODO esto se puede cambiar!
         
     | 
| 
       344 
     | 
    
         
            -
            				atg_status = 'putative'
         
     | 
| 
       345 
     | 
    
         
            -
            				msgs += " Unexpected STOP codon in 5 prime region, "
         
     | 
| 
       346 
     | 
    
         
            -
            				# puts "\nhay un codon de parada inesperado en el substring (#{substring.length} aas)\tstop_codon:#{stop_codon}, s_beg_distance: #{s_beg_distance +1}, atg_distance: #{atg_distance}"
         
     | 
| 
      
 83 
     | 
    
         
            +
            				$global_warnings << 'UnexpSTOP5p'
         
     | 
| 
      
 84 
     | 
    
         
            +
             				amine_seq = amine_seq[first_m .. amine_seq.length - 1] if first_m # tenemos M
         
     | 
| 
       347 
85 
     | 
    
         
             
            			end
         
     | 
| 
       348 
     | 
    
         
            -
            			#---------------------------------------------------------------------------------------------------------------
         
     | 
| 
       349 
86 
     | 
    
         
             
            		else # no hay stop codon
         
     | 
| 
       350 
     | 
    
         
            -
            			first_m =  
     | 
| 
       351 
     | 
    
         
            -
            			if  
     | 
| 
       352 
     | 
    
         
            -
            				 
     | 
| 
       353 
     | 
    
         
            -
            				 
     | 
| 
       354 
     | 
    
         
            -
            				 
     | 
| 
       355 
     | 
    
         
            -
             
     | 
| 
       356 
     | 
    
         
            -
            				if (m_distance > aas_n_end*2) # sin STOP, con atg pero muy lejos del inicio que marca el subject ---------------
         
     | 
| 
       357 
     | 
    
         
            -
            					# puts "No stop codon before M and M found is too far from subject M, distance to subject ATG= #{m_distance} aas --> good simil: #{fiable}"
         
     | 
| 
       358 
     | 
    
         
            -
            					msgs += "No stop codon before M and M found is too far from subject M, "
         
     | 
| 
      
 87 
     | 
    
         
            +
            			first_m = amine_seq.index('M')
         
     | 
| 
      
 88 
     | 
    
         
            +
            			if first_m # tenemos M
         
     | 
| 
      
 89 
     | 
    
         
            +
            				amine_seq = amine_seq[first_m .. amine_seq.length - 1]
         
     | 
| 
      
 90 
     | 
    
         
            +
            				m_distance = (subject_start - amine_seq.length).abs - 10
         
     | 
| 
      
 91 
     | 
    
         
            +
            				if m_distance.abs > distance*2 # con atg pero muy lejos del inicio que marca el subject
         
     | 
| 
      
 92 
     | 
    
         
            +
            					$global_warnings << 'NoStopMfar'
         
     | 
| 
       359 
93 
     | 
    
         
             
            					atg_status = 'incomplete'
         
     | 
| 
       360 
     | 
    
         
            -
            				else
         
     | 
| 
       361 
     | 
    
         
            -
            					 
     | 
| 
       362 
     | 
    
         
            -
            						# msgs += "No stop codon before M but high homology subject, "
         
     | 
| 
       363 
     | 
    
         
            -
            						atg_status = 'complete'
         
     | 
| 
       364 
     | 
    
         
            -
            					else # Tenemos M pero no tenemos stop y el ortologo no es fiable -------------------------------------------
         
     | 
| 
       365 
     | 
    
         
            -
            						# puts "No stop codon before M and low homology subject, distance to subject ATG= #{m_distance} aas --> good simil: #{fiable}"
         
     | 
| 
       366 
     | 
    
         
            -
            						msgs += "No stop codon before M and low homology subject, "
         
     | 
| 
       367 
     | 
    
         
            -
            						atg_status = 'putative'
         
     | 
| 
       368 
     | 
    
         
            -
            					end
         
     | 
| 
      
 94 
     | 
    
         
            +
            				else # Tenemos M
         
     | 
| 
      
 95 
     | 
    
         
            +
            					atg_status = 'complete'
         
     | 
| 
       369 
96 
     | 
    
         
             
            				end
         
     | 
| 
       370 
     | 
    
         
            -
            			else # sin M 
     | 
| 
       371 
     | 
    
         
            -
            				 
     | 
| 
       372 
     | 
    
         
            -
            				# puts "your sequence has the subject beginning but there is not start codon at the beginning, distance to subject ATG= #{atg_distance} aas --> good simil: #{fiable}"
         
     | 
| 
       373 
     | 
    
         
            -
            				msgs += "W2: There is no M at the beginning, "
         
     | 
| 
      
 97 
     | 
    
         
            +
            			else # sin M
         
     | 
| 
      
 98 
     | 
    
         
            +
            				$global_warnings << 'noM2'
         
     | 
| 
       374 
99 
     | 
    
         
             
            			end
         
     | 
| 
       375 
100 
     | 
    
         
             
            		end
         
     | 
| 
       376 
     | 
    
         
            -
            		
         
     | 
| 
       377 
     | 
    
         
            -
            		return [substring, atg_status, msgs]
         
     | 
| 
       378 
     | 
    
         
            -
            		
         
     | 
| 
      
 101 
     | 
    
         
            +
            		return amine_seq, atg_status
         
     | 
| 
       379 
102 
     | 
    
         
             
            	end
         
     | 
| 
       380 
103 
     | 
    
         | 
| 
       381 
104 
     | 
    
         | 
| 
       382 
     | 
    
         
            -
            	def find_end(final_hit,  
     | 
| 
       383 
     | 
    
         
            -
            		 
     | 
| 
       384 
     | 
    
         
            -
            		 
     | 
| 
       385 
     | 
    
         
            -
            		 
     | 
| 
       386 
     | 
    
         
            -
            		 
     | 
| 
       387 
     | 
    
         
            -
            		
         
     | 
| 
       388 
     | 
    
         
            -
            		 
     | 
| 
       389 
     | 
    
         
            -
             
     | 
| 
       390 
     | 
    
         
            -
            		 
     | 
| 
       391 
     | 
    
         
            -
             
     | 
| 
       392 
     | 
    
         
            -
             
     | 
| 
       393 
     | 
    
         
            -
            		
         
     | 
| 
       394 
     | 
    
         
            -
            		# si no tenemos suficiente secuencia para tener el stop (nos faltan 15 aas o mas)
         
     | 
| 
       395 
     | 
    
         
            -
            		if (sq_end_distance + aas_n_end < 0)
         
     | 
| 
      
 105 
     | 
    
         
            +
            	def find_end(final_hit, max_distance, tmp_prot, query_fasta)
         
     | 
| 
      
 106 
     | 
    
         
            +
            		frame_shift = check_frame_shift(final_hit)
         
     | 
| 
      
 107 
     | 
    
         
            +
            		beg_end_string =(final_hit.q_end-final_hit.q_beg)/3 - max_distance # Begin of terminal region (Coordinate) in tmp_prot
         
     | 
| 
      
 108 
     | 
    
         
            +
            		atg_substring = tmp_prot[0..beg_end_string] # prot without terminal region
         
     | 
| 
      
 109 
     | 
    
         
            +
            		end_substring = tmp_prot[beg_end_string + 1 ..tmp_prot.length-1] #Take 3' of unigen
         
     | 
| 
      
 110 
     | 
    
         
            +
            		#puts "\e[32m\nfinal_hit.q_end-final_hit.q_beg: #{final_hit.q_end-final_hit.q_beg} /3  - max_distance: #{max_distance}\e[0m"
         
     | 
| 
      
 111 
     | 
    
         
            +
            		#puts "\e[33mbeg_end_string: #{beg_end_string}\e[0m"
         
     | 
| 
      
 112 
     | 
    
         
            +
            		#puts "\e[35mtmp_prot.length: #{tmp_prot.length}\e[0m"
         
     | 
| 
      
 113 
     | 
    
         
            +
            		if beg_end_string < 0 || end_substring.nil? #Sequences whose homology is at end of it and dont't exits the 3' part of unigene
         
     | 
| 
      
 114 
     | 
    
         
            +
            			atg_substring = tmp_prot
         
     | 
| 
      
 115 
     | 
    
         
            +
            			end_substring = ''
         
     | 
| 
       396 
116 
     | 
    
         
             
            			end_status = 'incomplete'
         
     | 
| 
       397 
     | 
    
         
            -
             
     | 
| 
       398 
     | 
    
         
            -
             
     | 
| 
       399 
     | 
    
         
            -
             
     | 
| 
       400 
     | 
    
         
            -
             
     | 
| 
       401 
     | 
    
         
            -
             
     | 
| 
       402 
     | 
    
         
            -
             
     | 
| 
       403 
     | 
    
         
            -
            			 
     | 
| 
       404 
     | 
    
         
            -
             
     | 
| 
       405 
     | 
    
         
            -
            				# if (@verbose)
         
     | 
| 
       406 
     | 
    
         
            -
            				# 	puts "#{db_name} -- #{q.query_def} --> Distance to subject end: #{sq_end_distance.abs} aas"
         
     | 
| 
       407 
     | 
    
         
            -
            				# end
         
     | 
| 
       408 
     | 
    
         
            -
            			end
         
     | 
| 
      
 117 
     | 
    
         
            +
            		else
         
     | 
| 
      
 118 
     | 
    
         
            +
            			end_status = 'putative'
         
     | 
| 
      
 119 
     | 
    
         
            +
            			putative_end = end_substring.index('*')
         
     | 
| 
      
 120 
     | 
    
         
            +
            			end_substring = end_substring[0 .. putative_end] if putative_end
         
     | 
| 
      
 121 
     | 
    
         
            +
            			
         
     | 
| 
      
 122 
     | 
    
         
            +
            			s_end_resto = final_hit.s_len - (final_hit.s_end + 1) # en el subject, numero de aas que necesito cubrir
         
     | 
| 
      
 123 
     | 
    
         
            +
            			q_end_resto = (query_fasta.length - final_hit.q_end)/3 # en el query, numero de aas que tengo	
         
     | 
| 
      
 124 
     | 
    
         
            +
            			sq_end_distance = q_end_resto - s_end_resto # La diferencia se hace a partir del final del hit para que el calculo no quede sesgado en caso de que la secuecia este truncada por 5'
         
     | 
| 
       409 
125 
     | 
    
         | 
| 
       410 
     | 
    
         
            -
             
     | 
| 
       411 
     | 
    
         
            -
             
     | 
| 
       412 
     | 
    
         
            -
             
     | 
| 
       413 
     | 
    
         
            -
            				 
     | 
| 
       414 
     | 
    
         
            -
            				
         
     | 
| 
       415 
     | 
    
         
            -
             
     | 
| 
       416 
     | 
    
         
            -
            				
         
     | 
| 
       417 
     | 
    
         
            -
             
     | 
| 
       418 
     | 
    
         
            -
             
     | 
| 
       419 
     | 
    
         
            -
             
     | 
| 
       420 
     | 
    
         
            -
             
     | 
| 
       421 
     | 
    
         
            -
            					 
     | 
| 
       422 
     | 
    
         
            -
            					#  
     | 
| 
       423 
     | 
    
         
            -
            					 
     | 
| 
       424 
     | 
    
         
            -
             
     | 
| 
       425 
     | 
    
         
            -
             
     | 
| 
       426 
     | 
    
         
            -
             
     | 
| 
       427 
     | 
    
         
            -
            					 
     | 
| 
       428 
     | 
    
         
            -
             
     | 
| 
       429 
     | 
    
         
            -
             
     | 
| 
       430 
     | 
    
         
            -
            					 
     | 
| 
      
 126 
     | 
    
         
            +
            			if (final_hit.align_len == final_hit.s_len && putative_end)||(sq_end_distance.abs  <= max_distance && putative_end && putative_end <= max_distance*2) #Stop in a Full-length. max_distance *2 is set by de margin of +-15aa at the end of aligment 
         
     | 
| 
      
 127 
     | 
    
         
            +
            				end_status = 'complete'
         
     | 
| 
      
 128 
     | 
    
         
            +
            			elsif sq_end_distance  < max_distance # si no tenemos suficiente secuencia para tener el stop (nos faltan 15 aas o mas)
         
     | 
| 
      
 129 
     | 
    
         
            +
            				end_status = 'incomplete'
         
     | 
| 
      
 130 
     | 
    
         
            +
            				if putative_end
         
     | 
| 
      
 131 
     | 
    
         
            +
            					$global_warnings << ['UnexpSTOP3pDist', sq_end_distance.abs]
         
     | 
| 
      
 132 
     | 
    
         
            +
            				else
         
     | 
| 
      
 133 
     | 
    
         
            +
            					$global_warnings << ['DistSubj', sq_end_distance.abs]
         
     | 
| 
      
 134 
     | 
    
         
            +
            				end
         
     | 
| 
      
 135 
     | 
    
         
            +
            			else # tenemos suficiente secuencia
         
     | 
| 
      
 136 
     | 
    
         
            +
            				if putative_end # tenemos un stop
         
     | 
| 
      
 137 
     | 
    
         
            +
            					#beg_end_string indica en que punto del unigen se encuentra el area de busqueda del codon stop
         
     | 
| 
      
 138 
     | 
    
         
            +
            					stop_q_s = beg_end_string + putative_end - final_hit.s_len # Space between query's stop and subject's stop
         
     | 
| 
      
 139 
     | 
    
         
            +
            					if stop_q_s.abs <= max_distance #Stop codon is in search region
         
     | 
| 
      
 140 
     | 
    
         
            +
            						end_status = 'complete'
         
     | 
| 
      
 141 
     | 
    
         
            +
            					elsif stop_q_s < 0
         
     | 
| 
      
 142 
     | 
    
         
            +
            						$global_warnings << 'UnexpSTOP3p'
         
     | 
| 
      
 143 
     | 
    
         
            +
            					elsif stop_q_s > 0
         
     | 
| 
      
 144 
     | 
    
         
            +
            						end_status = 'complete'
         
     | 
| 
      
 145 
     | 
    
         
            +
            						$global_warnings << 'QueryTooLong'
         
     | 
| 
      
 146 
     | 
    
         
            +
            					end
         
     | 
| 
      
 147 
     | 
    
         
            +
            				else # no tenemos codon de parada pero tenemos suficiente secuencia
         
     | 
| 
      
 148 
     | 
    
         
            +
            					end_status = 'incomplete'
         
     | 
| 
      
 149 
     | 
    
         
            +
            					$global_warnings << 'ProtFusion'
         
     | 
| 
       431 
150 
     | 
    
         
             
            				end
         
     | 
| 
       432 
     | 
    
         
            -
            				end_substring = end_substring[0, putative_end+1]
         
     | 
| 
       433 
     | 
    
         
            -
            				
         
     | 
| 
       434 
     | 
    
         
            -
            			else # no tenemos codon de parada pero tenemos suficiente secuencia
         
     | 
| 
       435 
     | 
    
         
            -
            				end_status = 'putative'
         
     | 
| 
       436 
     | 
    
         
            -
            				warnings += " STOP codon was not found. Distance to subject end: #{sq_end_distance.abs} aas, "
         
     | 
| 
       437 
     | 
    
         
            -
            				# if (@verbose)
         
     | 
| 
       438 
     | 
    
         
            -
            				# 	puts "#{db_name} -- #{q.query_def} --> STOP codon was not found. Distance to subject end: #{sq_end_distance.abs} aas"
         
     | 
| 
       439 
     | 
    
         
            -
            				# end
         
     | 
| 
       440 
151 
     | 
    
         
             
            			end
         
     | 
| 
       441 
     | 
    
         
            -
            			
         
     | 
| 
       442 
152 
     | 
    
         
             
            		end
         
     | 
| 
       443 
     | 
    
         
            -
            		
         
     | 
| 
       444 
     | 
    
         
            -
            		 
     | 
| 
      
 153 
     | 
    
         
            +
            		final_prot = atg_substring + end_substring
         
     | 
| 
      
 154 
     | 
    
         
            +
            		end_status = 'complete' if final_prot.length == final_hit.s_len+1 && final_prot[final_prot.length-1] == '*'
         
     | 
| 
      
 155 
     | 
    
         
            +
            		new_q_end = final_hit.q_beg-1 + final_prot.length * 3 + frame_shift
         
     | 
| 
      
 156 
     | 
    
         
            +
            		modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) if  $verbose > 1
         
     | 
| 
      
 157 
     | 
    
         
            +
            		final_hit.q_end = new_q_end  
         
     | 
| 
      
 158 
     | 
    
         
            +
            		return end_status, final_prot
         
     | 
| 
       445 
159 
     | 
    
         
             
            	end
         
     | 
| 
       446 
160 
     | 
    
         | 
| 
       447 
161 
     | 
    
         | 
| 
       448 
     | 
    
         
            -
            	def determine_status(atg_status,end_status)
         
     | 
| 
       449 
     | 
    
         
            -
            		
         
     | 
| 
       450 
     | 
    
         
            -
             
     | 
| 
       451 
     | 
    
         
            -
             
     | 
| 
       452 
     | 
    
         
            -
             
     | 
| 
       453 
     | 
    
         
            -
             
     | 
| 
       454 
     | 
    
         
            -
             
     | 
| 
       455 
     | 
    
         
            -
             
     | 
| 
       456 
     | 
    
         
            -
             
     | 
| 
       457 
     | 
    
         
            -
            			final_status = 'N-terminus'
         
     | 
| 
       458 
     | 
    
         
            -
            		elsif (atg_status == 'putative') && (end_status == 'incomplete') # puede que tengamos el principio de la proteina
         
     | 
| 
       459 
     | 
    
         
            -
            			final_status = 'Putative N-terminus'
         
     | 
| 
       460 
     | 
    
         
            -
            		elsif (atg_status == 'incomplete') && (end_status == 'complete') # tenemos el final de la proteina
         
     | 
| 
       461 
     | 
    
         
            -
            			final_status = 'C-terminus'
         
     | 
| 
       462 
     | 
    
         
            -
            		elsif (atg_status == 'incomplete') && (end_status == 'putative') # puede que tengamos el final de la proteina
         
     | 
| 
       463 
     | 
    
         
            -
            			final_status = 'Putative C-terminus'
         
     | 
| 
      
 162 
     | 
    
         
            +
            	def determine_status(atg_status, end_status)
         
     | 
| 
      
 163 
     | 
    
         
            +
            		if atg_status != 'incomplete' && end_status != 'incomplete' # proteina completa
         
     | 
| 
      
 164 
     | 
    
         
            +
            			type = COMPLETE
         
     | 
| 
      
 165 
     | 
    
         
            +
            		elsif atg_status == 'incomplete' && end_status == 'incomplete' # region intermedia
         
     | 
| 
      
 166 
     | 
    
         
            +
            			type = INTERNAL
         
     | 
| 
      
 167 
     | 
    
         
            +
            		elsif atg_status != 'incomplete' && end_status == 'incomplete' # tenemos el principio de la proteina
         
     | 
| 
      
 168 
     | 
    
         
            +
            			type = N_TERMINAL
         
     | 
| 
      
 169 
     | 
    
         
            +
            		elsif atg_status == 'incomplete' && end_status != 'incomplete' # tenemos el final de la proteina
         
     | 
| 
      
 170 
     | 
    
         
            +
            			type = C_TERMINAL
         
     | 
| 
       464 
171 
     | 
    
         
             
            		end
         
     | 
| 
       465 
172 
     | 
    
         | 
| 
       466 
     | 
    
         
            -
            		 
     | 
| 
       467 
     | 
    
         
            -
             
     | 
| 
       468 
     | 
    
         
            -
             
     | 
| 
       469 
     | 
    
         
            -
             
     | 
| 
       470 
     | 
    
         
            -
            	def print_annotations(seq, q, final_hit, final_status, final_prot, warnings, query_fasta, db_name)
         
     | 
| 
       471 
     | 
    
         
            -
            		name_diff = q.query_def.length - final_hit.acc.length
         
     | 
| 
       472 
     | 
    
         
            -
            		if (name_diff > 0)
         
     | 
| 
       473 
     | 
    
         
            -
            			spnum = ' '*name_diff.to_i
         
     | 
| 
      
 173 
     | 
    
         
            +
            		if atg_status == 'putative' || end_status == 'putative'
         
     | 
| 
      
 174 
     | 
    
         
            +
            			status = FALSE # Putative
         
     | 
| 
       474 
175 
     | 
    
         
             
            		else
         
     | 
| 
       475 
     | 
    
         
            -
            			 
     | 
| 
      
 176 
     | 
    
         
            +
            			status = TRUE # Sure
         
     | 
| 
       476 
177 
     | 
    
         
             
            		end
         
     | 
| 
       477 
     | 
    
         
            -
            #-------------------------------------------------------------------------------------------------------------------------------------
         
     | 
| 
       478 
     | 
    
         
            -
            		# if the sequence is Complete will be printed                 --------------------------------------------------------------------
         
     | 
| 
       479 
     | 
    
         
            -
            		if (final_status == 'Complete') 
         
     | 
| 
       480 
     | 
    
         
            -
            			seq.annotate(:protein,">#{q.query_def}\n#{final_prot}")
         
     | 
| 
       481 
     | 
    
         
            -
            			print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
         
     | 
| 
       482 
     | 
    
         
            -
             
     | 
| 
       483 
     | 
    
         
            -
            			if (final_hit.reversed) 
         
     | 
| 
       484 
     | 
    
         
            -
            				(kk, final_hit.q_frame, final_hit.q_end, final_hit.q_beg) = reverse_seq(seq.seq_fasta, final_hit.q_frame.to_i, final_hit.q_beg.to_i, final_hit.q_end.to_i)
         
     | 
| 
       485 
     | 
    
         
            -
            			end
         
     | 
| 
       486 
     | 
    
         
            -
            			seq.annotate(:complete,"#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}")
         
     | 
| 
       487 
     | 
    
         
            -
            			seq.annotate(:alignment,"#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n")
         
     | 
| 
       488 
     | 
    
         
            -
            #-------------------------------------------------------------------------------------------------------------------------------------
         
     | 
| 
       489 
     | 
    
         
            -
            		else # la proteina no esta completa                      -------------------------------------------------------------------------
         
     | 
| 
       490 
     | 
    
         
            -
            			if (!seq.get_annotations(:tmp_annotation).empty?) && (!seq.get_annotations(:tmp_annotation).nil?) # ---> trae informacion de una bd anterior
         
     | 
| 
       491 
     | 
    
         
            -
            				if (db_name =~/^tr_/) #                                          --->  estamos usando el trembl, se dejan las anotaciones que trae
         
     | 
| 
       492 
     | 
    
         
            -
            					# puts "#{db_name} -- #{q.query_def} --> print_annotations: sequence not complete! recovering annotations from previous database!"
         
     | 
| 
       493 
     | 
    
         
            -
            					(kk1, final_hit, final_prot, query_fasta, final_status) = seq.get_annotations(:tmp_annotation).first[:message][3]
         
     | 
| 
       494 
     | 
    
         
            -
            					print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
         
     | 
| 
       495 
178 
     | 
    
         | 
| 
       496 
     | 
    
         
            -
             
     | 
| 
       497 
     | 
    
         
            -
             
     | 
| 
       498 
     | 
    
         
            -
            						(kk, q_frame, q_end, q_beg) = reverse_seq(query_fasta, q_frame.to_i, q_beg.to_i, q_end.to_i)
         
     | 
| 
       499 
     | 
    
         
            -
            					end
         
     | 
| 
       500 
     | 
    
         
            -
            					
         
     | 
| 
       501 
     | 
    
         
            -
            					my_prot = seq.get_annotations(:tmp_annotation).first[:message][1]
         
     | 
| 
       502 
     | 
    
         
            -
            					seq.annotate(:protein,my_prot)
         
     | 
| 
       503 
     | 
    
         
            -
            					my_align = seq.get_annotations(:tmp_annotation).first[:message][2]
         
     | 
| 
       504 
     | 
    
         
            -
            					seq.annotate(:alignment,my_align)
         
     | 
| 
       505 
     | 
    
         
            -
            					
         
     | 
| 
       506 
     | 
    
         
            -
            					tmp_annot = "#{name}\t#{query_fasta.length}\t#{acc}\t#{db_name}\t#{final_status}\t\t#{e_val}\t#{ident}\t#{my_length}\t#{subject_length}\t#{warnings}\t#{q_frame}\t#{q_beg}\t#{q_end}\t#{s_beg}\t#{s_end}\t#{description}\t#{final_prot}"
         
     | 
| 
       507 
     | 
    
         
            -
            					seq.annotate(:tmp_annotation,[tmp_annot, '','',''],true)
         
     | 
| 
       508 
     | 
    
         
            -
            				#-----------------------------------------------------------------------------------------------------------------------------
         
     | 
| 
       509 
     | 
    
         
            -
            				# elsif (db_name =~ /^sp_/) #                                       ---> estamos usando el sp, se dejan las anotaciones que trae
         
     | 
| 
       510 
     | 
    
         
            -
            					
         
     | 
| 
       511 
     | 
    
         
            -
            					# puts "#{db_name} -- #{q.query_def} --> print_annotations: Mantenemos las anotaciones de la BD de usuario y pasamos la secuencia al trembl"
         
     | 
| 
       512 
     | 
    
         
            -
            				end
         
     | 
| 
       513 
     | 
    
         
            -
            #-------------------------------------------------------------------------------------------------------------------------------------
         
     | 
| 
       514 
     | 
    
         
            -
            			elsif (seq.get_annotations(:tmp_annotation).empty?) #                                ---> NO trae informacion de una bd anterior
         
     | 
| 
       515 
     | 
    
         
            -
            				if (db_name =~ /^tr_/) #                                                                         ---> estamos usando el trembl
         
     | 
| 
       516 
     | 
    
         
            -
            					# puts "#{db_name} -- #{q.query_def} --> print_annotations: #{q.query_def} is not complete!! se anota con trembl"
         
     | 
| 
       517 
     | 
    
         
            -
            					print_nt_seqs(seq, q, final_hit, final_prot, query_fasta, final_status) # funcion para marcar ATG (_-_) y STOP (___)
         
     | 
| 
      
 179 
     | 
    
         
            +
            		return type, status
         
     | 
| 
      
 180 
     | 
    
         
            +
            	end
         
     | 
| 
       518 
181 
     | 
    
         | 
| 
       519 
     | 
    
         
            -
            					if (final_hit.reversed) 
         
     | 
| 
       520 
     | 
    
         
            -
            						(kk, final_hit.q_frame, final_hit.q_end, final_hit.q_beg) = reverse_seq(seq.seq_fasta, final_hit.q_frame.to_i, final_hit.q_beg.to_i, final_hit.q_end.to_i)
         
     | 
| 
       521 
     | 
    
         
            -
            					end
         
     | 
| 
       522 
182 
     | 
    
         | 
| 
       523 
     | 
    
         
            -
             
     | 
| 
       524 
     | 
    
         
            -
             
     | 
| 
       525 
     | 
    
         
            -
             
     | 
| 
       526 
     | 
    
         
            -
             
     | 
| 
       527 
     | 
    
         
            -
             
     | 
| 
       528 
     | 
    
         
            -
             
     | 
| 
       529 
     | 
    
         
            -
             
     | 
| 
       530 
     | 
    
         
            -
            					 
     | 
| 
       531 
     | 
    
         
            -
            					 
     | 
| 
       532 
     | 
    
         
            -
            					seq.sec_desc = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\tMisassembled\t\t#{final_hit.e_val}\t#{final_hit.ident}\t\t#{final_hit.full_subject_length}\t#{warnings}\t\t\t\t\t\t#{final_hit.definition}\t"
         
     | 
| 
       533 
     | 
    
         
            -
            					seq.annotate(:tmp_annotation,[tmp_annot, tmp_prot,tmp_align,[q, final_hit, final_prot, query_fasta, final_status]])
         
     | 
| 
       534 
     | 
    
         
            -
            					
         
     | 
| 
       535 
     | 
    
         
            -
            					# puts "\n\n\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.---#{q.query_def}\t#{final_status}\n#{tmp_prot}"
         
     | 
| 
       536 
     | 
    
         
            -
            					# puts "#{db_name} -- #{q.query_def} --> print_annotations: cargamos anotaciones para utilizarlas en la siguiente BD"
         
     | 
| 
      
 183 
     | 
    
         
            +
            	# Compares the length of the predicted protein with the length of the
            	# subject (final_hit.s_len), using a tolerance of 2 * distance residues.
            	#
            	# Side effects: appends to $global_warnings
            	#   ['SeqLonger', prot_len, subject_len]  when the protein is longer,
            	#   ['SeqShorter', prot_len, subject_len] when the protein is shorter,
            	#   'VeryShorter' when, in addition, the protein is more than 100 residues
            	#   shorter or less than half of the subject and type is COMPLETE.
            	#
            	# Returns status unchanged, except in the 'VeryShorter' case, where false
            	# is returned.
            	def compare_seq_length_with_subject(final_prot, distance, final_hit, type, status)
            		prot_len = final_prot.length
            		subject_len = final_hit.s_len
            		if prot_len - 2 * distance > subject_len
            			$global_warnings << ['SeqLonger', prot_len, subject_len]
            		elsif prot_len + 2 * distance < subject_len
            			$global_warnings << ['SeqShorter', prot_len, subject_len]
            			very_short = prot_len + 100 < subject_len || prot_len * 2 < subject_len
            			if very_short && type == COMPLETE
            				# The FALSE constant was removed in Ruby 3.2; use the literal.
            				status = false
            				$global_warnings << 'VeryShorter'
            			end
            		end
            		return status
            	end
         
     | 
| 
       541 
197 
     | 
    
         | 
| 
       542 
198 
     | 
    
         | 
| 
       543 
     | 
    
         
            -
            	def  
     | 
| 
       544 
     | 
    
         
            -
            		
         
     | 
| 
       545 
     | 
    
         
            -
            		 
     | 
| 
       546 
     | 
    
         
            -
             
     | 
| 
       547 
     | 
    
         
            -
             
     | 
| 
       548 
     | 
    
         
            -
             
     | 
| 
       549 
     | 
    
         
            -
            			 
     | 
| 
       550 
     | 
    
         
            -
            			 
     | 
| 
       551 
     | 
    
         
            -
             
     | 
| 
       552 
     | 
    
         
            -
            			 
     | 
| 
       553 
     | 
    
         
            -
            			 
     | 
| 
       554 
     | 
    
         
            -
            			 
     | 
| 
       555 
     | 
    
         
            -
             
     | 
| 
       556 
     | 
    
         
            -
            				 
     | 
| 
       557 
     | 
    
         
            -
            				# puts "empieza en el borde de la seq"
         
     | 
| 
      
 199 
     | 
    
         
            +
            	# Stores the structural annotation of a sequence.
            	#
            	# The annotation is written on seq only when seq has no previous
            	# information (seq.type == UNKNOWN) or when the new annotation is COMPLETE
            	# and the stored one is not. In that case the warnings accumulated in
            	# $global_warnings are handed to seq.warnings and the global is reset to a
            	# new empty array. Sequences annotated as COMPLETE get seq.ignore = true.
            	#
            	# When $verbose is set and greater than 2 the annotation qualification is
            	# printed to stdout.
            	def save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name)
            		if seq.type == UNKNOWN || (type == COMPLETE && seq.type != COMPLETE)
            			seq.type = type
            			seq.status = status
            			seq.db_name = db_name
            			seq.seq_fasta = query_fasta
            			seq.seq_aa = final_prot
            			seq.hit = final_hit
            			seq.warnings($global_warnings)
            			$global_warnings = [] # Clean all warnings for current sequence
            			seq.seq_nt = mark_nt_seqs(final_hit, query_fasta)
            			# The TRUE constant was removed in Ruby 3.2; use the literal.
            			seq.ignore = true if type == COMPLETE
            		end
            		# Guard against an unset $verbose, as show_nts does.
            		if $verbose && $verbose > 2
            			puts "\e[1mStruct annot: #{seq.prot_annot_calification}\e[0m"
            		end
            	end
         
     | 
| 
       603 
219 
     | 
    
         | 
| 
       604 
     | 
    
         
            -
            			if (bad_atg == true)
         
     | 
| 
       605 
     | 
    
         
            -
            				stop_c = my_seq[final_hit.q_end - 2..final_hit.q_end]
         
     | 
| 
       606 
     | 
    
         
            -
            				stop_c_longer = my_seq[final_hit.q_end - 7..final_hit.q_end + 5]
         
     | 
| 
       607 
     | 
    
         
            -
            			else
         
     | 
| 
       608 
     | 
    
         
            -
            				stop_c = my_seq[final_hit.q_end + 3..final_hit.q_end + 5]
         
     | 
| 
       609 
     | 
    
         
            -
            				stop_c_longer = my_seq[final_hit.q_end - 2..final_hit.q_end + 10]
         
     | 
| 
       610 
     | 
    
         
            -
            			end
         
     | 
| 
       611 
220 
     | 
    
         | 
| 
      
 221 
     | 
    
         
            +
            	# Returns a copy of query_fasta with the ORF delimited by
            	# final_hit.q_beg..final_hit.q_end (0-based, inclusive) marked:
            	#   '_-_' is placed before the ORF when it starts with 'ATG',
            	#   '___' is placed after the ORF when it ends with TAG, TGA or TAA.
            	# The comparison with the codons is case sensitive.
            	def mark_nt_seqs(final_hit, query_fasta)
            		orf_start = final_hit.q_beg
            		orf_end = final_hit.q_end
            		mark_atg = query_fasta[orf_start..(orf_start + 2)] == 'ATG' ? '_-_' : nil
            		stop = query_fasta[(orf_end - 2)..orf_end]
            		mark_stop = %w[TAG TGA TAA].include?(stop) ? '___' : nil
            		# Start/length slicing yields '' when the ORF starts at position 0
            		# (the range 0..-1 would return the whole sequence instead).
            		seq5p = query_fasta[0, orf_start]
            		orf = query_fasta[orf_start..orf_end]
            		# The 3' flank starts after q_end, which already belongs to the ORF.
            		seq3p = query_fasta[(orf_end + 1)..-1]
            		return "#{seq5p}#{mark_atg}#{orf}#{mark_stop}#{seq3p}"
            	end
         
     | 
| 
       613 
238 
     | 
    
         | 
| 
       614 
     | 
    
         
            -
             
     | 
| 
       615 
     | 
    
         
            -
             
     | 
| 
       616 
     | 
    
         
            -
             
     | 
| 
       617 
     | 
    
         
            -
             
     | 
| 
       618 
     | 
    
         
            -
             
     | 
| 
       619 
     | 
    
         
            -
            				if  
     | 
| 
       620 
     | 
    
         
            -
            					 
     | 
| 
       621 
     | 
    
         
            -
             
     | 
| 
       622 
     | 
    
         
            -
             
     | 
| 
       623 
     | 
    
         
            -
             
     | 
| 
       624 
     | 
    
         
            -
             
     | 
| 
       625 
     | 
    
         
            -
             
     | 
| 
       626 
     | 
    
         
            -
            					my_prot = my_prot.sub(/___\w+/,'')
         
     | 
| 
       627 
     | 
    
         
            -
            					my_prot = my_prot.translate
         
     | 
| 
       628 
     | 
    
         
            -
            					my_prot = my_prot.sub(/x$/,'')
         
     | 
| 
       629 
     | 
    
         
            -
             
     | 
| 
       630 
     | 
    
         
            -
            					simliar_fragment = final_prot.lcs(my_prot)
         
     | 
| 
       631 
     | 
    
         
            -
             
     | 
| 
       632 
     | 
    
         
            -
            					if (simliar_fragment.length == final_prot.length) && (simliar_fragment.length == my_prot.length)
         
     | 
| 
       633 
     | 
    
         
            -
            						seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\t\t\t\t\t\t#{my_seq}")
         
     | 
| 
       634 
     | 
    
         
            -
            					else
         
     | 
| 
       635 
     | 
    
         
            -
            						seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tthe nucleotide sequence contain a lot of errors\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
         
     | 
| 
       636 
     | 
    
         
            -
            						# puts "nt seq: was no possible to find stop codon, the nucleotide sequence contain a lot of errors"
         
     | 
| 
      
 239 
     | 
    
         
            +
            	# Pads the frameshifts reported in the hsps of hit with 'n' characters.
            	#
            	# Only hsps whose class name is 'ExoBlastHit' carry frameshift data
            	# (q_frameshift, iterated as [position, num_nts] pairs). For each pair,
            	# 3 - num_nts 'n' characters are inserted at position + num_nts, and an
            	# ['ExFrameS', position + num_nts] warning is pushed to $global_warnings.
            	# The q_beg/q_end coordinates of the hsps are shifted by the number of
            	# nucleotides added so far (q_beg is left untouched for the first hsp).
            	#
            	# query_fasta is modified in place and also returned.
            	def exonerate_fix_frame_shift(query_fasta, hit)
            		insertions = []
            		shift_total = 0
            		hit.each_with_index do |hsp, idx|
            			#Only this type of class of BlastHit has frameshift attributes
            			if hsp.class.to_s == 'ExoBlastHit' && !hsp.q_frameshift.empty?
            				hsp.q_frameshift.each do |fs_start, fs_len|
            					padding = 3 - fs_len
            					fs_end = fs_start + fs_len
            					$global_warnings << ['ExFrameS', fs_end]
            					insertions << [fs_end, padding]
            					shift_total += padding
            				end
            			end
            			hsp.q_beg += shift_total unless idx == 0
            			hsp.q_end += shift_total
            		end
            		# Earlier insertions displace the later ones, so carry the offset.
            		offset = 0
            		insertions.each do |fs_end, padding|
            			query_fasta.insert(fs_end + offset, 'n' * padding)
            			offset += padding
            		end
            		query_fasta
            	end
         
     | 
| 
       640 
264 
     | 
    
         | 
| 
       641 
     | 
    
         
            -
            			else
         
     | 
| 
       642 
     | 
    
         
            -
            				if (final_status == 'Putative Complete') || (final_status == 'C-terminus') || (final_status == 'Putative C-terminus')
         
     | 
| 
       643 
265 
     | 
    
         | 
| 
       644 
     | 
    
         
            -
             
     | 
| 
       645 
     | 
    
         
            -
             
     | 
| 
       646 
     | 
    
         
            -
             
     | 
| 
       647 
     | 
    
         
            -
             
     | 
| 
       648 
     | 
    
         
            -
             
     | 
| 
       649 
     | 
    
         
            -
             
     | 
| 
       650 
     | 
    
         
            -
            					end
         
     | 
| 
       651 
     | 
    
         
            -
            					
         
     | 
| 
       652 
     | 
    
         
            -
            					if (!stop_c.nil?)
         
     | 
| 
       653 
     | 
    
         
            -
            						if (stop_c.translate == '*')
         
     | 
| 
       654 
     | 
    
         
            -
            							final_hit.q_end = final_hit.q_end + 3
         
     | 
| 
       655 
     | 
    
         
            -
            							if (bad_atg == true)
         
     | 
| 
       656 
     | 
    
         
            -
            								my_seq = my_seq[0..final_hit.q_end] +'___'+ my_seq[final_hit.q_end + 1..my_seq.length + 1]
         
     | 
| 
       657 
     | 
    
         
            -
            								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG\t\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
         
     | 
| 
       658 
     | 
    
         
            -
            							else
         
     | 
| 
       659 
     | 
    
         
            -
            								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
         
     | 
| 
       660 
     | 
    
         
            -
            							end
         
     | 
| 
       661 
     | 
    
         
            -
            						else
         
     | 
| 
       662 
     | 
    
         
            -
            							if (bad_atg == true)
         
     | 
| 
       663 
     | 
    
         
            -
            								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG NO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
         
     | 
| 
       664 
     | 
    
         
            -
            								# puts "find nt end: NO ATG, NO exact STOP"
         
     | 
| 
       665 
     | 
    
         
            -
            							else
         
     | 
| 
       666 
     | 
    
         
            -
            								seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP exacto\tstop: #{stop_c_longer}\t#{stop_c}\t#{final_hit.q_beg + 1}\t#{final_hit.q_end + 1}\t#{my_seq}")
         
     | 
| 
       667 
     | 
    
         
            -
            								# puts "find nt end: GOOD ATG, NO exact STOP"
         
     | 
| 
       668 
     | 
    
         
            -
            							end
         
     | 
| 
       669 
     | 
    
         
            -
            						end
         
     | 
| 
       670 
     | 
    
         
            -
            					end
         
     | 
| 
       671 
     | 
    
         
            -
            				end
         
     | 
| 
       672 
     | 
    
         
            -
            				
         
     | 
| 
      
 266 
     | 
    
         
            +
            ## VERBOSE METHODS
         
     | 
| 
      
 267 
     | 
    
         
            +
            	# Returns true when $verbose is set and greater than 3, false otherwise.
            	def show_nts
            		# The TRUE/FALSE constants were removed in Ruby 3.2; use the literals.
            		return $verbose ? $verbose > 3 : false
            	end
         
     | 
| 
       673 
272 
     | 
    
         | 
| 
       674 
     | 
    
         
            -
            			end
         
     | 
| 
       675 
273 
     | 
    
         | 
| 
       676 
     | 
    
         
            -
             
     | 
| 
      
 274 
     | 
    
         
            +
            	# Updates final_hit.q_seq (aligned query string) after the 3' end of the
            	# alignment has been moved to new_q_end. Used for the visual report.
            	#
            	# - new_q_end beyond final_hit.q_end: the translation of the added
            	#   nucleotides is appended to q_seq.
            	# - new_q_end before final_hit.q_end: q_seq is truncated to the length of
            	#   final_prot plus the number of '-' characters present in q_seq.
            	# - otherwise q_seq is left untouched.
            	def modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) ## For visual report
            		if new_q_end > final_hit.q_end #There is an align extension
            			added_nts = query_fasta[(final_hit.q_end + 1)..new_q_end]
            			final_hit.q_seq += added_nts.translate
            		elsif new_q_end < final_hit.q_end #The align is cut
            			kept_chars = final_prot.length + final_hit.q_seq.count('-')
            			final_hit.q_seq = final_hit.q_seq[0..(kept_chars - 1)]
            		end
            	end
         
     | 
| 
       677 
283 
     | 
    
         | 
| 
       678 
     | 
    
         
            -
            			if (bad_atg == true)
         
     | 
| 
       679 
     | 
    
         
            -
            				seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO ATG NO STOP\t\t\t\t\t#{my_seq}")
         
     | 
| 
       680 
     | 
    
         
            -
            			else
         
     | 
| 
       681 
     | 
    
         
            -
            				seq.annotate(:nucleotide,"#{q.query_def}\t#{final_status}\tNO STOP\t\t\t\t\t#{my_seq}")
         
     | 
| 
       682 
     | 
    
         
            -
            			end
         
     | 
| 
       683 
284 
     | 
    
         | 
| 
      
 285 
     | 
    
         
            +
            	# Updates final_hit.q_seq (aligned query string) after the 5' end of the
            	# alignment has been moved to new_q_beg. Used for the visual report.
            	#
            	# - new_q_beg before final_hit.q_beg: the translation of the added
            	#   nucleotides is prepended to q_seq.
            	# - new_q_beg after final_hit.q_beg: the leading residues are dropped,
            	#   one per removed codon plus one per '-' found in the dropped prefix.
            	# - otherwise q_seq is left untouched.
            	def modify_5p_align(new_q_beg, final_hit, query_fasta) ## For visual report
            		if new_q_beg < final_hit.q_beg #There is an align extension
            			added_nts = query_fasta[new_q_beg..(final_hit.q_beg - 1)]
            			final_hit.q_seq = added_nts.translate + final_hit.q_seq
            		elsif new_q_beg > final_hit.q_beg #The align is cut
            			removed_codons = (new_q_beg - final_hit.q_beg) / 3
            			# Gaps take a column in q_seq but no nucleotides, so skip them too.
            			first_kept = removed_codons + final_hit.q_seq[0..removed_codons].count('-')
            			final_hit.q_seq = final_hit.q_seq[first_kept..-1]
            		end
            	end
         
     | 
| 
       687 
296 
     | 
    
         | 
| 
       688 
297 
     | 
    
         
             
            end
         
     |