full_lengther_next 0.0.8 → 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36):
  1. data/.gemtest +0 -0
  2. data/History.txt +2 -2
  3. data/Manifest.txt +33 -18
  4. data/Rakefile +4 -2
  5. data/bin/download_fln_dbs.rb +310 -158
  6. data/bin/full_lengther_next +160 -103
  7. data/bin/make_test_dataset.rb +236 -0
  8. data/bin/make_user_db.rb +101 -117
  9. data/bin/plot_fln.rb +270 -0
  10. data/bin/plot_taxonomy.rb +70 -0
  11. data/lib/expresscanvas.zip +0 -0
  12. data/lib/full_lengther_next.rb +3 -3
  13. data/lib/full_lengther_next/classes/artifacts.rb +66 -0
  14. data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
  15. data/lib/full_lengther_next/classes/cdhit.rb +154 -0
  16. data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
  17. data/lib/full_lengther_next/classes/common_functions.rb +105 -63
  18. data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
  19. data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
  20. data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
  21. data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
  22. data/lib/full_lengther_next/classes/handle_db.rb +30 -0
  23. data/lib/full_lengther_next/classes/my_worker.rb +308 -138
  24. data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
  25. data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
  26. data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
  27. data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
  28. data/lib/full_lengther_next/classes/reptrans.rb +210 -0
  29. data/lib/full_lengther_next/classes/sequence.rb +439 -80
  30. data/lib/full_lengther_next/classes/test_code.rb +15 -16
  31. data/lib/full_lengther_next/classes/types.rb +12 -0
  32. data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
  33. data/lib/full_lengther_next/classes/warnings.rb +40 -0
  34. metadata +207 -93
  35. data/lib/full_lengther_next/classes/lcs.rb +0 -33
  36. data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
data/lib/full_lengther_next/classes/test_code.rb (+15 -16)
@@ -18,16 +18,11 @@ class TestCode
18
18
  protein = ''
19
19
  p_long = 0
20
20
 
21
- if (seq.fasta_length < 200)
22
- ref_name = seq.seq_name
23
- ref_code = 0.0
24
- ref_frame = 0
25
- ref_status = 'unknown'
26
- ref_orf = ''
27
- ref_msgs = 'Sequence length < 200 nt'
28
-
29
- seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.fasta_length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
30
- # seq.annotate(:tcode,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
21
+ if seq.fasta_length < 200
22
+ seq.type = UNKNOWN
23
+ seq.test_code(0.0)
24
+ seq.warnings('<200nt')
25
+ seq.hit = [ref_start, ref_end, 0] #Last element is ref_frame
31
26
  else
32
27
 
33
28
  # para probar tescode con toda la secuencia, en lugar de con los ORFs ----------------------------------------------------------------------
@@ -43,11 +38,15 @@ class TestCode
43
38
 
44
39
 
45
40
  # see add_region filter
46
- (name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame) = t_code(seq)
47
- if (status == :unknown)
48
- seq.annotate(:tcode_unknown,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
41
+ name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame = t_code(seq)
42
+ seq.test_code(t_code)
43
+ seq.warnings(ref_msgs)
44
+ seq.hit = [ref_start, ref_end, ref_frame]
45
+
46
+ if status == :unknown
47
+ seq.type = UNKNOWN
49
48
  else
50
- seq.annotate(:tcode,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
49
+ seq.type = CODING
51
50
  end
52
51
 
53
52
  # if (ref_msgs.nil?)
@@ -157,7 +156,7 @@ class TestCode
157
156
  ref_end = 0
158
157
  ref_frame = 0
159
158
  ref_orf = ''
160
- ref_type = 'unknown'
159
+ ref_type = :unknown
161
160
  ref_msgs = 'Non coding ORF found >= 200 nt '
162
161
  return [name, ref_score, ref_type, ref_start, ref_end, ref_frame, ref_orf, ref_msgs, false, false]
163
162
  end
@@ -772,7 +771,7 @@ class TestCode
772
771
  valueY = ((valueY*1000.0).round/1000.0);
773
772
 
774
773
  # return 'The TestCode value is <b>' + valueY.to_s + '</b>, which indicates that the sequence ' + getConclusion(valueY) + '.';
775
- return [valueY.to_s, getConclusion(valueY)]
774
+ return [valueY, getConclusion(valueY)]
776
775
  end
777
776
 
778
777
  def calcParam (valueOne,valueTwo,valueThree)
data/lib/full_lengther_next/classes/types.rb (+12 -0)
@@ -0,0 +1,12 @@
1
+ FAILED = -4
2
+ OTHER = -3
3
+ CHIMERA = -2
4
+ MISASSEMBLED = -1
5
+ UNKNOWN = 0
6
+ COMPLETE = 1
7
+ N_TERMINAL = 2
8
+ C_TERMINAL = 3
9
+ INTERNAL = 4
10
+ NCRNA = 5
11
+ CODING = 6
12
+
data/lib/full_lengther_next/classes/une_los_hit.rb (+148 -230)
@@ -4,284 +4,202 @@ include CommonFunctions
4
4
 
5
5
  class UneLosHit
6
6
 
7
- attr_reader :output_seq, :final_hit, :msgs, :number_x, :wrong_seq, :is_ok, :q_index_start, :full_prot
7
+ attr_reader :output_seq, :final_hit, :msgs, :number_x, :is_ok, :q_index_start, :full_prot
8
8
 
9
- def initialize(q, query_fasta, pident_threshold)
10
-
11
- (mismas_ids_array, query_fasta, wrong_seq) = hits_misma_id(q, query_fasta)
12
-
13
- @wrong_seq = wrong_seq
9
+ def initialize(full_hit, query_fasta)
10
+ #puts 'BEG ___________________'
11
+ #full_hit.map{|hsp| puts hsp.inspect}
12
+ mismas_ids_array, query_fasta = hits_misma_id(full_hit, query_fasta)
13
+ #puts '..................'
14
14
  @mismas_ids_array = mismas_ids_array
15
- @msgs = ''
16
- @number_x = 0
17
- num_x = ''
18
-
15
+ @msgs = []
19
16
  @output_seq = query_fasta
20
-
21
- if (mismas_ids_array.count > 1)
22
- mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
23
- @final_hit = mismas_ids_array[0].dup
24
17
 
18
+ if mismas_ids_array.count > 1
19
+ mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
20
+ @final_hit = mismas_ids_array.shift # We take first hsp like reference for unigene reconstruction
21
+ #puts @output_seq.length
22
+ #puts "\e[32m#{@final_hit.inspect}\e[0m"
25
23
  mismas_ids_array.each do |hit|
26
- if (hit.ident >= pident_threshold)
27
- # if ($verbose)
28
- # puts "#{hit.acc}\tsc:#{hit.score}\teval:#{hit.e_val}\tid:#{hit.ident}\tframe:#{hit.q_frame}\tqb:#{hit.q_beg + 1}\tqe:#{hit.q_end + 1}\tsb:#{hit.s_beg + 1}\tse:#{hit.s_end + 1}"
29
- # puts "#{query_fasta[hit.q_beg..hit.q_end].translate}"
30
- # end
31
-
32
- same = same_hit(hit)
33
-
34
- if (!same)
35
- if (@final_hit.q_frame == hit.q_frame)
36
- same_frame_hits(hit)
37
- else
38
- # condiciones para corregir el frame en el que tiene que continuar la seq de nt
39
- correccion_x = 0
40
- if (@final_hit.q_frame - hit.q_frame == 1)
41
- correccion_x = 1
42
- elsif (@final_hit.q_frame - hit.q_frame == 2)
43
- correccion_x = 2
44
- elsif (@final_hit.q_frame - hit.q_frame == -1)
45
- correccion_x = 2
46
- elsif (@final_hit.q_frame - hit.q_frame == -2)
47
- correccion_x = 1
48
- end
49
-
50
- # las secuencias solapan en el query
51
- if ((@final_hit.q_end >= hit.q_beg) && (@final_hit.q_end < hit.q_end)) && (hit.q_end > @final_hit.q_end + 15)
52
- overlapped_hits(hit, correccion_x, q)
53
- # puts "---#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
54
-
55
- elsif (@final_hit.q_end < hit.q_beg) && (hit.q_end > @final_hit.q_end + 15) # las secuencias estan separadas en el query
56
- separated_hits(hit, correccion_x, q)
57
- # puts "----------#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
58
- else
59
- @msgs = 'warning!, putative chimeric sequence! or repetitive structure'
60
- # puts "\nWARNING!, PUTATIVE CHIMERIC SEQUENCE !!!\n\n"
61
- end
62
- end
24
+ #puts '.....', "\e[31m#{hit.inspect}\e[0m"
25
+ ##if @final_hit.q_frame == hit.q_frame #Same frame
26
+ ## puts "\e[33mSame Frame\e[0m"
27
+ ## same_frame_hits_query(hit)
28
+ if overlapping_hits?(hit) #Diff frame
29
+ if @msgs.empty?
30
+ @msgs << ['OverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
31
+ else
32
+ @msgs << ['AndOverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
63
33
  end
64
- end # pident
34
+ #puts "\e[33mOverlapped hits\e[0m"
35
+ overlapped_hits_query(hit)
36
+ elsif separated_hits?(hit) #Diff frame
37
+ if @msgs.empty?
38
+ @msgs << ['SeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
39
+ else
40
+ @msgs << ['AndSeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
41
+ end
42
+ #puts "\e[33mSeparated hits\e[0m"
43
+ separated_hits(hit) #Diff frame
44
+ end
45
+ #puts @output_seq.length
46
+ #puts '.....'
47
+ #puts "\e[32m#{@final_hit.inspect}\e[0m"
65
48
  end
66
-
67
49
  else
68
-
69
- @final_hit = mismas_ids_array[0].dup
70
-
71
- # if ($verbose)
72
- # puts "***#{@final_hit.acc}\t#{@final_hit.score}\t#{@final_hit.e_val}\t#{@final_hit.ident}\t#{@final_hit.align_len}\t#{@final_hit.q_frame}\t#{@final_hit.q_beg + 1}\t#{@final_hit.q_end + 1}\t#{@final_hit.s_beg + 1}\t#{@final_hit.s_end + 1}\t#{@final_hit.q_seq}"
73
- # puts "#{query_fasta[@final_hit.q_beg..@final_hit.q_end].translate}"
74
- # end
50
+ @final_hit = mismas_ids_array.shift
75
51
  end
76
- # puts "***#{@final_hit.acc}\t#{@final_hit.score}\t#{@final_hit.e_val}\t#{@final_hit.ident}\t#{@final_hit.align_len}\t#{@final_hit.q_frame}\t#{@final_hit.q_beg + 1}\t#{@final_hit.q_end + 1}\t#{@final_hit.s_beg + 1}\t#{@final_hit.s_end + 1}\t#{@final_hit.q_seq}"
77
- # puts "#{query_fasta[@final_hit.q_beg..@final_hit.q_end].translate}"
78
-
79
- @full_prot = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
80
- # puts "\nfull_prot_ulh: \n#{full_prot}"
81
-
82
- (@is_ok, @q_index_start) = contenidos_en_prot(@final_hit, @full_prot, q)
83
-
52
+ #puts 'END ___________________'
53
+ #puts @final_hit.inspect
84
54
  end
85
55
 
86
-
87
-
88
- def same_hit(hit)
89
-
90
- same = false
91
- if (hit.score == @final_hit.score &&
92
- hit.q_beg == @final_hit.q_beg &&
93
- hit.q_end == @final_hit.q_end &&
94
- hit.s_beg == @final_hit.s_beg &&
95
- hit.s_end == @final_hit.s_end)
96
-
97
- same = true
98
- end
99
-
100
- return same
56
+ def same_frame_hits_query(hit)
57
+ @final_hit.q_seq = @output_seq[@final_hit.q_beg..@final_hit.q_end].translate
58
+ @final_hit.q_end = hit.q_end
59
+ @final_hit.s_end = hit.s_end
60
+ @final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
101
61
  end
102
62
 
103
63
 
64
+ def overlapped_hits_query(hit)
65
+ overlapped_nts = @final_hit.q_end - hit.q_beg + 1
66
+ add_nt = ajust_nt(hit.q_frame-1) # Fix frame-shift
67
+ @output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * (overlapped_nts + add_nt) + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
104
68
 
105
- def same_frame_hits(hit)
106
-
107
- @final_hit.q_beg = @final_hit.q_beg
108
- @final_hit.q_end = hit.q_end
109
-
110
- @final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
111
- @final_hit.s_end = [@final_hit.s_end,hit.s_end].max
69
+ @final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
70
+ @final_hit.q_end = hit.q_end + add_nt
112
71
 
72
+ @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
73
+ @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
74
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
75
+ @final_hit.q_len = @output_seq.length
113
76
  end
114
77
 
115
78
 
79
+ def separated_hits_query(hit)
80
+ separated_nts = hit.q_beg - @final_hit.q_end + 1
81
+ add_nt = ajust_nt(separated_nts) # Fix frame-shift
116
82
 
117
- def overlapped_hits(hit,correccion_x,q)
118
- # puts q.inspect
119
- # puts "los hits solapan!!!"
120
-
121
- if (@msgs.empty?)
122
- @msgs = "Overlapping hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
123
- else
124
- @msgs += " and overlapping frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
125
- end
126
-
127
- # ------------------------------------- preparamos los nt y aas que solapan
128
- overlapped_nt = 0
129
- overlapped_nt = (@final_hit.q_end - hit.q_beg + 1)
130
-
131
- overlapped_aas = 0
132
- overlapped_aas = (@final_hit.s_end - hit.s_beg + 1)
133
- # -------------------------------------
134
- if (overlapped_nt % 3 == 1)
135
- overlapped_nt += 2
136
- elsif (overlapped_nt % 3 == 2)
137
- overlapped_nt += 1
138
- end
139
- # ------------------------------------- calculamos el numero de x a añadir
140
- @number_x = (((correccion_x + overlapped_nt)/3)+1)*3
141
- @number_x_aa = overlapped_aas
142
- num_x = ''
143
- num_x_aa = ''
83
+ @output_seq = @output_seq[0..@final_hit.q_end - separated_nts] + 'n' * (separated_nts + add_nt) + @output_seq[@final_hit.q_end+1..@output_seq.length-1]
84
+ @final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
144
85
 
145
- if (@number_x.to_i > 0)
146
- num_x = 'x'*@number_x.to_i
147
- elsif (@number_x.to_i == 0)
148
- num_x = ''
149
- else
150
- @msgs = "ERROR#2 unexpected negative index in x_number, "
151
- # puts "ERROR#2 unexpected negative index in x_number"
152
- end
86
+ @final_hit.q_end = hit.q_end + add_nt
87
+ @final_hit.s_end = hit.s_end
88
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
89
+ @final_hit.q_len = @output_seq.length
90
+ end
153
91
 
154
- if (@number_x_aa.to_i > 0)
155
- num_x_aa = 'x'*@number_x_aa.to_i
156
- elsif (@number_x_aa.to_i == 0)
157
- num_x_aa = ''
158
- else
159
- num_x_aa = 'x'*@number_x_aa.to_i.abs
160
- @msgs = "Warning!, your query overlaps and the subject is separated, "
161
- end
92
+ def same_frame_hits(hit)
93
+ add = (hit.s_beg - @final_hit.s_end) + 1
94
+ nt_add = add *3
95
+ @final_hit.q_seq = @final_hit.q_seq + 'x' * add + hit.q_seq
96
+ @final_hit.s_seq = @final_hit.s_seq + 'x' * add + hit.s_seq
97
+ @output_seq = @output_seq[0..@final_hit.q_end-nt_add] + 'n'*nt_add + @output_seq[hit.q_beg+1..@output_seq.length-1]
98
+ @final_hit.q_end = hit.q_end
99
+ @final_hit.s_end = hit.s_end
100
+ @final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
101
+ end
162
102
 
163
- if (@number_x_aa.to_i >= 0)
164
- @final_hit.q_seq = "#{@final_hit.q_seq[0..@final_hit.q_seq.length - 1 - overlapped_aas]}#{num_x_aa}#{hit.q_seq[overlapped_aas..hit.q_seq.length]}"
165
- @final_hit.s_seq = "#{@final_hit.s_seq[0..@final_hit.s_seq.length - 1 - overlapped_aas]}#{num_x_aa}#{hit.s_seq[overlapped_aas..hit.s_seq.length]}"
166
- else
167
- @final_hit.q_seq = "#{@final_hit.q_seq[0..@final_hit.q_seq.length - 1]}#{num_x_aa}#{hit.q_seq[0..hit.q_seq.length]}"
168
- @final_hit.s_seq = "#{@final_hit.s_seq[0..@final_hit.s_seq.length - 1]}#{num_x_aa}#{hit.s_seq[0..hit.s_seq.length]}"
103
+ def overlapped_hits(hit) #Colapsa cuando hay hsps en diferentes partes de la query pero son lo mismo en el subject con lo cual los eliminamos antes
104
+ overlapped_aas = @final_hit.s_end - hit.s_beg + 1
105
+ overlapped_nts = @final_hit.q_end - hit.q_beg + 1
106
+ align_len_final_hit = @final_hit.q_seq.length
107
+ hit_gaps_query = @final_hit.q_seq[align_len_final_hit-overlapped_aas..align_len_final_hit].count('-')
108
+ hit_gaps_subject = @final_hit.s_seq[0..align_len_final_hit].count('-')
109
+ total_gaps = (hit_gaps_query - hit_gaps_subject).abs # Gaps añaden aa q no existen, x lo q han de descontarse
110
+ nt_discount = (overlapped_aas ) * 3
111
+
112
+ absolute_overlap = 1
113
+ if nt_discount < 0 #Not overlap on subject
114
+ overlapped_aas = overlapped_aas.abs
115
+ absolute_overlap = 0 #Don't cut q_seq and s_seq, because there is query overlap but there isn't subject overlap
116
+ nt_discount = @final_hit.q_end - hit.q_beg + 1
117
+ nt_discount += ajust_nt(nt_discount)
169
118
  end
170
-
171
-
172
- output_seq_tmp = "#{@output_seq[0..(@final_hit.q_end - overlapped_nt)]}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..(@output_seq.length)]}"
119
+ add_nt = overlapped_nts + ajust_nt(hit.q_frame-1)
120
+ @output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * add_nt + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
173
121
 
174
- full_prot_tmp = output_seq_tmp[@final_hit.q_frame-1, output_seq_tmp.length+1].translate
175
-
176
- (is_ok_tmp, q_index_start_tmp) = contenidos_en_prot(@final_hit, full_prot_tmp, q)
122
+ #q_seq and s_seq are aa sequences
123
+ final_hit_upper_bound = @final_hit.q_seq.length - 1 - overlapped_aas * absolute_overlap
124
+ @final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.q_seq[overlapped_aas * absolute_overlap .. hit.q_seq.length-1]
125
+ @final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.s_seq[overlapped_aas * absolute_overlap .. hit.s_seq.length-1]
177
126
 
178
- @output_seq = output_seq_tmp.dup
179
-
180
- @final_hit.q_beg = @final_hit.q_beg
181
127
  @final_hit.q_end = hit.q_end
182
128
 
183
- @final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
184
- @final_hit.s_end = [@final_hit.s_end,hit.s_end].max
185
-
129
+ @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
130
+ @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
131
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
186
132
  end
187
133
 
134
+ def separated_hits(hit)
135
+ number_x = hit.q_beg - @final_hit.q_end - 1
136
+ number_x += ajust_nt(number_x)
188
137
 
189
-
190
- def separated_hits(hit,correccion_x,q)
191
-
192
- # puts "los hits estan separados!!!"
193
-
194
- if (@msgs.empty?)
195
- @msgs = "Separated hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
196
- else
197
- @msgs += " and possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
198
- end
199
-
200
- # PROBAR CON % 3!!!!!!!!!!!!!!!!!!!!!!!!
201
- @number_x = (hit.q_beg - @final_hit.q_end - 1)
202
- # @number_x = (hit.q_beg - @final_hit.q_end - 1) + correccion_x
203
- if (@number_x % 3 == 1)
204
- @number_x += 2
205
- elsif (@number_x % 3 == 2)
206
- @number_x += 1
207
- end
208
-
209
-
210
- if (@number_x.to_i > 0)
211
- num_x = 'x'*@number_x.to_i
212
- num_x_aa = 'x'*(@number_x.to_i/3)
213
- elsif (@number_x.to_i == 0)
214
- num_x = ''
215
- num_x_aa = ''
216
- else
217
- @msgs = "ERROR#2 unexpected negative index in x_number"
218
- # puts "ERROR#2 unexpected negative index in x_number"
138
+ num_x = ''
139
+ num_x_aa = ''
140
+ if number_x > 0
141
+ num_x = 'n'*number_x
142
+ num_x_aa = 'x'*(number_x/3)
219
143
  end
144
+ @output_seq = @output_seq[0..@final_hit.q_end-1] + num_x + @output_seq[hit.q_beg-1..@output_seq.length-1]
145
+ @final_hit.q_seq = @final_hit.q_seq[0, @final_hit.q_seq.length] + num_x_aa + hit.q_seq[0, hit.q_seq.length]
146
+ @final_hit.s_seq = @final_hit.s_seq[0, @final_hit.s_seq.length] + num_x_aa + hit.s_seq[0, hit.s_seq.length]
220
147
 
221
- @output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
222
- # @output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
223
-
224
- @final_hit.score += 1
225
- @final_hit.q_beg = @final_hit.q_beg
226
148
  @final_hit.q_end = hit.q_end
227
- @final_hit.s_beg = @final_hit.s_beg
228
149
  @final_hit.s_end = hit.s_end
229
-
230
- @final_hit.q_seq = "#{@final_hit.q_seq[0, @final_hit.q_seq.length]}#{num_x_aa}#{hit.q_seq[0, hit.q_seq.length]}"
231
- @final_hit.s_seq = "#{@final_hit.s_seq[0, @final_hit.s_seq.length]}#{num_x_aa}#{hit.s_seq[0, hit.s_seq.length]}"
232
-
233
- num_x = ''
234
- num_x_aa = ''
235
-
236
- full_prot_tmp = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
237
- # puts "\n\nfull_prot_tmp:#{full_prot_tmp}"
238
- # puts "\n\n#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1]}"
239
-
240
- (is_ok_tmp, q_index_start_tmp) = contenidos_en_prot(@final_hit, full_prot_tmp, q)
241
-
242
- # puts "#{q.query_def}: is_ok_tmp: #{is_ok_tmp} separated hits"
150
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
243
151
  end
244
152
 
245
153
 
246
154
 
247
155
  # creamos un array en el que esten solo los hits con la misma id.
248
- def hits_misma_id(q, query_fasta_ori)
249
-
250
- # Se les hace la reveso complementaria si es necesario, y se comprueba que para un mismo query no hay hits en sentidos diferentes
251
- wrong_seq = false
252
-
156
+ def hits_misma_id(full_hit, query_fasta_ori)
157
+ # Se les hace la reverso complementaria si es necesario
253
158
  misma_id = []
254
-
255
159
  query_fasta = query_fasta_ori.dup
256
- frame_ori = q.hits[0].q_frame
257
-
258
- q.hits.each do |h|
259
-
260
- # puts "#{q.query_def} f_ori :#{frame_ori} y h_f: #{h.q_frame}"
261
- reversed_hit = false # con respecto al primer hit, que es el de mayor score o evalue
262
- if (h.acc == q.hits[0].acc)
263
-
264
- # comprobar si los frames tiene el mismo sentido
265
- if ((frame_ori < 0 && h.q_frame > 0) or (frame_ori > 0 && h.q_frame < 0))
266
- wrong_seq = true
267
- reversed_hit = true
268
- # puts "The sequence #{q.query_def} contains sense and antisense hits!!! #{frame_ori} y #{h.q_frame}"
269
- end
270
-
271
- if (reversed_hit == false)
272
- if (h.q_frame.to_i < 0) # si la secuencia esta al reves le damos la vuelta
273
- (query_fasta, h.q_frame, h.q_beg, h.q_end) = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
274
- h.reversed = true
160
+ #frame_ori = q.hits.first.q_frame
161
+
162
+ full_hit.each do |h|
163
+ if h.acc == full_hit.first.acc
164
+ # comprobar si los frames tienen el mismo sentido
165
+ #if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
166
+ if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
167
+ query_fasta = reverse_seq(query_fasta_ori, h)
168
+ h.reversed = TRUE
275
169
  end
276
- misma_id.push h
277
- end
278
-
170
+ misma_id << h
171
+ #end
279
172
  end
173
+ end
174
+
175
+ return misma_id, query_fasta
176
+ end
280
177
 
178
+ def overlapping_hits?(hit)
179
+ overlap = FALSE
180
+ if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
181
+ overlap = TRUE
281
182
  end
183
+ return overlap
184
+ end
282
185
 
283
- return [misma_id, query_fasta, wrong_seq]
186
+ def separated_hits?(hit)
187
+ separated=FALSE
188
+ if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
189
+ separated = TRUE
190
+ end
191
+ return separated
284
192
  end
285
193
 
286
194
 
195
+
196
+ def ajust_nt(nt) # Returns the number of nt necessary for keep the ORF (or nt becomes multiple of 3)
197
+ add=0
198
+ if nt % 3 == 1
199
+ add = 2
200
+ elsif nt % 3 == 2
201
+ add = 1
202
+ end
203
+ return add
204
+ end
287
205
  end