full_lengther_next 0.0.8 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. data/.gemtest +0 -0
  2. data/History.txt +2 -2
  3. data/Manifest.txt +33 -18
  4. data/Rakefile +4 -2
  5. data/bin/download_fln_dbs.rb +310 -158
  6. data/bin/full_lengther_next +160 -103
  7. data/bin/make_test_dataset.rb +236 -0
  8. data/bin/make_user_db.rb +101 -117
  9. data/bin/plot_fln.rb +270 -0
  10. data/bin/plot_taxonomy.rb +70 -0
  11. data/lib/expresscanvas.zip +0 -0
  12. data/lib/full_lengther_next.rb +3 -3
  13. data/lib/full_lengther_next/classes/artifacts.rb +66 -0
  14. data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
  15. data/lib/full_lengther_next/classes/cdhit.rb +154 -0
  16. data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
  17. data/lib/full_lengther_next/classes/common_functions.rb +105 -63
  18. data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
  19. data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
  20. data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
  21. data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
  22. data/lib/full_lengther_next/classes/handle_db.rb +30 -0
  23. data/lib/full_lengther_next/classes/my_worker.rb +308 -138
  24. data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
  25. data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
  26. data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
  27. data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
  28. data/lib/full_lengther_next/classes/reptrans.rb +210 -0
  29. data/lib/full_lengther_next/classes/sequence.rb +439 -80
  30. data/lib/full_lengther_next/classes/test_code.rb +15 -16
  31. data/lib/full_lengther_next/classes/types.rb +12 -0
  32. data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
  33. data/lib/full_lengther_next/classes/warnings.rb +40 -0
  34. metadata +207 -93
  35. data/lib/full_lengther_next/classes/lcs.rb +0 -33
  36. data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
@@ -18,16 +18,11 @@ class TestCode
18
18
  protein = ''
19
19
  p_long = 0
20
20
 
21
- if (seq.fasta_length < 200)
22
- ref_name = seq.seq_name
23
- ref_code = 0.0
24
- ref_frame = 0
25
- ref_status = 'unknown'
26
- ref_orf = ''
27
- ref_msgs = 'Sequence length < 200 nt'
28
-
29
- seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.fasta_length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
30
- # seq.annotate(:tcode,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
21
+ if seq.fasta_length < 200
22
+ seq.type = UNKNOWN
23
+ seq.test_code(0.0)
24
+ seq.warnings('<200nt')
25
+ seq.hit = [ref_start, ref_end, 0] #Last element is ref_frame
31
26
  else
32
27
 
33
28
  # para probar tescode con toda la secuencia, en lugar de con los ORFs ----------------------------------------------------------------------
@@ -43,11 +38,15 @@ class TestCode
43
38
 
44
39
 
45
40
  # see add_region filter
46
- (name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame) = t_code(seq)
47
- if (status == :unknown)
48
- seq.annotate(:tcode_unknown,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
41
+ name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame = t_code(seq)
42
+ seq.test_code(t_code)
43
+ seq.warnings(ref_msgs)
44
+ seq.hit = [ref_start, ref_end, ref_frame]
45
+
46
+ if status == :unknown
47
+ seq.type = UNKNOWN
49
48
  else
50
- seq.annotate(:tcode,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
49
+ seq.type = CODING
51
50
  end
52
51
 
53
52
  # if (ref_msgs.nil?)
@@ -157,7 +156,7 @@ class TestCode
157
156
  ref_end = 0
158
157
  ref_frame = 0
159
158
  ref_orf = ''
160
- ref_type = 'unknown'
159
+ ref_type = :unknown
161
160
  ref_msgs = 'Non coding ORF found >= 200 nt '
162
161
  return [name, ref_score, ref_type, ref_start, ref_end, ref_frame, ref_orf, ref_msgs, false, false]
163
162
  end
@@ -772,7 +771,7 @@ class TestCode
772
771
  valueY = ((valueY*1000.0).round/1000.0);
773
772
 
774
773
  # return 'The TestCode value is <b>' + valueY.to_s + '</b>, which indicates that the sequence ' + getConclusion(valueY) + '.';
775
- return [valueY.to_s, getConclusion(valueY)]
774
+ return [valueY, getConclusion(valueY)]
776
775
  end
777
776
 
778
777
  def calcParam (valueOne,valueTwo,valueThree)
@@ -0,0 +1,12 @@
1
+ FAILED = -4
2
+ OTHER = -3
3
+ CHIMERA = -2
4
+ MISASSEMBLED = -1
5
+ UNKNOWN = 0
6
+ COMPLETE = 1
7
+ N_TERMINAL = 2
8
+ C_TERMINAL = 3
9
+ INTERNAL = 4
10
+ NCRNA = 5
11
+ CODING = 6
12
+
@@ -4,284 +4,202 @@ include CommonFunctions
4
4
 
5
5
  class UneLosHit
6
6
 
7
- attr_reader :output_seq, :final_hit, :msgs, :number_x, :wrong_seq, :is_ok, :q_index_start, :full_prot
7
+ attr_reader :output_seq, :final_hit, :msgs, :number_x, :is_ok, :q_index_start, :full_prot
8
8
 
9
- def initialize(q, query_fasta, pident_threshold)
10
-
11
- (mismas_ids_array, query_fasta, wrong_seq) = hits_misma_id(q, query_fasta)
12
-
13
- @wrong_seq = wrong_seq
9
+ def initialize(full_hit, query_fasta)
10
+ #puts 'BEG ___________________'
11
+ #full_hit.map{|hsp| puts hsp.inspect}
12
+ mismas_ids_array, query_fasta = hits_misma_id(full_hit, query_fasta)
13
+ #puts '..................'
14
14
  @mismas_ids_array = mismas_ids_array
15
- @msgs = ''
16
- @number_x = 0
17
- num_x = ''
18
-
15
+ @msgs = []
19
16
  @output_seq = query_fasta
20
-
21
- if (mismas_ids_array.count > 1)
22
- mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
23
- @final_hit = mismas_ids_array[0].dup
24
17
 
18
+ if mismas_ids_array.count > 1
19
+ mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
20
+ @final_hit = mismas_ids_array.shift # We take first hsp like reference for unigene reconstruction
21
+ #puts @output_seq.length
22
+ #puts "\e[32m#{@final_hit.inspect}\e[0m"
25
23
  mismas_ids_array.each do |hit|
26
- if (hit.ident >= pident_threshold)
27
- # if ($verbose)
28
- # puts "#{hit.acc}\tsc:#{hit.score}\teval:#{hit.e_val}\tid:#{hit.ident}\tframe:#{hit.q_frame}\tqb:#{hit.q_beg + 1}\tqe:#{hit.q_end + 1}\tsb:#{hit.s_beg + 1}\tse:#{hit.s_end + 1}"
29
- # puts "#{query_fasta[hit.q_beg..hit.q_end].translate}"
30
- # end
31
-
32
- same = same_hit(hit)
33
-
34
- if (!same)
35
- if (@final_hit.q_frame == hit.q_frame)
36
- same_frame_hits(hit)
37
- else
38
- # condiciones para corregir el frame en el que tiene que continuar la seq de nt
39
- correccion_x = 0
40
- if (@final_hit.q_frame - hit.q_frame == 1)
41
- correccion_x = 1
42
- elsif (@final_hit.q_frame - hit.q_frame == 2)
43
- correccion_x = 2
44
- elsif (@final_hit.q_frame - hit.q_frame == -1)
45
- correccion_x = 2
46
- elsif (@final_hit.q_frame - hit.q_frame == -2)
47
- correccion_x = 1
48
- end
49
-
50
- # las secuencias solapan en el query
51
- if ((@final_hit.q_end >= hit.q_beg) && (@final_hit.q_end < hit.q_end)) && (hit.q_end > @final_hit.q_end + 15)
52
- overlapped_hits(hit, correccion_x, q)
53
- # puts "---#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
54
-
55
- elsif (@final_hit.q_end < hit.q_beg) && (hit.q_end > @final_hit.q_end + 15) # las secuencias estan separadas en el query
56
- separated_hits(hit, correccion_x, q)
57
- # puts "----------#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
58
- else
59
- @msgs = 'warning!, putative chimeric sequence! or repetitive structure'
60
- # puts "\nWARNING!, PUTATIVE CHIMERIC SEQUENCE !!!\n\n"
61
- end
62
- end
24
+ #puts '.....', "\e[31m#{hit.inspect}\e[0m"
25
+ ##if @final_hit.q_frame == hit.q_frame #Same frame
26
+ ## puts "\e[33mSame Frame\e[0m"
27
+ ## same_frame_hits_query(hit)
28
+ if overlapping_hits?(hit) #Diff frame
29
+ if @msgs.empty?
30
+ @msgs << ['OverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
31
+ else
32
+ @msgs << ['AndOverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
63
33
  end
64
- end # pident
34
+ #puts "\e[33mOverlapped hits\e[0m"
35
+ overlapped_hits_query(hit)
36
+ elsif separated_hits?(hit) #Diff frame
37
+ if @msgs.empty?
38
+ @msgs << ['SeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
39
+ else
40
+ @msgs << ['AndSeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
41
+ end
42
+ #puts "\e[33mSeparated hits\e[0m"
43
+ separated_hits(hit) #Diff frame
44
+ end
45
+ #puts @output_seq.length
46
+ #puts '.....'
47
+ #puts "\e[32m#{@final_hit.inspect}\e[0m"
65
48
  end
66
-
67
49
  else
68
-
69
- @final_hit = mismas_ids_array[0].dup
70
-
71
- # if ($verbose)
72
- # puts "***#{@final_hit.acc}\t#{@final_hit.score}\t#{@final_hit.e_val}\t#{@final_hit.ident}\t#{@final_hit.align_len}\t#{@final_hit.q_frame}\t#{@final_hit.q_beg + 1}\t#{@final_hit.q_end + 1}\t#{@final_hit.s_beg + 1}\t#{@final_hit.s_end + 1}\t#{@final_hit.q_seq}"
73
- # puts "#{query_fasta[@final_hit.q_beg..@final_hit.q_end].translate}"
74
- # end
50
+ @final_hit = mismas_ids_array.shift
75
51
  end
76
- # puts "***#{@final_hit.acc}\t#{@final_hit.score}\t#{@final_hit.e_val}\t#{@final_hit.ident}\t#{@final_hit.align_len}\t#{@final_hit.q_frame}\t#{@final_hit.q_beg + 1}\t#{@final_hit.q_end + 1}\t#{@final_hit.s_beg + 1}\t#{@final_hit.s_end + 1}\t#{@final_hit.q_seq}"
77
- # puts "#{query_fasta[@final_hit.q_beg..@final_hit.q_end].translate}"
78
-
79
- @full_prot = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
80
- # puts "\nfull_prot_ulh: \n#{full_prot}"
81
-
82
- (@is_ok, @q_index_start) = contenidos_en_prot(@final_hit, @full_prot, q)
83
-
52
+ #puts 'END ___________________'
53
+ #puts @final_hit.inspect
84
54
  end
85
55
 
86
-
87
-
88
- def same_hit(hit)
89
-
90
- same = false
91
- if (hit.score == @final_hit.score &&
92
- hit.q_beg == @final_hit.q_beg &&
93
- hit.q_end == @final_hit.q_end &&
94
- hit.s_beg == @final_hit.s_beg &&
95
- hit.s_end == @final_hit.s_end)
96
-
97
- same = true
98
- end
99
-
100
- return same
56
+ def same_frame_hits_query(hit)
57
+ @final_hit.q_seq = @output_seq[@final_hit.q_beg..@final_hit.q_end].translate
58
+ @final_hit.q_end = hit.q_end
59
+ @final_hit.s_end = hit.s_end
60
+ @final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
101
61
  end
102
62
 
103
63
 
64
+ def overlapped_hits_query(hit)
65
+ overlapped_nts = @final_hit.q_end - hit.q_beg + 1
66
+ add_nt = ajust_nt(hit.q_frame-1) # Fix frame-shift
67
+ @output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * (overlapped_nts + add_nt) + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
104
68
 
105
- def same_frame_hits(hit)
106
-
107
- @final_hit.q_beg = @final_hit.q_beg
108
- @final_hit.q_end = hit.q_end
109
-
110
- @final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
111
- @final_hit.s_end = [@final_hit.s_end,hit.s_end].max
69
+ @final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
70
+ @final_hit.q_end = hit.q_end + add_nt
112
71
 
72
+ @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
73
+ @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
74
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
75
+ @final_hit.q_len = @output_seq.length
113
76
  end
114
77
 
115
78
 
79
+ def separated_hits_query(hit)
80
+ separated_nts = hit.q_beg - @final_hit.q_end + 1
81
+ add_nt = ajust_nt(separated_nts) # Fix frame-shift
116
82
 
117
- def overlapped_hits(hit,correccion_x,q)
118
- # puts q.inspect
119
- # puts "los hits solapan!!!"
120
-
121
- if (@msgs.empty?)
122
- @msgs = "Overlapping hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
123
- else
124
- @msgs += " and overlapping frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
125
- end
126
-
127
- # ------------------------------------- preparamos los nt y aas que solapan
128
- overlapped_nt = 0
129
- overlapped_nt = (@final_hit.q_end - hit.q_beg + 1)
130
-
131
- overlapped_aas = 0
132
- overlapped_aas = (@final_hit.s_end - hit.s_beg + 1)
133
- # -------------------------------------
134
- if (overlapped_nt % 3 == 1)
135
- overlapped_nt += 2
136
- elsif (overlapped_nt % 3 == 2)
137
- overlapped_nt += 1
138
- end
139
- # ------------------------------------- calculamos el numero de x a añadir
140
- @number_x = (((correccion_x + overlapped_nt)/3)+1)*3
141
- @number_x_aa = overlapped_aas
142
- num_x = ''
143
- num_x_aa = ''
83
+ @output_seq = @output_seq[0..@final_hit.q_end - separated_nts] + 'n' * (separated_nts + add_nt) + @output_seq[@final_hit.q_end+1..@output_seq.length-1]
84
+ @final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
144
85
 
145
- if (@number_x.to_i > 0)
146
- num_x = 'x'*@number_x.to_i
147
- elsif (@number_x.to_i == 0)
148
- num_x = ''
149
- else
150
- @msgs = "ERROR#2 unexpected negative index in x_number, "
151
- # puts "ERROR#2 unexpected negative index in x_number"
152
- end
86
+ @final_hit.q_end = hit.q_end + add_nt
87
+ @final_hit.s_end = hit.s_end
88
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
89
+ @final_hit.q_len = @output_seq.length
90
+ end
153
91
 
154
- if (@number_x_aa.to_i > 0)
155
- num_x_aa = 'x'*@number_x_aa.to_i
156
- elsif (@number_x_aa.to_i == 0)
157
- num_x_aa = ''
158
- else
159
- num_x_aa = 'x'*@number_x_aa.to_i.abs
160
- @msgs = "Warning!, your query overlaps and the subject is separated, "
161
- end
92
+ def same_frame_hits(hit)
93
+ add = (hit.s_beg - @final_hit.s_end) + 1
94
+ nt_add = add *3
95
+ @final_hit.q_seq = @final_hit.q_seq + 'x' * add + hit.q_seq
96
+ @final_hit.s_seq = @final_hit.s_seq + 'x' * add + hit.s_seq
97
+ @output_seq = @output_seq[0..@final_hit.q_end-nt_add] + 'n'*nt_add + @output_seq[hit.q_beg+1..@output_seq.length-1]
98
+ @final_hit.q_end = hit.q_end
99
+ @final_hit.s_end = hit.s_end
100
+ @final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
101
+ end
162
102
 
163
- if (@number_x_aa.to_i >= 0)
164
- @final_hit.q_seq = "#{@final_hit.q_seq[0..@final_hit.q_seq.length - 1 - overlapped_aas]}#{num_x_aa}#{hit.q_seq[overlapped_aas..hit.q_seq.length]}"
165
- @final_hit.s_seq = "#{@final_hit.s_seq[0..@final_hit.s_seq.length - 1 - overlapped_aas]}#{num_x_aa}#{hit.s_seq[overlapped_aas..hit.s_seq.length]}"
166
- else
167
- @final_hit.q_seq = "#{@final_hit.q_seq[0..@final_hit.q_seq.length - 1]}#{num_x_aa}#{hit.q_seq[0..hit.q_seq.length]}"
168
- @final_hit.s_seq = "#{@final_hit.s_seq[0..@final_hit.s_seq.length - 1]}#{num_x_aa}#{hit.s_seq[0..hit.s_seq.length]}"
103
+ def overlapped_hits(hit) #Colapsa cuando hay hsps en diferentes partes de la query pero son lo mismo en el subject con lo cual los eliminamos antes
104
+ overlapped_aas = @final_hit.s_end - hit.s_beg + 1
105
+ overlapped_nts = @final_hit.q_end - hit.q_beg + 1
106
+ align_len_final_hit = @final_hit.q_seq.length
107
+ hit_gaps_query = @final_hit.q_seq[align_len_final_hit-overlapped_aas..align_len_final_hit].count('-')
108
+ hit_gaps_subject = @final_hit.s_seq[0..align_len_final_hit].count('-')
109
+ total_gaps = (hit_gaps_query - hit_gaps_subject).abs # Gaps añaden aa q no existen, x lo q han de descontarse
110
+ nt_discount = (overlapped_aas ) * 3
111
+
112
+ absolute_overlap = 1
113
+ if nt_discount < 0 #Not overlap on subject
114
+ overlapped_aas = overlapped_aas.abs
115
+ absolute_overlap = 0 #Don't cut q_seq and s_seq, because there is query overlap but there isn't subject overlap
116
+ nt_discount = @final_hit.q_end - hit.q_beg + 1
117
+ nt_discount += ajust_nt(nt_discount)
169
118
  end
170
-
171
-
172
- output_seq_tmp = "#{@output_seq[0..(@final_hit.q_end - overlapped_nt)]}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..(@output_seq.length)]}"
119
+ add_nt = overlapped_nts + ajust_nt(hit.q_frame-1)
120
+ @output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * add_nt + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
173
121
 
174
- full_prot_tmp = output_seq_tmp[@final_hit.q_frame-1, output_seq_tmp.length+1].translate
175
-
176
- (is_ok_tmp, q_index_start_tmp) = contenidos_en_prot(@final_hit, full_prot_tmp, q)
122
+ #q_seq and s_seq are aa sequences
123
+ final_hit_upper_bound = @final_hit.q_seq.length - 1 - overlapped_aas * absolute_overlap
124
+ @final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.q_seq[overlapped_aas * absolute_overlap .. hit.q_seq.length-1]
125
+ @final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.s_seq[overlapped_aas * absolute_overlap .. hit.s_seq.length-1]
177
126
 
178
- @output_seq = output_seq_tmp.dup
179
-
180
- @final_hit.q_beg = @final_hit.q_beg
181
127
  @final_hit.q_end = hit.q_end
182
128
 
183
- @final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
184
- @final_hit.s_end = [@final_hit.s_end,hit.s_end].max
185
-
129
+ @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
130
+ @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
131
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
186
132
  end
187
133
 
134
+ def separated_hits(hit)
135
+ number_x = hit.q_beg - @final_hit.q_end - 1
136
+ number_x += ajust_nt(number_x)
188
137
 
189
-
190
- def separated_hits(hit,correccion_x,q)
191
-
192
- # puts "los hits estan separados!!!"
193
-
194
- if (@msgs.empty?)
195
- @msgs = "Separated hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
196
- else
197
- @msgs += " and possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
198
- end
199
-
200
- # PROBAR CON % 3!!!!!!!!!!!!!!!!!!!!!!!!
201
- @number_x = (hit.q_beg - @final_hit.q_end - 1)
202
- # @number_x = (hit.q_beg - @final_hit.q_end - 1) + correccion_x
203
- if (@number_x % 3 == 1)
204
- @number_x += 2
205
- elsif (@number_x % 3 == 2)
206
- @number_x += 1
207
- end
208
-
209
-
210
- if (@number_x.to_i > 0)
211
- num_x = 'x'*@number_x.to_i
212
- num_x_aa = 'x'*(@number_x.to_i/3)
213
- elsif (@number_x.to_i == 0)
214
- num_x = ''
215
- num_x_aa = ''
216
- else
217
- @msgs = "ERROR#2 unexpected negative index in x_number"
218
- # puts "ERROR#2 unexpected negative index in x_number"
138
+ num_x = ''
139
+ num_x_aa = ''
140
+ if number_x > 0
141
+ num_x = 'n'*number_x
142
+ num_x_aa = 'x'*(number_x/3)
219
143
  end
144
+ @output_seq = @output_seq[0..@final_hit.q_end-1] + num_x + @output_seq[hit.q_beg-1..@output_seq.length-1]
145
+ @final_hit.q_seq = @final_hit.q_seq[0, @final_hit.q_seq.length] + num_x_aa + hit.q_seq[0, hit.q_seq.length]
146
+ @final_hit.s_seq = @final_hit.s_seq[0, @final_hit.s_seq.length] + num_x_aa + hit.s_seq[0, hit.s_seq.length]
220
147
 
221
- @output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
222
- # @output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
223
-
224
- @final_hit.score += 1
225
- @final_hit.q_beg = @final_hit.q_beg
226
148
  @final_hit.q_end = hit.q_end
227
- @final_hit.s_beg = @final_hit.s_beg
228
149
  @final_hit.s_end = hit.s_end
229
-
230
- @final_hit.q_seq = "#{@final_hit.q_seq[0, @final_hit.q_seq.length]}#{num_x_aa}#{hit.q_seq[0, hit.q_seq.length]}"
231
- @final_hit.s_seq = "#{@final_hit.s_seq[0, @final_hit.s_seq.length]}#{num_x_aa}#{hit.s_seq[0, hit.s_seq.length]}"
232
-
233
- num_x = ''
234
- num_x_aa = ''
235
-
236
- full_prot_tmp = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
237
- # puts "\n\nfull_prot_tmp:#{full_prot_tmp}"
238
- # puts "\n\n#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1]}"
239
-
240
- (is_ok_tmp, q_index_start_tmp) = contenidos_en_prot(@final_hit, full_prot_tmp, q)
241
-
242
- # puts "#{q.query_def}: is_ok_tmp: #{is_ok_tmp} separated hits"
150
+ @final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
243
151
  end
244
152
 
245
153
 
246
154
 
247
155
  # creamos un array en el que esten solo los hits con la misma id.
248
- def hits_misma_id(q, query_fasta_ori)
249
-
250
- # Se les hace la reveso complementaria si es necesario, y se comprueba que para un mismo query no hay hits en sentidos diferentes
251
- wrong_seq = false
252
-
156
+ def hits_misma_id(full_hit, query_fasta_ori)
157
+ # Se les hace la reverso complementaria si es necesario
253
158
  misma_id = []
254
-
255
159
  query_fasta = query_fasta_ori.dup
256
- frame_ori = q.hits[0].q_frame
257
-
258
- q.hits.each do |h|
259
-
260
- # puts "#{q.query_def} f_ori :#{frame_ori} y h_f: #{h.q_frame}"
261
- reversed_hit = false # con respecto al primer hit, que es el de mayor score o evalue
262
- if (h.acc == q.hits[0].acc)
263
-
264
- # comprobar si los frames tiene el mismo sentido
265
- if ((frame_ori < 0 && h.q_frame > 0) or (frame_ori > 0 && h.q_frame < 0))
266
- wrong_seq = true
267
- reversed_hit = true
268
- # puts "The sequence #{q.query_def} contains sense and antisense hits!!! #{frame_ori} y #{h.q_frame}"
269
- end
270
-
271
- if (reversed_hit == false)
272
- if (h.q_frame.to_i < 0) # si la secuencia esta al reves le damos la vuelta
273
- (query_fasta, h.q_frame, h.q_beg, h.q_end) = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
274
- h.reversed = true
160
+ #frame_ori = q.hits.first.q_frame
161
+
162
+ full_hit.each do |h|
163
+ if h.acc == full_hit.first.acc
164
+ # comprobar si los frames tienen el mismo sentido
165
+ #if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
166
+ if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
167
+ query_fasta = reverse_seq(query_fasta_ori, h)
168
+ h.reversed = TRUE
275
169
  end
276
- misma_id.push h
277
- end
278
-
170
+ misma_id << h
171
+ #end
279
172
  end
173
+ end
174
+
175
+ return misma_id, query_fasta
176
+ end
280
177
 
178
+ def overlapping_hits?(hit)
179
+ overlap = FALSE
180
+ if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
181
+ overlap = TRUE
281
182
  end
183
+ return overlap
184
+ end
282
185
 
283
- return [misma_id, query_fasta, wrong_seq]
186
+ def separated_hits?(hit)
187
+ separated=FALSE
188
+ if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
189
+ separated = TRUE
190
+ end
191
+ return separated
284
192
  end
285
193
 
286
194
 
195
+
196
+ def ajust_nt(nt) # Returns the number of nt necessary for keep the ORF (or nt becomes multiple of 3)
197
+ add=0
198
+ if nt % 3 == 1
199
+ add = 2
200
+ elsif nt % 3 == 2
201
+ add = 1
202
+ end
203
+ return add
204
+ end
287
205
  end