full_lengther_next 0.0.8 → 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.txt +2 -2
- data/Manifest.txt +33 -18
- data/Rakefile +4 -2
- data/bin/download_fln_dbs.rb +310 -158
- data/bin/full_lengther_next +160 -103
- data/bin/make_test_dataset.rb +236 -0
- data/bin/make_user_db.rb +101 -117
- data/bin/plot_fln.rb +270 -0
- data/bin/plot_taxonomy.rb +70 -0
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next.rb +3 -3
- data/lib/full_lengther_next/classes/artifacts.rb +66 -0
- data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
- data/lib/full_lengther_next/classes/cdhit.rb +154 -0
- data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
- data/lib/full_lengther_next/classes/common_functions.rb +105 -63
- data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
- data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
- data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
- data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
- data/lib/full_lengther_next/classes/handle_db.rb +30 -0
- data/lib/full_lengther_next/classes/my_worker.rb +308 -138
- data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
- data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
- data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
- data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
- data/lib/full_lengther_next/classes/reptrans.rb +210 -0
- data/lib/full_lengther_next/classes/sequence.rb +439 -80
- data/lib/full_lengther_next/classes/test_code.rb +15 -16
- data/lib/full_lengther_next/classes/types.rb +12 -0
- data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
- data/lib/full_lengther_next/classes/warnings.rb +40 -0
- metadata +207 -93
- data/lib/full_lengther_next/classes/lcs.rb +0 -33
- data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
@@ -18,16 +18,11 @@ class TestCode
|
|
18
18
|
protein = ''
|
19
19
|
p_long = 0
|
20
20
|
|
21
|
-
if
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
ref_orf = ''
|
27
|
-
ref_msgs = 'Sequence length < 200 nt'
|
28
|
-
|
29
|
-
seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.fasta_length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
|
30
|
-
# seq.annotate(:tcode,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
|
21
|
+
if seq.fasta_length < 200
|
22
|
+
seq.type = UNKNOWN
|
23
|
+
seq.test_code(0.0)
|
24
|
+
seq.warnings('<200nt')
|
25
|
+
seq.hit = [ref_start, ref_end, 0] #Last element is ref_frame
|
31
26
|
else
|
32
27
|
|
33
28
|
# para probar tescode con toda la secuencia, en lugar de con los ORFs ----------------------------------------------------------------------
|
@@ -43,11 +38,15 @@ class TestCode
|
|
43
38
|
|
44
39
|
|
45
40
|
# see add_region filter
|
46
|
-
|
47
|
-
|
48
|
-
|
41
|
+
name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame = t_code(seq)
|
42
|
+
seq.test_code(t_code)
|
43
|
+
seq.warnings(ref_msgs)
|
44
|
+
seq.hit = [ref_start, ref_end, ref_frame]
|
45
|
+
|
46
|
+
if status == :unknown
|
47
|
+
seq.type = UNKNOWN
|
49
48
|
else
|
50
|
-
seq.
|
49
|
+
seq.type = CODING
|
51
50
|
end
|
52
51
|
|
53
52
|
# if (ref_msgs.nil?)
|
@@ -157,7 +156,7 @@ class TestCode
|
|
157
156
|
ref_end = 0
|
158
157
|
ref_frame = 0
|
159
158
|
ref_orf = ''
|
160
|
-
ref_type =
|
159
|
+
ref_type = :unknown
|
161
160
|
ref_msgs = 'Non coding ORF found >= 200 nt '
|
162
161
|
return [name, ref_score, ref_type, ref_start, ref_end, ref_frame, ref_orf, ref_msgs, false, false]
|
163
162
|
end
|
@@ -772,7 +771,7 @@ class TestCode
|
|
772
771
|
valueY = ((valueY*1000.0).round/1000.0);
|
773
772
|
|
774
773
|
# return 'The TestCode value is <b>' + valueY.to_s + '</b>, which indicates that the sequence ' + getConclusion(valueY) + '.';
|
775
|
-
return [valueY
|
774
|
+
return [valueY, getConclusion(valueY)]
|
776
775
|
end
|
777
776
|
|
778
777
|
def calcParam (valueOne,valueTwo,valueThree)
|
@@ -4,284 +4,202 @@ include CommonFunctions
|
|
4
4
|
|
5
5
|
class UneLosHit
|
6
6
|
|
7
|
-
attr_reader :output_seq, :final_hit, :msgs, :number_x, :
|
7
|
+
attr_reader :output_seq, :final_hit, :msgs, :number_x, :is_ok, :q_index_start, :full_prot
|
8
8
|
|
9
|
-
def initialize(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
def initialize(full_hit, query_fasta)
|
10
|
+
#puts 'BEG ___________________'
|
11
|
+
#full_hit.map{|hsp| puts hsp.inspect}
|
12
|
+
mismas_ids_array, query_fasta = hits_misma_id(full_hit, query_fasta)
|
13
|
+
#puts '..................'
|
14
14
|
@mismas_ids_array = mismas_ids_array
|
15
|
-
@msgs =
|
16
|
-
@number_x = 0
|
17
|
-
num_x = ''
|
18
|
-
|
15
|
+
@msgs = []
|
19
16
|
@output_seq = query_fasta
|
20
|
-
|
21
|
-
if (mismas_ids_array.count > 1)
|
22
|
-
mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
|
23
|
-
@final_hit = mismas_ids_array[0].dup
|
24
17
|
|
18
|
+
if mismas_ids_array.count > 1
|
19
|
+
mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
|
20
|
+
@final_hit = mismas_ids_array.shift # We take first hsp like reference for unigene reconstruction
|
21
|
+
#puts @output_seq.length
|
22
|
+
#puts "\e[32m#{@final_hit.inspect}\e[0m"
|
25
23
|
mismas_ids_array.each do |hit|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
if (@final_hit.q_frame == hit.q_frame)
|
36
|
-
same_frame_hits(hit)
|
37
|
-
else
|
38
|
-
# condiciones para corregir el frame en el que tiene que continuar la seq de nt
|
39
|
-
correccion_x = 0
|
40
|
-
if (@final_hit.q_frame - hit.q_frame == 1)
|
41
|
-
correccion_x = 1
|
42
|
-
elsif (@final_hit.q_frame - hit.q_frame == 2)
|
43
|
-
correccion_x = 2
|
44
|
-
elsif (@final_hit.q_frame - hit.q_frame == -1)
|
45
|
-
correccion_x = 2
|
46
|
-
elsif (@final_hit.q_frame - hit.q_frame == -2)
|
47
|
-
correccion_x = 1
|
48
|
-
end
|
49
|
-
|
50
|
-
# las secuencias solapan en el query
|
51
|
-
if ((@final_hit.q_end >= hit.q_beg) && (@final_hit.q_end < hit.q_end)) && (hit.q_end > @final_hit.q_end + 15)
|
52
|
-
overlapped_hits(hit, correccion_x, q)
|
53
|
-
# puts "---#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
|
54
|
-
|
55
|
-
elsif (@final_hit.q_end < hit.q_beg) && (hit.q_end > @final_hit.q_end + 15) # las secuencias estan separadas en el query
|
56
|
-
separated_hits(hit, correccion_x, q)
|
57
|
-
# puts "----------#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
|
58
|
-
else
|
59
|
-
@msgs = 'warning!, putative chimeric sequence! or repetitive structure'
|
60
|
-
# puts "\nWARNING!, PUTATIVE CHIMERIC SEQUENCE !!!\n\n"
|
61
|
-
end
|
62
|
-
end
|
24
|
+
#puts '.....', "\e[31m#{hit.inspect}\e[0m"
|
25
|
+
##if @final_hit.q_frame == hit.q_frame #Same frame
|
26
|
+
## puts "\e[33mSame Frame\e[0m"
|
27
|
+
## same_frame_hits_query(hit)
|
28
|
+
if overlapping_hits?(hit) #Diff frame
|
29
|
+
if @msgs.empty?
|
30
|
+
@msgs << ['OverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
31
|
+
else
|
32
|
+
@msgs << ['AndOverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
63
33
|
end
|
64
|
-
|
34
|
+
#puts "\e[33mOverlapped hits\e[0m"
|
35
|
+
overlapped_hits_query(hit)
|
36
|
+
elsif separated_hits?(hit) #Diff frame
|
37
|
+
if @msgs.empty?
|
38
|
+
@msgs << ['SeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
39
|
+
else
|
40
|
+
@msgs << ['AndSeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
41
|
+
end
|
42
|
+
#puts "\e[33mSeparated hits\e[0m"
|
43
|
+
separated_hits(hit) #Diff frame
|
44
|
+
end
|
45
|
+
#puts @output_seq.length
|
46
|
+
#puts '.....'
|
47
|
+
#puts "\e[32m#{@final_hit.inspect}\e[0m"
|
65
48
|
end
|
66
|
-
|
67
49
|
else
|
68
|
-
|
69
|
-
@final_hit = mismas_ids_array[0].dup
|
70
|
-
|
71
|
-
# if ($verbose)
|
72
|
-
# puts "***#{@final_hit.acc}\t#{@final_hit.score}\t#{@final_hit.e_val}\t#{@final_hit.ident}\t#{@final_hit.align_len}\t#{@final_hit.q_frame}\t#{@final_hit.q_beg + 1}\t#{@final_hit.q_end + 1}\t#{@final_hit.s_beg + 1}\t#{@final_hit.s_end + 1}\t#{@final_hit.q_seq}"
|
73
|
-
# puts "#{query_fasta[@final_hit.q_beg..@final_hit.q_end].translate}"
|
74
|
-
# end
|
50
|
+
@final_hit = mismas_ids_array.shift
|
75
51
|
end
|
76
|
-
#
|
77
|
-
#
|
78
|
-
|
79
|
-
@full_prot = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
|
80
|
-
# puts "\nfull_prot_ulh: \n#{full_prot}"
|
81
|
-
|
82
|
-
(@is_ok, @q_index_start) = contenidos_en_prot(@final_hit, @full_prot, q)
|
83
|
-
|
52
|
+
#puts 'END ___________________'
|
53
|
+
#puts @final_hit.inspect
|
84
54
|
end
|
85
55
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
if (hit.score == @final_hit.score &&
|
92
|
-
hit.q_beg == @final_hit.q_beg &&
|
93
|
-
hit.q_end == @final_hit.q_end &&
|
94
|
-
hit.s_beg == @final_hit.s_beg &&
|
95
|
-
hit.s_end == @final_hit.s_end)
|
96
|
-
|
97
|
-
same = true
|
98
|
-
end
|
99
|
-
|
100
|
-
return same
|
56
|
+
def same_frame_hits_query(hit)
|
57
|
+
@final_hit.q_seq = @output_seq[@final_hit.q_beg..@final_hit.q_end].translate
|
58
|
+
@final_hit.q_end = hit.q_end
|
59
|
+
@final_hit.s_end = hit.s_end
|
60
|
+
@final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
|
101
61
|
end
|
102
62
|
|
103
63
|
|
64
|
+
def overlapped_hits_query(hit)
|
65
|
+
overlapped_nts = @final_hit.q_end - hit.q_beg + 1
|
66
|
+
add_nt = ajust_nt(hit.q_frame-1) # Fix frame-shift
|
67
|
+
@output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * (overlapped_nts + add_nt) + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
|
104
68
|
|
105
|
-
|
106
|
-
|
107
|
-
@final_hit.q_beg = @final_hit.q_beg
|
108
|
-
@final_hit.q_end = hit.q_end
|
109
|
-
|
110
|
-
@final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
|
111
|
-
@final_hit.s_end = [@final_hit.s_end,hit.s_end].max
|
69
|
+
@final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
|
70
|
+
@final_hit.q_end = hit.q_end + add_nt
|
112
71
|
|
72
|
+
@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
|
73
|
+
@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
|
74
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
75
|
+
@final_hit.q_len = @output_seq.length
|
113
76
|
end
|
114
77
|
|
115
78
|
|
79
|
+
def separated_hits_query(hit)
|
80
|
+
separated_nts = hit.q_beg - @final_hit.q_end + 1
|
81
|
+
add_nt = ajust_nt(separated_nts) # Fix frame-shift
|
116
82
|
|
117
|
-
|
118
|
-
|
119
|
-
# puts "los hits solapan!!!"
|
120
|
-
|
121
|
-
if (@msgs.empty?)
|
122
|
-
@msgs = "Overlapping hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
123
|
-
else
|
124
|
-
@msgs += " and overlapping frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
125
|
-
end
|
126
|
-
|
127
|
-
# ------------------------------------- preparamos los nt y aas que solapan
|
128
|
-
overlapped_nt = 0
|
129
|
-
overlapped_nt = (@final_hit.q_end - hit.q_beg + 1)
|
130
|
-
|
131
|
-
overlapped_aas = 0
|
132
|
-
overlapped_aas = (@final_hit.s_end - hit.s_beg + 1)
|
133
|
-
# -------------------------------------
|
134
|
-
if (overlapped_nt % 3 == 1)
|
135
|
-
overlapped_nt += 2
|
136
|
-
elsif (overlapped_nt % 3 == 2)
|
137
|
-
overlapped_nt += 1
|
138
|
-
end
|
139
|
-
# ------------------------------------- calculamos el numero de x a añadir
|
140
|
-
@number_x = (((correccion_x + overlapped_nt)/3)+1)*3
|
141
|
-
@number_x_aa = overlapped_aas
|
142
|
-
num_x = ''
|
143
|
-
num_x_aa = ''
|
83
|
+
@output_seq = @output_seq[0..@final_hit.q_end - separated_nts] + 'n' * (separated_nts + add_nt) + @output_seq[@final_hit.q_end+1..@output_seq.length-1]
|
84
|
+
@final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
|
144
85
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
@msgs = "ERROR#2 unexpected negative index in x_number, "
|
151
|
-
# puts "ERROR#2 unexpected negative index in x_number"
|
152
|
-
end
|
86
|
+
@final_hit.q_end = hit.q_end + add_nt
|
87
|
+
@final_hit.s_end = hit.s_end
|
88
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
89
|
+
@final_hit.q_len = @output_seq.length
|
90
|
+
end
|
153
91
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
92
|
+
def same_frame_hits(hit)
|
93
|
+
add = (hit.s_beg - @final_hit.s_end) + 1
|
94
|
+
nt_add = add *3
|
95
|
+
@final_hit.q_seq = @final_hit.q_seq + 'x' * add + hit.q_seq
|
96
|
+
@final_hit.s_seq = @final_hit.s_seq + 'x' * add + hit.s_seq
|
97
|
+
@output_seq = @output_seq[0..@final_hit.q_end-nt_add] + 'n'*nt_add + @output_seq[hit.q_beg+1..@output_seq.length-1]
|
98
|
+
@final_hit.q_end = hit.q_end
|
99
|
+
@final_hit.s_end = hit.s_end
|
100
|
+
@final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
|
101
|
+
end
|
162
102
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
103
|
+
def overlapped_hits(hit) #Colapsa cuando hay hsps en diferentes partes de la query pero son lo mismo en el subject con lo cual los eliminamos antes
|
104
|
+
overlapped_aas = @final_hit.s_end - hit.s_beg + 1
|
105
|
+
overlapped_nts = @final_hit.q_end - hit.q_beg + 1
|
106
|
+
align_len_final_hit = @final_hit.q_seq.length
|
107
|
+
hit_gaps_query = @final_hit.q_seq[align_len_final_hit-overlapped_aas..align_len_final_hit].count('-')
|
108
|
+
hit_gaps_subject = @final_hit.s_seq[0..align_len_final_hit].count('-')
|
109
|
+
total_gaps = (hit_gaps_query - hit_gaps_subject).abs # Gaps añaden aa q no existen, x lo q han de descontarse
|
110
|
+
nt_discount = (overlapped_aas ) * 3
|
111
|
+
|
112
|
+
absolute_overlap = 1
|
113
|
+
if nt_discount < 0 #Not overlap on subject
|
114
|
+
overlapped_aas = overlapped_aas.abs
|
115
|
+
absolute_overlap = 0 #Don't cut q_seq and s_seq, because there is query overlap but there isn't subject overlap
|
116
|
+
nt_discount = @final_hit.q_end - hit.q_beg + 1
|
117
|
+
nt_discount += ajust_nt(nt_discount)
|
169
118
|
end
|
170
|
-
|
171
|
-
|
172
|
-
output_seq_tmp = "#{@output_seq[0..(@final_hit.q_end - overlapped_nt)]}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..(@output_seq.length)]}"
|
119
|
+
add_nt = overlapped_nts + ajust_nt(hit.q_frame-1)
|
120
|
+
@output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * add_nt + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
|
173
121
|
|
174
|
-
|
175
|
-
|
176
|
-
|
122
|
+
#q_seq and s_seq are aa sequences
|
123
|
+
final_hit_upper_bound = @final_hit.q_seq.length - 1 - overlapped_aas * absolute_overlap
|
124
|
+
@final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.q_seq[overlapped_aas * absolute_overlap .. hit.q_seq.length-1]
|
125
|
+
@final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.s_seq[overlapped_aas * absolute_overlap .. hit.s_seq.length-1]
|
177
126
|
|
178
|
-
@output_seq = output_seq_tmp.dup
|
179
|
-
|
180
|
-
@final_hit.q_beg = @final_hit.q_beg
|
181
127
|
@final_hit.q_end = hit.q_end
|
182
128
|
|
183
|
-
@final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
|
184
|
-
@final_hit.s_end = [@final_hit.s_end,hit.s_end].max
|
185
|
-
|
129
|
+
@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
|
130
|
+
@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
|
131
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
186
132
|
end
|
187
133
|
|
134
|
+
def separated_hits(hit)
|
135
|
+
number_x = hit.q_beg - @final_hit.q_end - 1
|
136
|
+
number_x += ajust_nt(number_x)
|
188
137
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
if (@msgs.empty?)
|
195
|
-
@msgs = "Separated hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
196
|
-
else
|
197
|
-
@msgs += " and possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
198
|
-
end
|
199
|
-
|
200
|
-
# PROBAR CON % 3!!!!!!!!!!!!!!!!!!!!!!!!
|
201
|
-
@number_x = (hit.q_beg - @final_hit.q_end - 1)
|
202
|
-
# @number_x = (hit.q_beg - @final_hit.q_end - 1) + correccion_x
|
203
|
-
if (@number_x % 3 == 1)
|
204
|
-
@number_x += 2
|
205
|
-
elsif (@number_x % 3 == 2)
|
206
|
-
@number_x += 1
|
207
|
-
end
|
208
|
-
|
209
|
-
|
210
|
-
if (@number_x.to_i > 0)
|
211
|
-
num_x = 'x'*@number_x.to_i
|
212
|
-
num_x_aa = 'x'*(@number_x.to_i/3)
|
213
|
-
elsif (@number_x.to_i == 0)
|
214
|
-
num_x = ''
|
215
|
-
num_x_aa = ''
|
216
|
-
else
|
217
|
-
@msgs = "ERROR#2 unexpected negative index in x_number"
|
218
|
-
# puts "ERROR#2 unexpected negative index in x_number"
|
138
|
+
num_x = ''
|
139
|
+
num_x_aa = ''
|
140
|
+
if number_x > 0
|
141
|
+
num_x = 'n'*number_x
|
142
|
+
num_x_aa = 'x'*(number_x/3)
|
219
143
|
end
|
144
|
+
@output_seq = @output_seq[0..@final_hit.q_end-1] + num_x + @output_seq[hit.q_beg-1..@output_seq.length-1]
|
145
|
+
@final_hit.q_seq = @final_hit.q_seq[0, @final_hit.q_seq.length] + num_x_aa + hit.q_seq[0, hit.q_seq.length]
|
146
|
+
@final_hit.s_seq = @final_hit.s_seq[0, @final_hit.s_seq.length] + num_x_aa + hit.s_seq[0, hit.s_seq.length]
|
220
147
|
|
221
|
-
@output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
|
222
|
-
# @output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
|
223
|
-
|
224
|
-
@final_hit.score += 1
|
225
|
-
@final_hit.q_beg = @final_hit.q_beg
|
226
148
|
@final_hit.q_end = hit.q_end
|
227
|
-
@final_hit.s_beg = @final_hit.s_beg
|
228
149
|
@final_hit.s_end = hit.s_end
|
229
|
-
|
230
|
-
@final_hit.q_seq = "#{@final_hit.q_seq[0, @final_hit.q_seq.length]}#{num_x_aa}#{hit.q_seq[0, hit.q_seq.length]}"
|
231
|
-
@final_hit.s_seq = "#{@final_hit.s_seq[0, @final_hit.s_seq.length]}#{num_x_aa}#{hit.s_seq[0, hit.s_seq.length]}"
|
232
|
-
|
233
|
-
num_x = ''
|
234
|
-
num_x_aa = ''
|
235
|
-
|
236
|
-
full_prot_tmp = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
|
237
|
-
# puts "\n\nfull_prot_tmp:#{full_prot_tmp}"
|
238
|
-
# puts "\n\n#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1]}"
|
239
|
-
|
240
|
-
(is_ok_tmp, q_index_start_tmp) = contenidos_en_prot(@final_hit, full_prot_tmp, q)
|
241
|
-
|
242
|
-
# puts "#{q.query_def}: is_ok_tmp: #{is_ok_tmp} separated hits"
|
150
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
243
151
|
end
|
244
152
|
|
245
153
|
|
246
154
|
|
247
155
|
# creamos un array en el que esten solo los hits con la misma id.
|
248
|
-
def hits_misma_id(
|
249
|
-
|
250
|
-
# Se les hace la reveso complementaria si es necesario, y se comprueba que para un mismo query no hay hits en sentidos diferentes
|
251
|
-
wrong_seq = false
|
252
|
-
|
156
|
+
def hits_misma_id(full_hit, query_fasta_ori)
|
157
|
+
# Se les hace la reverso complementaria si es necesario
|
253
158
|
misma_id = []
|
254
|
-
|
255
159
|
query_fasta = query_fasta_ori.dup
|
256
|
-
frame_ori = q.hits
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
if ((frame_ori < 0 && h.q_frame > 0) or (frame_ori > 0 && h.q_frame < 0))
|
266
|
-
wrong_seq = true
|
267
|
-
reversed_hit = true
|
268
|
-
# puts "The sequence #{q.query_def} contains sense and antisense hits!!! #{frame_ori} y #{h.q_frame}"
|
269
|
-
end
|
270
|
-
|
271
|
-
if (reversed_hit == false)
|
272
|
-
if (h.q_frame.to_i < 0) # si la secuencia esta al reves le damos la vuelta
|
273
|
-
(query_fasta, h.q_frame, h.q_beg, h.q_end) = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
|
274
|
-
h.reversed = true
|
160
|
+
#frame_ori = q.hits.first.q_frame
|
161
|
+
|
162
|
+
full_hit.each do |h|
|
163
|
+
if h.acc == full_hit.first.acc
|
164
|
+
# comprobar si los frames tienen el mismo sentido
|
165
|
+
#if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
|
166
|
+
if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
|
167
|
+
query_fasta = reverse_seq(query_fasta_ori, h)
|
168
|
+
h.reversed = TRUE
|
275
169
|
end
|
276
|
-
misma_id
|
277
|
-
end
|
278
|
-
|
170
|
+
misma_id << h
|
171
|
+
#end
|
279
172
|
end
|
173
|
+
end
|
174
|
+
|
175
|
+
return misma_id, query_fasta
|
176
|
+
end
|
280
177
|
|
178
|
+
def overlapping_hits?(hit)
|
179
|
+
overlap = FALSE
|
180
|
+
if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
|
181
|
+
overlap = TRUE
|
281
182
|
end
|
183
|
+
return overlap
|
184
|
+
end
|
282
185
|
|
283
|
-
|
186
|
+
def separated_hits?(hit)
|
187
|
+
separated=FALSE
|
188
|
+
if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
|
189
|
+
separated = TRUE
|
190
|
+
end
|
191
|
+
return separated
|
284
192
|
end
|
285
193
|
|
286
194
|
|
195
|
+
|
196
|
+
def ajust_nt(nt) # Returns the number of nt necessary for keep the ORF (or nt becomes multiple of 3)
|
197
|
+
add=0
|
198
|
+
if nt % 3 == 1
|
199
|
+
add = 2
|
200
|
+
elsif nt % 3 == 2
|
201
|
+
add = 1
|
202
|
+
end
|
203
|
+
return add
|
204
|
+
end
|
287
205
|
end
|