full_lengther_next 0.0.8 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.txt +2 -2
- data/Manifest.txt +33 -18
- data/Rakefile +4 -2
- data/bin/download_fln_dbs.rb +310 -158
- data/bin/full_lengther_next +160 -103
- data/bin/make_test_dataset.rb +236 -0
- data/bin/make_user_db.rb +101 -117
- data/bin/plot_fln.rb +270 -0
- data/bin/plot_taxonomy.rb +70 -0
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next.rb +3 -3
- data/lib/full_lengther_next/classes/artifacts.rb +66 -0
- data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
- data/lib/full_lengther_next/classes/cdhit.rb +154 -0
- data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
- data/lib/full_lengther_next/classes/common_functions.rb +105 -63
- data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
- data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
- data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
- data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
- data/lib/full_lengther_next/classes/handle_db.rb +30 -0
- data/lib/full_lengther_next/classes/my_worker.rb +308 -138
- data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
- data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
- data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
- data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
- data/lib/full_lengther_next/classes/reptrans.rb +210 -0
- data/lib/full_lengther_next/classes/sequence.rb +439 -80
- data/lib/full_lengther_next/classes/test_code.rb +15 -16
- data/lib/full_lengther_next/classes/types.rb +12 -0
- data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
- data/lib/full_lengther_next/classes/warnings.rb +40 -0
- metadata +207 -93
- data/lib/full_lengther_next/classes/lcs.rb +0 -33
- data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
@@ -18,16 +18,11 @@ class TestCode
|
|
18
18
|
protein = ''
|
19
19
|
p_long = 0
|
20
20
|
|
21
|
-
if
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
ref_orf = ''
|
27
|
-
ref_msgs = 'Sequence length < 200 nt'
|
28
|
-
|
29
|
-
seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.fasta_length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
|
30
|
-
# seq.annotate(:tcode,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
|
21
|
+
if seq.fasta_length < 200
|
22
|
+
seq.type = UNKNOWN
|
23
|
+
seq.test_code(0.0)
|
24
|
+
seq.warnings('<200nt')
|
25
|
+
seq.hit = [ref_start, ref_end, 0] #Last element is ref_frame
|
31
26
|
else
|
32
27
|
|
33
28
|
# para probar tescode con toda la secuencia, en lugar de con los ORFs ----------------------------------------------------------------------
|
@@ -43,11 +38,15 @@ class TestCode
|
|
43
38
|
|
44
39
|
|
45
40
|
# see add_region filter
|
46
|
-
|
47
|
-
|
48
|
-
|
41
|
+
name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame = t_code(seq)
|
42
|
+
seq.test_code(t_code)
|
43
|
+
seq.warnings(ref_msgs)
|
44
|
+
seq.hit = [ref_start, ref_end, ref_frame]
|
45
|
+
|
46
|
+
if status == :unknown
|
47
|
+
seq.type = UNKNOWN
|
49
48
|
else
|
50
|
-
seq.
|
49
|
+
seq.type = CODING
|
51
50
|
end
|
52
51
|
|
53
52
|
# if (ref_msgs.nil?)
|
@@ -157,7 +156,7 @@ class TestCode
|
|
157
156
|
ref_end = 0
|
158
157
|
ref_frame = 0
|
159
158
|
ref_orf = ''
|
160
|
-
ref_type =
|
159
|
+
ref_type = :unknown
|
161
160
|
ref_msgs = 'Non coding ORF found >= 200 nt '
|
162
161
|
return [name, ref_score, ref_type, ref_start, ref_end, ref_frame, ref_orf, ref_msgs, false, false]
|
163
162
|
end
|
@@ -772,7 +771,7 @@ class TestCode
|
|
772
771
|
valueY = ((valueY*1000.0).round/1000.0);
|
773
772
|
|
774
773
|
# return 'The TestCode value is <b>' + valueY.to_s + '</b>, which indicates that the sequence ' + getConclusion(valueY) + '.';
|
775
|
-
return [valueY
|
774
|
+
return [valueY, getConclusion(valueY)]
|
776
775
|
end
|
777
776
|
|
778
777
|
def calcParam (valueOne,valueTwo,valueThree)
|
@@ -4,284 +4,202 @@ include CommonFunctions
|
|
4
4
|
|
5
5
|
class UneLosHit
|
6
6
|
|
7
|
-
attr_reader :output_seq, :final_hit, :msgs, :number_x, :
|
7
|
+
attr_reader :output_seq, :final_hit, :msgs, :number_x, :is_ok, :q_index_start, :full_prot
|
8
8
|
|
9
|
-
def initialize(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
def initialize(full_hit, query_fasta)
|
10
|
+
#puts 'BEG ___________________'
|
11
|
+
#full_hit.map{|hsp| puts hsp.inspect}
|
12
|
+
mismas_ids_array, query_fasta = hits_misma_id(full_hit, query_fasta)
|
13
|
+
#puts '..................'
|
14
14
|
@mismas_ids_array = mismas_ids_array
|
15
|
-
@msgs =
|
16
|
-
@number_x = 0
|
17
|
-
num_x = ''
|
18
|
-
|
15
|
+
@msgs = []
|
19
16
|
@output_seq = query_fasta
|
20
|
-
|
21
|
-
if (mismas_ids_array.count > 1)
|
22
|
-
mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
|
23
|
-
@final_hit = mismas_ids_array[0].dup
|
24
17
|
|
18
|
+
if mismas_ids_array.count > 1
|
19
|
+
mismas_ids_array.sort! {|h1,h2| h1.q_beg <=> h2.q_beg} # ordenamos los hit por el inicio del query
|
20
|
+
@final_hit = mismas_ids_array.shift # We take first hsp like reference for unigene reconstruction
|
21
|
+
#puts @output_seq.length
|
22
|
+
#puts "\e[32m#{@final_hit.inspect}\e[0m"
|
25
23
|
mismas_ids_array.each do |hit|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
if (@final_hit.q_frame == hit.q_frame)
|
36
|
-
same_frame_hits(hit)
|
37
|
-
else
|
38
|
-
# condiciones para corregir el frame en el que tiene que continuar la seq de nt
|
39
|
-
correccion_x = 0
|
40
|
-
if (@final_hit.q_frame - hit.q_frame == 1)
|
41
|
-
correccion_x = 1
|
42
|
-
elsif (@final_hit.q_frame - hit.q_frame == 2)
|
43
|
-
correccion_x = 2
|
44
|
-
elsif (@final_hit.q_frame - hit.q_frame == -1)
|
45
|
-
correccion_x = 2
|
46
|
-
elsif (@final_hit.q_frame - hit.q_frame == -2)
|
47
|
-
correccion_x = 1
|
48
|
-
end
|
49
|
-
|
50
|
-
# las secuencias solapan en el query
|
51
|
-
if ((@final_hit.q_end >= hit.q_beg) && (@final_hit.q_end < hit.q_end)) && (hit.q_end > @final_hit.q_end + 15)
|
52
|
-
overlapped_hits(hit, correccion_x, q)
|
53
|
-
# puts "---#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
|
54
|
-
|
55
|
-
elsif (@final_hit.q_end < hit.q_beg) && (hit.q_end > @final_hit.q_end + 15) # las secuencias estan separadas en el query
|
56
|
-
separated_hits(hit, correccion_x, q)
|
57
|
-
# puts "----------#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate}"
|
58
|
-
else
|
59
|
-
@msgs = 'warning!, putative chimeric sequence! or repetitive structure'
|
60
|
-
# puts "\nWARNING!, PUTATIVE CHIMERIC SEQUENCE !!!\n\n"
|
61
|
-
end
|
62
|
-
end
|
24
|
+
#puts '.....', "\e[31m#{hit.inspect}\e[0m"
|
25
|
+
##if @final_hit.q_frame == hit.q_frame #Same frame
|
26
|
+
## puts "\e[33mSame Frame\e[0m"
|
27
|
+
## same_frame_hits_query(hit)
|
28
|
+
if overlapping_hits?(hit) #Diff frame
|
29
|
+
if @msgs.empty?
|
30
|
+
@msgs << ['OverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
31
|
+
else
|
32
|
+
@msgs << ['AndOverlapHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
63
33
|
end
|
64
|
-
|
34
|
+
#puts "\e[33mOverlapped hits\e[0m"
|
35
|
+
overlapped_hits_query(hit)
|
36
|
+
elsif separated_hits?(hit) #Diff frame
|
37
|
+
if @msgs.empty?
|
38
|
+
@msgs << ['SeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
39
|
+
else
|
40
|
+
@msgs << ['AndSeparatedHit', @final_hit.q_end + 1, hit.q_beg + 1]
|
41
|
+
end
|
42
|
+
#puts "\e[33mSeparated hits\e[0m"
|
43
|
+
separated_hits(hit) #Diff frame
|
44
|
+
end
|
45
|
+
#puts @output_seq.length
|
46
|
+
#puts '.....'
|
47
|
+
#puts "\e[32m#{@final_hit.inspect}\e[0m"
|
65
48
|
end
|
66
|
-
|
67
49
|
else
|
68
|
-
|
69
|
-
@final_hit = mismas_ids_array[0].dup
|
70
|
-
|
71
|
-
# if ($verbose)
|
72
|
-
# puts "***#{@final_hit.acc}\t#{@final_hit.score}\t#{@final_hit.e_val}\t#{@final_hit.ident}\t#{@final_hit.align_len}\t#{@final_hit.q_frame}\t#{@final_hit.q_beg + 1}\t#{@final_hit.q_end + 1}\t#{@final_hit.s_beg + 1}\t#{@final_hit.s_end + 1}\t#{@final_hit.q_seq}"
|
73
|
-
# puts "#{query_fasta[@final_hit.q_beg..@final_hit.q_end].translate}"
|
74
|
-
# end
|
50
|
+
@final_hit = mismas_ids_array.shift
|
75
51
|
end
|
76
|
-
#
|
77
|
-
#
|
78
|
-
|
79
|
-
@full_prot = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
|
80
|
-
# puts "\nfull_prot_ulh: \n#{full_prot}"
|
81
|
-
|
82
|
-
(@is_ok, @q_index_start) = contenidos_en_prot(@final_hit, @full_prot, q)
|
83
|
-
|
52
|
+
#puts 'END ___________________'
|
53
|
+
#puts @final_hit.inspect
|
84
54
|
end
|
85
55
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
if (hit.score == @final_hit.score &&
|
92
|
-
hit.q_beg == @final_hit.q_beg &&
|
93
|
-
hit.q_end == @final_hit.q_end &&
|
94
|
-
hit.s_beg == @final_hit.s_beg &&
|
95
|
-
hit.s_end == @final_hit.s_end)
|
96
|
-
|
97
|
-
same = true
|
98
|
-
end
|
99
|
-
|
100
|
-
return same
|
56
|
+
def same_frame_hits_query(hit)
|
57
|
+
@final_hit.q_seq = @output_seq[@final_hit.q_beg..@final_hit.q_end].translate
|
58
|
+
@final_hit.q_end = hit.q_end
|
59
|
+
@final_hit.s_end = hit.s_end
|
60
|
+
@final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
|
101
61
|
end
|
102
62
|
|
103
63
|
|
64
|
+
def overlapped_hits_query(hit)
|
65
|
+
overlapped_nts = @final_hit.q_end - hit.q_beg + 1
|
66
|
+
add_nt = ajust_nt(hit.q_frame-1) # Fix frame-shift
|
67
|
+
@output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * (overlapped_nts + add_nt) + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
|
104
68
|
|
105
|
-
|
106
|
-
|
107
|
-
@final_hit.q_beg = @final_hit.q_beg
|
108
|
-
@final_hit.q_end = hit.q_end
|
109
|
-
|
110
|
-
@final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
|
111
|
-
@final_hit.s_end = [@final_hit.s_end,hit.s_end].max
|
69
|
+
@final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
|
70
|
+
@final_hit.q_end = hit.q_end + add_nt
|
112
71
|
|
72
|
+
@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
|
73
|
+
@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
|
74
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
75
|
+
@final_hit.q_len = @output_seq.length
|
113
76
|
end
|
114
77
|
|
115
78
|
|
79
|
+
def separated_hits_query(hit)
|
80
|
+
separated_nts = hit.q_beg - @final_hit.q_end + 1
|
81
|
+
add_nt = ajust_nt(separated_nts) # Fix frame-shift
|
116
82
|
|
117
|
-
|
118
|
-
|
119
|
-
# puts "los hits solapan!!!"
|
120
|
-
|
121
|
-
if (@msgs.empty?)
|
122
|
-
@msgs = "Overlapping hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
123
|
-
else
|
124
|
-
@msgs += " and overlapping frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
125
|
-
end
|
126
|
-
|
127
|
-
# ------------------------------------- preparamos los nt y aas que solapan
|
128
|
-
overlapped_nt = 0
|
129
|
-
overlapped_nt = (@final_hit.q_end - hit.q_beg + 1)
|
130
|
-
|
131
|
-
overlapped_aas = 0
|
132
|
-
overlapped_aas = (@final_hit.s_end - hit.s_beg + 1)
|
133
|
-
# -------------------------------------
|
134
|
-
if (overlapped_nt % 3 == 1)
|
135
|
-
overlapped_nt += 2
|
136
|
-
elsif (overlapped_nt % 3 == 2)
|
137
|
-
overlapped_nt += 1
|
138
|
-
end
|
139
|
-
# ------------------------------------- calculamos el numero de x a añadir
|
140
|
-
@number_x = (((correccion_x + overlapped_nt)/3)+1)*3
|
141
|
-
@number_x_aa = overlapped_aas
|
142
|
-
num_x = ''
|
143
|
-
num_x_aa = ''
|
83
|
+
@output_seq = @output_seq[0..@final_hit.q_end - separated_nts] + 'n' * (separated_nts + add_nt) + @output_seq[@final_hit.q_end+1..@output_seq.length-1]
|
84
|
+
@final_hit.q_seq = @output_seq[@final_hit.q_beg..hit.q_end+add_nt].translate
|
144
85
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
@msgs = "ERROR#2 unexpected negative index in x_number, "
|
151
|
-
# puts "ERROR#2 unexpected negative index in x_number"
|
152
|
-
end
|
86
|
+
@final_hit.q_end = hit.q_end + add_nt
|
87
|
+
@final_hit.s_end = hit.s_end
|
88
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
89
|
+
@final_hit.q_len = @output_seq.length
|
90
|
+
end
|
153
91
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
92
|
+
def same_frame_hits(hit)
|
93
|
+
add = (hit.s_beg - @final_hit.s_end) + 1
|
94
|
+
nt_add = add *3
|
95
|
+
@final_hit.q_seq = @final_hit.q_seq + 'x' * add + hit.q_seq
|
96
|
+
@final_hit.s_seq = @final_hit.s_seq + 'x' * add + hit.s_seq
|
97
|
+
@output_seq = @output_seq[0..@final_hit.q_end-nt_add] + 'n'*nt_add + @output_seq[hit.q_beg+1..@output_seq.length-1]
|
98
|
+
@final_hit.q_end = hit.q_end
|
99
|
+
@final_hit.s_end = hit.s_end
|
100
|
+
@final_hit.align_len = hit.s_end - @final_hit.s_beg + 1
|
101
|
+
end
|
162
102
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
103
|
+
def overlapped_hits(hit) #Colapsa cuando hay hsps en diferentes partes de la query pero son lo mismo en el subject con lo cual los eliminamos antes
|
104
|
+
overlapped_aas = @final_hit.s_end - hit.s_beg + 1
|
105
|
+
overlapped_nts = @final_hit.q_end - hit.q_beg + 1
|
106
|
+
align_len_final_hit = @final_hit.q_seq.length
|
107
|
+
hit_gaps_query = @final_hit.q_seq[align_len_final_hit-overlapped_aas..align_len_final_hit].count('-')
|
108
|
+
hit_gaps_subject = @final_hit.s_seq[0..align_len_final_hit].count('-')
|
109
|
+
total_gaps = (hit_gaps_query - hit_gaps_subject).abs # Gaps añaden aa q no existen, x lo q han de descontarse
|
110
|
+
nt_discount = (overlapped_aas ) * 3
|
111
|
+
|
112
|
+
absolute_overlap = 1
|
113
|
+
if nt_discount < 0 #Not overlap on subject
|
114
|
+
overlapped_aas = overlapped_aas.abs
|
115
|
+
absolute_overlap = 0 #Don't cut q_seq and s_seq, because there is query overlap but there isn't subject overlap
|
116
|
+
nt_discount = @final_hit.q_end - hit.q_beg + 1
|
117
|
+
nt_discount += ajust_nt(nt_discount)
|
169
118
|
end
|
170
|
-
|
171
|
-
|
172
|
-
output_seq_tmp = "#{@output_seq[0..(@final_hit.q_end - overlapped_nt)]}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..(@output_seq.length)]}"
|
119
|
+
add_nt = overlapped_nts + ajust_nt(hit.q_frame-1)
|
120
|
+
@output_seq = @output_seq[0..@final_hit.q_end-overlapped_nts] + 'n' * add_nt + @output_seq[@final_hit.q_end+1.. @output_seq.length-1]
|
173
121
|
|
174
|
-
|
175
|
-
|
176
|
-
|
122
|
+
#q_seq and s_seq are aa sequences
|
123
|
+
final_hit_upper_bound = @final_hit.q_seq.length - 1 - overlapped_aas * absolute_overlap
|
124
|
+
@final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.q_seq[overlapped_aas * absolute_overlap .. hit.q_seq.length-1]
|
125
|
+
@final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.s_seq[overlapped_aas * absolute_overlap .. hit.s_seq.length-1]
|
177
126
|
|
178
|
-
@output_seq = output_seq_tmp.dup
|
179
|
-
|
180
|
-
@final_hit.q_beg = @final_hit.q_beg
|
181
127
|
@final_hit.q_end = hit.q_end
|
182
128
|
|
183
|
-
@final_hit.s_beg = [@final_hit.s_beg,hit.s_beg].min
|
184
|
-
@final_hit.s_end = [@final_hit.s_end,hit.s_end].max
|
185
|
-
|
129
|
+
@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
|
130
|
+
@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
|
131
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
186
132
|
end
|
187
133
|
|
134
|
+
def separated_hits(hit)
|
135
|
+
number_x = hit.q_beg - @final_hit.q_end - 1
|
136
|
+
number_x += ajust_nt(number_x)
|
188
137
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
if (@msgs.empty?)
|
195
|
-
@msgs = "Separated hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
196
|
-
else
|
197
|
-
@msgs += " and possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
|
198
|
-
end
|
199
|
-
|
200
|
-
# PROBAR CON % 3!!!!!!!!!!!!!!!!!!!!!!!!
|
201
|
-
@number_x = (hit.q_beg - @final_hit.q_end - 1)
|
202
|
-
# @number_x = (hit.q_beg - @final_hit.q_end - 1) + correccion_x
|
203
|
-
if (@number_x % 3 == 1)
|
204
|
-
@number_x += 2
|
205
|
-
elsif (@number_x % 3 == 2)
|
206
|
-
@number_x += 1
|
207
|
-
end
|
208
|
-
|
209
|
-
|
210
|
-
if (@number_x.to_i > 0)
|
211
|
-
num_x = 'x'*@number_x.to_i
|
212
|
-
num_x_aa = 'x'*(@number_x.to_i/3)
|
213
|
-
elsif (@number_x.to_i == 0)
|
214
|
-
num_x = ''
|
215
|
-
num_x_aa = ''
|
216
|
-
else
|
217
|
-
@msgs = "ERROR#2 unexpected negative index in x_number"
|
218
|
-
# puts "ERROR#2 unexpected negative index in x_number"
|
138
|
+
num_x = ''
|
139
|
+
num_x_aa = ''
|
140
|
+
if number_x > 0
|
141
|
+
num_x = 'n'*number_x
|
142
|
+
num_x_aa = 'x'*(number_x/3)
|
219
143
|
end
|
144
|
+
@output_seq = @output_seq[0..@final_hit.q_end-1] + num_x + @output_seq[hit.q_beg-1..@output_seq.length-1]
|
145
|
+
@final_hit.q_seq = @final_hit.q_seq[0, @final_hit.q_seq.length] + num_x_aa + hit.q_seq[0, hit.q_seq.length]
|
146
|
+
@final_hit.s_seq = @final_hit.s_seq[0, @final_hit.s_seq.length] + num_x_aa + hit.s_seq[0, hit.s_seq.length]
|
220
147
|
|
221
|
-
@output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
|
222
|
-
# @output_seq = @output_seq[0..@final_hit.q_end-1]+num_x+@output_seq[hit.q_beg-1..@output_seq.length-1]
|
223
|
-
|
224
|
-
@final_hit.score += 1
|
225
|
-
@final_hit.q_beg = @final_hit.q_beg
|
226
148
|
@final_hit.q_end = hit.q_end
|
227
|
-
@final_hit.s_beg = @final_hit.s_beg
|
228
149
|
@final_hit.s_end = hit.s_end
|
229
|
-
|
230
|
-
@final_hit.q_seq = "#{@final_hit.q_seq[0, @final_hit.q_seq.length]}#{num_x_aa}#{hit.q_seq[0, hit.q_seq.length]}"
|
231
|
-
@final_hit.s_seq = "#{@final_hit.s_seq[0, @final_hit.s_seq.length]}#{num_x_aa}#{hit.s_seq[0, hit.s_seq.length]}"
|
232
|
-
|
233
|
-
num_x = ''
|
234
|
-
num_x_aa = ''
|
235
|
-
|
236
|
-
full_prot_tmp = @output_seq[@final_hit.q_frame-1, @output_seq.length+1].translate
|
237
|
-
# puts "\n\nfull_prot_tmp:#{full_prot_tmp}"
|
238
|
-
# puts "\n\n#{@output_seq[@final_hit.q_frame-1, @output_seq.length+1]}"
|
239
|
-
|
240
|
-
(is_ok_tmp, q_index_start_tmp) = contenidos_en_prot(@final_hit, full_prot_tmp, q)
|
241
|
-
|
242
|
-
# puts "#{q.query_def}: is_ok_tmp: #{is_ok_tmp} separated hits"
|
150
|
+
@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg+1
|
243
151
|
end
|
244
152
|
|
245
153
|
|
246
154
|
|
247
155
|
# creamos un array en el que esten solo los hits con la misma id.
|
248
|
-
def hits_misma_id(
|
249
|
-
|
250
|
-
# Se les hace la reveso complementaria si es necesario, y se comprueba que para un mismo query no hay hits en sentidos diferentes
|
251
|
-
wrong_seq = false
|
252
|
-
|
156
|
+
def hits_misma_id(full_hit, query_fasta_ori)
|
157
|
+
# Se les hace la reverso complementaria si es necesario
|
253
158
|
misma_id = []
|
254
|
-
|
255
159
|
query_fasta = query_fasta_ori.dup
|
256
|
-
frame_ori = q.hits
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
if ((frame_ori < 0 && h.q_frame > 0) or (frame_ori > 0 && h.q_frame < 0))
|
266
|
-
wrong_seq = true
|
267
|
-
reversed_hit = true
|
268
|
-
# puts "The sequence #{q.query_def} contains sense and antisense hits!!! #{frame_ori} y #{h.q_frame}"
|
269
|
-
end
|
270
|
-
|
271
|
-
if (reversed_hit == false)
|
272
|
-
if (h.q_frame.to_i < 0) # si la secuencia esta al reves le damos la vuelta
|
273
|
-
(query_fasta, h.q_frame, h.q_beg, h.q_end) = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
|
274
|
-
h.reversed = true
|
160
|
+
#frame_ori = q.hits.first.q_frame
|
161
|
+
|
162
|
+
full_hit.each do |h|
|
163
|
+
if h.acc == full_hit.first.acc
|
164
|
+
# comprobar si los frames tienen el mismo sentido
|
165
|
+
#if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
|
166
|
+
if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
|
167
|
+
query_fasta = reverse_seq(query_fasta_ori, h)
|
168
|
+
h.reversed = TRUE
|
275
169
|
end
|
276
|
-
misma_id
|
277
|
-
end
|
278
|
-
|
170
|
+
misma_id << h
|
171
|
+
#end
|
279
172
|
end
|
173
|
+
end
|
174
|
+
|
175
|
+
return misma_id, query_fasta
|
176
|
+
end
|
280
177
|
|
178
|
+
def overlapping_hits?(hit)
|
179
|
+
overlap = FALSE
|
180
|
+
if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
|
181
|
+
overlap = TRUE
|
281
182
|
end
|
183
|
+
return overlap
|
184
|
+
end
|
282
185
|
|
283
|
-
|
186
|
+
def separated_hits?(hit)
|
187
|
+
separated=FALSE
|
188
|
+
if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
|
189
|
+
separated = TRUE
|
190
|
+
end
|
191
|
+
return separated
|
284
192
|
end
|
285
193
|
|
286
194
|
|
195
|
+
|
196
|
+
def ajust_nt(nt) # Returns the number of nt necessary for keep the ORF (or nt becomes multiple of 3)
|
197
|
+
add=0
|
198
|
+
if nt % 3 == 1
|
199
|
+
add = 2
|
200
|
+
elsif nt % 3 == 2
|
201
|
+
add = 1
|
202
|
+
end
|
203
|
+
return add
|
204
|
+
end
|
287
205
|
end
|