full_lengther_next 0.0.8 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.txt +2 -2
- data/Manifest.txt +33 -18
- data/Rakefile +4 -2
- data/bin/download_fln_dbs.rb +310 -158
- data/bin/full_lengther_next +160 -103
- data/bin/make_test_dataset.rb +236 -0
- data/bin/make_user_db.rb +101 -117
- data/bin/plot_fln.rb +270 -0
- data/bin/plot_taxonomy.rb +70 -0
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next.rb +3 -3
- data/lib/full_lengther_next/classes/artifacts.rb +66 -0
- data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
- data/lib/full_lengther_next/classes/cdhit.rb +154 -0
- data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
- data/lib/full_lengther_next/classes/common_functions.rb +105 -63
- data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
- data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
- data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
- data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
- data/lib/full_lengther_next/classes/handle_db.rb +30 -0
- data/lib/full_lengther_next/classes/my_worker.rb +308 -138
- data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
- data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
- data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
- data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
- data/lib/full_lengther_next/classes/reptrans.rb +210 -0
- data/lib/full_lengther_next/classes/sequence.rb +439 -80
- data/lib/full_lengther_next/classes/test_code.rb +15 -16
- data/lib/full_lengther_next/classes/types.rb +12 -0
- data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
- data/lib/full_lengther_next/classes/warnings.rb +40 -0
- metadata +207 -93
- data/lib/full_lengther_next/classes/lcs.rb +0 -33
- data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'scbi_fasta'
|
2
|
+
|
3
|
+
# One FASTA record that is a member of a sequence cluster.
#
# Attributes (all read/write):
#   name      - identifier passed to the constructor
#   comments  - header comments passed to the constructor
#   seq_fasta - the sequence string
#   db        - 'sp', 'tr' or nil, as computed by #parse_db
#   master    - truthy when this record is the most representative
#               sequence of its cluster
class Seq
  attr_accessor :name, :comments, :seq_fasta, :db, :master

  # master = more representative sequence on a cluster
  def initialize(name, comments, seq_fasta, master)
    @name = name
    @comments = comments
    @seq_fasta = seq_fasta
    @db = parse_db(name, comments)
    @master = master
  end

  # Works out the source database tag from the header.
  #
  # Returns 'sp' when the name or the comments start with the literal
  # prefix "sp", 'tr' when the comments start with "tr", nil otherwise.
  # The prefixes are matched literally: the previous patterns /^[sp]/ and
  # /^[tr]/ were character classes and tagged anything starting with a
  # single "s", "p", "t" or "r".
  # NOTE(review): "sp"/"tr" presumably stand for Swiss-Prot/TrEMBL, and only
  # the comments (not the name) are inspected for "tr" - confirm against the
  # header format of the databases in use.
  def parse_db(name, comments)
    if name =~ /^sp/ || comments =~ /^sp/
      'sp'
    elsif comments =~ /^tr/
      'tr'
    end
  end

  # FASTA rendering of the record, without a trailing newline.
  def to_s
    ">#{@name} #{@comments}\n#{@seq_fasta}"
  end
end
|
27
|
+
|
28
|
+
# In-memory view of a CD-HIT clustering: the clustered FASTA file plus the
# cluster report, exposed as arrays of Seq objects (one array per cluster).
class Cdhit
  attr_accessor :clusters, :sequence_hash_fasta

  # Positions inside the [name, comments, sequence] triples stored in
  # @sequence_hash_fasta.
  NAME = 0
  COMMENTS = 1
  SEQ_FASTA = 2

  # fasta_file - path of the FASTA file that was clustered
  # clust_file - path of the cluster report handed to Bio::CdHitReport
  def initialize(fasta_file, clust_file)
    @clusters = []
    @sequence_hash_fasta = hash_fasta(fasta_file)
    cd_hit_clusters(clust_file)
  end

  # Yields every cluster (an Array of Seq) in load order.
  def each_cluster
    @clusters.each do |cluster|
      yield cluster
    end
  end

  # Writes the master sequence of every cluster to +file_name+ in FASTA
  # format. The block form of File.open guarantees the handle is closed even
  # when a cluster has no master and the write raises. Returns nil.
  def master_fasta(file_name)
    File.open(file_name, 'w') do |fasta|
      each_cluster do |cluster|
        master = get_master(cluster)
        fasta.print '>' + master.name + ' ' + master.comments + "\n" + master.seq_fasta + "\n"
      end
    end
    nil
  end

  # For every cluster whose master is not tagged 'sp', promotes the first
  # 'sp' member (when there is one) to master and demotes all the others.
  def master_to_sp_seq
    each_cluster do |cluster|
      next if get_master(cluster).db == 'sp'

      sp_seq = get_sp(cluster)
      next if sp_seq.nil?

      cluster.each { |seq| seq.master = false }
      sp_seq.master = true
    end
  end

  # Collects the non-master members whose length ratios against their master
  # are both below +percentage+.
  # NOTE(review): both ratios use Integer division before the * 100, so each
  # one is a multiple of 100 (0 when the numerator is shorter). That arithmetic
  # is kept exactly as it was; confirm whether a Float ratio was intended.
  def recover_different_lengths(percentage)
    seqs = []
    each_cluster do |cluster|
      master = get_master(cluster)
      cluster.each do |seq|
        next if seq.name == master.name

        seq_mas_len = seq.seq_fasta.length / master.seq_fasta.length * 100
        mas_seq_len = master.seq_fasta.length / seq.seq_fasta.length * 100
        seqs << seq if mas_seq_len < percentage && seq_mas_len < percentage
      end
    end
    seqs
  end

  # First member of +cluster+ flagged as master, or nil.
  def get_master(cluster)
    cluster.find { |seq| seq.master }
  end

  # Masters of all clusters, in cluster order (nil for clusters without one).
  def get_all_master
    masters = []
    each_cluster { |cluster| masters << get_master(cluster) }
    masters
  end

  # First member of +cluster+ tagged 'sp', or nil.
  def get_sp(cluster)
    cluster.find { |seq| seq.db == 'sp' }
  end

  # Loads the cluster report and fills @clusters with one Array of Seq per
  # reported cluster. Member names are resolved through @sequence_hash_fasta.
  def cd_hit_clusters(clust_file)
    require 'bio-cd-hit-report' # loaded lazily, only when a report is parsed
    report = Bio::CdHitReport.new(clust_file)
    report.each_cluster do |cluster|
      clust = []
      cluster.data.each do |member|
        name, master = parse_member(member)
        hash_seq = @sequence_hash_fasta[name]
        clust << Seq.new(hash_seq[NAME], hash_seq[COMMENTS], hash_seq[SEQ_FASTA], master)
      end
      @clusters << clust
    end
  end

  # Extracts [name, master?] from one member line of the cluster report.
  #
  # The '...' and '>' decorations are stripped, the text after the first comma
  # is split into the name and the remainder, and the member is a master when
  # that remainder is exactly '*'. Works on a copy, so the caller's string is
  # left untouched (and may be frozen).
  def parse_member(member)
    cleaned = member.gsub('...', '').gsub('>', '')
    fields = cleaned.split(',')
    data = fields[1].split(' ', 2)
    [data[0], data[1] == '*']
  end

  # Reads +file+ and returns a Hash mapping the first 19 characters of every
  # sequence name to [name, comments, sequence].
  def hash_fasta(file)
    sequence_hash_fasta = {}
    fqr = FastaQualFile.new(file)
    begin
      fqr.each do |name, seq_fasta, comments|
        # Cd-hit cuts the sequence name to 20 characters ('>' included), so
        # name[0..18] is used as the hash key.
        sequence_hash_fasta[name[0..18]] = [name, comments, seq_fasta]
      end
    ensure
      fqr.close # release the reader even if iteration fails
    end
    sequence_hash_fasta
  end
end
|
149
|
+
|
150
|
+
#Example
|
151
|
+
#cdhit=Cdhit.new('all_rodents.fasta','rodents_cln.clstr')
|
152
|
+
#cdhit.master_to_sp_seq
|
153
|
+
#cdhit.master_fasta('all_rodents_red')
|
154
|
+
|
@@ -1,78 +1,336 @@
|
|
1
|
-
|
2
1
|
require 'scbi_blast'
|
2
|
+
require 'fl_analysis' #Allow call 'analiza_orf_y_fl'
|
3
|
+
require 'common_functions'
|
3
4
|
|
4
5
|
module ChimericSeqs
|
5
|
-
|
6
|
-
|
6
|
+
# Indexes shared by the small arrays this module passes around:
# homology zones are [begin, stop, hsps], hit limits and cut positions are
# [begin, stop].
BEG = 0  # start coordinate
STOP = 1 # end coordinate
HIT = 2  # array of HSPs backing a homology zone
|
9
|
+
# Detects whether +seq+ is a chimera (more than one independent homology zone
# on the query) and, when requested, splits it into one sequence per zone.
#
# seq         - sequence object; receives format_chimera!, hit << and
#               warnings('SOLVED') when it turns out to be a chimera
# blast_query - query result exposing #hits
# options     - Hash read for :chimera (flags: 'r' = confirm zones,
#               'c' = cut the sequence), :ident_thresold and :min_nucleotides
# db_name     - forwarded to fragment_chimera
# db_path     - forwarded to confirm_chimeras
#
# Returns nil when the sequence is not a chimera, otherwise the Array of
# fragments built by fragment_chimera (empty when nothing was cut).
def search_chimeras(seq, blast_query, options, db_name, db_path)
  # DETECTION
  #----------------------
  homology_zones = []
  if blast_query.hits.length > 1
    homology_zones = define_homology_zones(blast_query, options, seq.seq_fasta)
  end

  # CONFIRMATION
  #----------------------
  # confirm_chimeras removes zones in place when their proteins are not
  # different enough.
  if homology_zones.length > 1 && options[:chimera].include?('r')
    confirm_chimeras(homology_zones, db_path, options[:ident_thresold])
  end
  num_homology_zones = homology_zones.length
  return nil unless num_homology_zones > 1 # Sequence isn't a chimera

  # Cut positions are derived from the zones that survived confirmation, so
  # that cut_positions[i] always belongs to homology_zones[i]. Computing them
  # before confirmation left the indexes pointing at deleted zones.
  cut_positions = set_cut_positions(homology_zones)

  # SPLICING
  #--------------------
  new_seqs = []
  seq.format_chimera!
  homology_zones.each_with_index do |hom_zone, i|
    seq.hit << hom_zone[HIT].first.dup # Save the hit before it is modified, for output purposes
    # Beginning and end of the hit on the query; a hit can be made of unsorted or antisense HSPs
    hit_limits = get_limits(hom_zone[HIT])
    next unless options[:chimera].include?('c') && hit_limits[STOP] - hit_limits[BEG] > options[:min_nucleotides]

    new_seqs << fragment_chimera(blast_query, seq, hom_zone[HIT], i, hit_limits, num_homology_zones, options, db_name, cut_positions[i])
    seq.warnings('SOLVED')
  end
  new_seqs
end
|
45
|
+
|
46
|
+
# Computes, for every homology zone, the [start, end] slice of the query that
# the corresponding fragment will take.
#
# Consecutive zones are separated at the midpoint (Integer division) of the
# gap between the end of the previous zone and the start of the next one.
# The last fragment runs up to q_len - 1 of the first HSP of the last zone.
#
# homology_zones - Array of [begin, stop, hsps], expected sorted by begin
#
# Returns an Array with one [start, end] pair per zone, or [] when there are
# no zones (previously a NoMethodError on nil).
# NOTE(review): every fragment but the last starts at "previous cut + 1",
# whereas the last one starts at the previous cut itself, so that base is
# shared by the two final fragments. Kept as is - confirm whether intended.
def set_cut_positions(homology_zones)
  return [] if homology_zones.empty?

  last_cut = -1
  cut_positions = homology_zones.each_cons(2).map do |previous, current|
    fragment_start = last_cut + 1
    last_cut = previous[STOP] + (current[BEG] - previous[STOP]) / 2
    [fragment_start, last_cut]
  end
  cut_positions << [last_cut, homology_zones.last[HIT].first.q_len - 1]
end
|
62
|
+
|
63
|
+
# Checks whether the proteins behind the homology zones really differ and
# removes from +homology_zones+ (in place) the zones flagged by
# search_ident_prots.
#
# The subject sequences are fetched with blastdbcmd, aligned with
# clustal-omega (do_clustal) and the resulting percent-identity matrix is
# compared against +ident_thresold+.
#
# homology_zones - Array of [begin, stop, hsps]; modified in place
# db_path        - BLAST database passed to blastdbcmd -db
# ident_thresold - identity value at or above which two proteins are flagged
def confirm_chimeras(homology_zones, db_path, ident_thresold)
  acc_hit = homology_zones.map { |zone| zone[HIT].first.acc }
  # The command is given as an argument vector so no shell is involved:
  # accessions containing '|' and paths containing spaces or other shell
  # metacharacters are passed to blastdbcmd verbatim.
  seq_fasta = IO.popen(['blastdbcmd', '-db', db_path.to_s, '-entry', acc_hit.join(',')], &:read)
  # clustal-omega needs at least 3 sequences to print the distance matrix,
  # so a dummy one is appended and dropped again below.
  seq_fasta << ">remove\nALGO\n"

  clustal_matrix = do_clustal(seq_fasta)
  clustal_matrix.shift # Remove header
  clustal_matrix.pop   # Remove the dummy sequence row

  distances = []
  clustal_matrix.each do |line|
    fields = line.split
    fields.pop   # Remove the column that belongs to the dummy sequence
    fields.shift # Remove the protein name
    distances << fields.map! { |field| field.to_f }
  end
  delete_positions = search_ident_prots(homology_zones, distances, ident_thresold)
  delete_zones(delete_positions, homology_zones)
end
|
48
83
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
84
|
+
|
85
|
+
# Lists the indexes of the homology zones whose proteins are too similar.
#
# Every ordered pair (row, col) of distinct zone indexes is visited in
# row-major order; when distances[row][col] reaches +ident_thresold+ both
# indexes are recorded.
#
# Returns the recorded indexes without duplicates, in first-seen order.
def search_ident_prots(homology_zones, distances, ident_thresold)
  zone_indexes = (0...homology_zones.length).to_a
  flagged = zone_indexes.product(zone_indexes).flat_map do |row, col|
    if row != col && distances[row][col] >= ident_thresold
      [row, col]
    else
      []
    end
  end
  flagged.uniq
end
|
100
|
+
|
101
|
+
|
102
|
+
# Builds the sub-sequence that corresponds to one homology zone of a chimera
# and runs the full-length analysis on it.
#
# query              - original query result; a shallow copy receives the zone HSPs
# seq                - original sequence; a shallow copy is cut and returned
# hit                - Array of HSPs of this zone (their limits, frame and
#                      q_len are modified in place)
# hit_position       - index of the zone (0 = first)
# hit_limits         - unused here, kept for interface compatibility
# num_homology_zones - total number of zones
# options, db_name   - forwarded to analiza_orf_y_fl
# cut_positions      - [start, end] slice of the query for this zone
#
# Returns the new sequence object.
def fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions)
  # Prepare new seq and query
  #----------------------------
  query_bak = query.dup
  query_bak.hits = hit # Here, hit is an array of hsps
  query_bak.query_def += "_split_#{hit_position}"
  seq_bak = seq.dup
  seq_bak.reset_classification
  seq_bak.clean_warnings
  seq_bak.seq_name += "_split_#{hit_position}"
  seq_bak.clean_orfs
  # Literal booleans: the TRUE/FALSE constants are deprecated and are not
  # defined by current Ruby releases.
  seq_bak.save_fasta = true
  seq_bak.ignore = false

  # Cut sequence and move hit/hsps limits
  #----------------------------------------
  if hit_position == 0 # First zone
    limit = 0
    hit.first.q_frame = -1 if hit.first.q_frame < 0 # Hit reversed
  else # Middle & last zone
    limit = cut_positions[BEG]
    hit_move_limits(hit, -limit, 0) # Redefine hit limits on the new sequence after the cut
    if hit.first.q_frame >= 0
      hit.first.q_frame = 1
    elsif hit_position < num_homology_zones - 1
      # The last zone keeps its original frame because it is made of the hit
      # plus the terminal sequence (here the hit is reversed).
      hit.first.q_frame = -1
    end
  end

  if hit_position == num_homology_zones - 1 # Last zone
    seq_bak.seq_fasta = seq.seq_fasta[cut_positions[BEG]..seq.fasta_length - 1]
  else # Beginning & middle zone
    seq_bak.seq_fasta = seq.seq_fasta[limit..cut_positions[STOP]]
  end
  seq_length = seq_bak.seq_fasta.length
  query_bak.full_query_length = seq_length
  seq_bak.fasta_length = seq_length
  hit_set_q_len(hit, seq_length)

  # Full length analysis of the fragment
  #----------------------------------------
  analiza_orf_y_fl(seq_bak, query_bak.hits, options, db_name)

  seq_bak
end
|
149
|
+
|
150
|
+
# Groups the hits of +query+ into homology zones on the query sequence.
#
# HSPs are first packed by cluster_query_hits and their [begin, stop] limits
# taken with define_hit_limits. A package whose limits fall inside an existing
# zone (hit_is_in?) widens that zone; otherwise it opens a new one. Zones are
# then sorted by start and the ones reported by overlapping_zones are removed.
#
# query       - query result handed to cluster_query_hits
# options     - unused here, kept for interface compatibility
# query_fasta - unused here, kept for interface compatibility
#
# Returns an Array of [begin, stop, hsps] sorted by begin; [] when the query
# has no hits (previously a NoMethodError on nil).
def define_homology_zones(query, options, query_fasta)
  # Define hit limits
  #---------------------
  hits = cluster_query_hits(query) # Hsp packages
  return [] if hits.empty?

  hits_limits = define_hit_limits(hits)

  # Define homology zones
  #------------------------
  # First homology zone
  zones = [[hits_limits.first[BEG], hits_limits.first[STOP], hits.first]]

  # Other homology zones
  hits_limits.each_with_index do |hit, i|
    coincidences = 0
    zones.each do |zone|
      next unless hit_is_in?(zone[BEG], zone[STOP], hit)

      # Extend the homology zone when the hit falls inside it
      zone[BEG] = [zone[BEG], hit[BEG]].min
      zone[STOP] = [zone[STOP], hit[STOP]].max
      coincidences += 1
    end
    zones << [hit[BEG], hit[STOP], hits[i]] if coincidences == 0
  end
  zones.sort! { |e1, e2| e1[BEG] <=> e2[BEG] }

  # Delete overlapping homology zones
  #------------------------------------
  zones_to_delete = overlapping_zones(zones)
  delete_zones(zones_to_delete, zones)

  zones
end
|
186
|
+
|
187
|
+
# Maps every HSP package in +hits+ to its [begin, stop] limits on the query,
# as computed by get_limits. The result keeps the order of +hits+.
def define_hit_limits(hits)
  hits.map { |hsp_package| get_limits(hsp_package) }
end
|
194
|
+
|
195
|
+
# Query-side span of a hit made of several HSPs.
#
# hit - Array of HSPs responding to q_beg and q_end; they may be unsorted
#
# Returns [lowest, highest] over all q_beg/q_end values
# ([nil, nil] for an empty hit).
def get_limits(hit)
  query_coords = hit.flat_map { |hsp| [hsp.q_beg, hsp.q_end] }
  #  BEG                END
  [query_coords.min, query_coords.max]
end
|
202
|
+
|
203
|
+
# Subject-side span of a hit made of several HSPs.
#
# hit - Array of HSPs responding to s_beg and s_end; they may be unsorted
#
# Returns [lowest, highest] over all s_beg/s_end values
# ([nil, nil] for an empty hit).
def get_limits_s(hit)
  subject_coords = hit.flat_map { |hsp| [hsp.s_beg, hsp.s_end] }
  #  BEG                  END
  [subject_coords.min, subject_coords.max]
end
|
210
|
+
|
211
|
+
# Packs the HSPs of +query+ by accession.
#
# Returns an Array of Arrays: one inner Array per distinct acc, in the order
# each accession first appears, holding its HSPs in their original order.
def cluster_query_hits(query)
  query.hits.each_with_object([]) do |hsp, packages|
    package = packages.find { |members| members.first.acc == hsp.acc }
    if package
      package << hsp
    else
      packages << [hsp]
    end
  end
end
|
225
|
+
|
226
|
+
# Removes from +zones+ (in place) the elements at the given positions.
#
# overlapping_zones - Array of indexes into +zones+, in any order
# zones             - Array to prune
#
# Indexes are applied from the highest to the lowest: deleting one element
# shifts everything after it, so removing them in the received order (for
# example [0, 1] or [1, 0, 2]) used to drop the wrong zones or leave some
# of the requested ones behind.
#
# Returns +overlapping_zones+, as before.
def delete_zones(overlapping_zones, zones)
  overlapping_zones.uniq.sort.reverse_each { |index| zones.delete_at(index) }
  overlapping_zones
end
|
231
|
+
|
232
|
+
# Finds the zones that overlap their predecessor.
#
# zones - Array of [begin, stop, ...], expected sorted by begin
#
# A zone overlaps when its begin is lower than the stop of the zone right
# before it; both positions are then recorded (current first, previous second).
#
# Returns the recorded indexes without duplicates, in first-seen order.
def overlapping_zones(zones)
  flagged = []
  zones.each_cons(2).with_index(1) do |(previous, current), position|
    flagged.push(position, position - 1) if current[BEG] < previous[STOP]
  end
  flagged.uniq
end
|
245
|
+
|
246
|
+
# Tells whether +hit+ (a [begin, stop] pair) shares positions with the zone
# delimited by +h_beg+ and +h_end+.
#
# True when the hit starts inside the zone (h_beg <= begin < h_end) or when
# it starts at or before the zone start and ends after it.
#
# Returns a plain true/false; the TRUE/FALSE constants used before are
# deprecated and are not defined by current Ruby releases.
def hit_is_in?(h_beg, h_end, hit)
  #             CONTAINS                               OVERLAP
  (h_beg <= hit[BEG] && h_end > hit[BEG]) || (hit[BEG] <= h_beg && hit[STOP] > h_beg)
end
|
254
|
+
|
255
|
+
# Returns, in their original order, the hits of +query+ that carry the same
# accession as +ref_hit+.
def get_hits(query, ref_hit)
  query.hits.select { |candidate| candidate.acc == ref_hit.acc }
end
|
264
|
+
|
265
|
+
|
266
|
+
# Smallest gap between consecutive homology zones, measured from the stop of
# one zone to the begin of the next.
#
# Returns nil when there are fewer than two zones.
def min_distance_between_homology_zones(homology_zones)
  gaps = homology_zones.each_cons(2).map do |previous, current|
    current[BEG] - previous[STOP]
  end
  gaps.min
end
|
278
|
+
|
279
|
+
# Returns a new Array holding a shallow copy (dup) of every hit of +query+,
# in the same order.
def duplicate_hits(query)
  query.hits.map(&:dup)
end
|
286
|
+
|
287
|
+
# Overwrites the query (q_*) and subject (s_*) coordinates of +hit+.
# Returns +s_end+, the value of the last assignment.
def set_limits(hit, q_beg, q_end, s_beg, s_end)
  hit.q_beg, hit.q_end = q_beg, q_end
  hit.s_beg, hit.s_end = s_beg, s_end
  s_end
end
|
293
|
+
|
294
|
+
# Shifts the coordinates of a single hit/HSP: +q_add+ is added to q_beg and
# q_end, +s_add+ to s_beg and s_end.
#
# When the object's class is named 'ExoBlastHit' and it carries frameshifts,
# the first element of every frameshift entry is shifted by +q_add+ as well
# (the entries are rebuilt as [first + q_add, last]).
#
# Returns the updated frameshift Array in that case, nil otherwise.
def move_limits(hit, q_add, s_add)
  hit.q_beg = hit.q_beg + q_add
  hit.q_end = hit.q_end + q_add
  hit.s_beg = hit.s_beg + s_add
  hit.s_end = hit.s_end + s_add

  return unless hit.class.to_s == 'ExoBlastHit'
  return if hit.q_frameshift.empty? # No frameshift to move

  hit.q_frameshift.map! do |frameshift|
    [frameshift.first + q_add, frameshift.last]
  end
end
|
305
|
+
|
306
|
+
# Shifts the coordinates of a hit through move_limits.
#
# hit - either an Array of HSPs (each one is moved) or an object whose class
#       is named 'Hit' (moved directly); anything else is left untouched
#
# Returns the Array for the Array case, the result of move_limits for a
# 'Hit', nil otherwise.
def hit_move_limits(hit, q_add, s_add)
  case hit.class.to_s
  when 'Array'
    hit.each { |hsp| move_limits(hsp, q_add, s_add) }
  when 'Hit'
    move_limits(hit, q_add, s_add)
  end
end
|
316
|
+
|
317
|
+
# Stores +q_len+ as the query length of every HSP in +hit+.
# Returns +hit+.
def hit_set_q_len(hit, q_len)
  hit.each { |hsp| hsp.q_len = q_len }
end
|
322
|
+
|
323
|
+
# Runs clustal-omega on +seq_fasta+ (FASTA text written to its standard
# input) and returns the lines it prints on standard output. With the options
# used here the alignment itself goes to /dev/null and the full
# percent-identity distance matrix to /dev/stdout.
#
# Requires the clustalo executable to be reachable through PATH.
def do_clustal(seq_fasta)
  cmd = 'clustalo -i - -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
  IO.popen(cmd, 'w+') do |clustal|
    # Literal true: the TRUE constant is deprecated and is not defined by
    # current Ruby releases.
    clustal.sync = true
    clustal.write(seq_fasta)
    clustal.close_write # signal end of input so clustalo starts working
    clustal.readlines   # block value, returned by IO.popen; the pipe is closed on exit
  end
end
|
77
335
|
|
78
336
|
end
|