full_lengther_next 0.0.8 → 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/History.txt +2 -2
- data/Manifest.txt +33 -18
- data/Rakefile +4 -2
- data/bin/download_fln_dbs.rb +310 -158
- data/bin/full_lengther_next +160 -103
- data/bin/make_test_dataset.rb +236 -0
- data/bin/make_user_db.rb +101 -117
- data/bin/plot_fln.rb +270 -0
- data/bin/plot_taxonomy.rb +70 -0
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next.rb +3 -3
- data/lib/full_lengther_next/classes/artifacts.rb +66 -0
- data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
- data/lib/full_lengther_next/classes/cdhit.rb +154 -0
- data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
- data/lib/full_lengther_next/classes/common_functions.rb +105 -63
- data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
- data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
- data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
- data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
- data/lib/full_lengther_next/classes/handle_db.rb +30 -0
- data/lib/full_lengther_next/classes/my_worker.rb +308 -138
- data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
- data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
- data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
- data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
- data/lib/full_lengther_next/classes/reptrans.rb +210 -0
- data/lib/full_lengther_next/classes/sequence.rb +439 -80
- data/lib/full_lengther_next/classes/test_code.rb +15 -16
- data/lib/full_lengther_next/classes/types.rb +12 -0
- data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
- data/lib/full_lengther_next/classes/warnings.rb +40 -0
- metadata +207 -93
- data/lib/full_lengther_next/classes/lcs.rb +0 -33
- data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'scbi_fasta'
|
2
|
+
|
3
|
+
# One FASTA record that belongs to a CD-HIT cluster.
class Seq
  attr_accessor :name, :comments, :seq_fasta, :db, :master

  # name      - sequence name taken from the FASTA header.
  # comments  - remaining text of the FASTA header (may be nil).
  # seq_fasta - the sequence itself.
  # master    - truthy when this is the most representative sequence of its cluster.
  def initialize(name, comments, seq_fasta, master)
    @name = name
    @comments = comments
    @seq_fasta = seq_fasta
    @db = parse_db(name, comments)
    @master = master
  end

  # Returns 'sp' when the name or the comments start with "sp", 'tr' when the
  # comments start with "tr", and nil otherwise.
  #
  # The previous patterns (/^[sp]/, /^[tr]/) were character classes, so any
  # header starting with a single "s", "p", "t" or "r" was classified.
  # NOTE(review): presumably the prefixes are the UniProt "sp|" / "tr|" tags;
  # as before, 'tr' is only looked for in the comments - confirm against the
  # header format of the databases actually used.
  def parse_db(name, comments)
    return 'sp' if name.to_s.start_with?('sp') || comments.to_s.start_with?('sp')
    return 'tr' if comments.to_s.start_with?('tr')

    nil
  end

  # FASTA representation of the record (no trailing newline).
  def to_s
    ">#{@name} #{@comments}\n#{@seq_fasta}"
  end
end
|
27
|
+
|
28
|
+
# Loads a CD-HIT cluster report together with the FASTA file it refers to and
# exposes every cluster as an array of Seq objects.
class Cdhit
  attr_accessor :clusters, :sequence_hash_fasta

  # Positions inside the arrays stored in @sequence_hash_fasta.
  NAME = 0
  COMMENTS = 1
  SEQ_FASTA = 2

  # fasta_file - FASTA file holding the clustered sequences.
  # clust_file - CD-HIT cluster report.
  def initialize(fasta_file, clust_file)
    @clusters = []
    @sequence_hash_fasta = hash_fasta(fasta_file)
    cd_hit_clusters(clust_file)
  end

  # Yields every cluster (an array of Seq).
  def each_cluster
    @clusters.each do |cluster|
      yield cluster
    end
  end

  # Writes the master sequence of every cluster to file_name in FASTA format.
  # The block form of File.open closes the file even if a cluster raises.
  def master_fasta(file_name)
    File.open(file_name, 'w') do |fasta|
      each_cluster do |cluster|
        master = get_master(cluster)
        fasta.print ">#{master.name} #{master.comments}\n#{master.seq_fasta}\n"
      end
    end
    nil
  end

  # When the master of a cluster is not an 'sp' sequence but the cluster holds
  # one, the first 'sp' sequence becomes the only master of that cluster.
  def master_to_sp_seq
    each_cluster do |cluster|
      master_seq = get_master(cluster)
      next if master_seq.db == 'sp'

      sp_seq = get_sp(cluster)
      next if sp_seq.nil?

      cluster.each { |seq| seq.master = false }
      sp_seq.master = true
    end
  end

  # Returns the non-master sequences for which both length ratios
  # (sequence/master and master/sequence, as percentages) are below
  # +percentage+.
  #
  # The ratios are computed with floating point division; integer division
  # truncated them to multiples of 100 before the comparison.
  def recover_different_lengths(percentage)
    seqs = []
    each_cluster do |cluster|
      master = get_master(cluster)
      master_length = master.seq_fasta.length
      cluster.each do |seq|
        next if seq.name == master.name

        seq_length = seq.seq_fasta.length
        seq_mas_len = seq_length.fdiv(master_length) * 100
        mas_seq_len = master_length.fdiv(seq_length) * 100
        seqs << seq if mas_seq_len < percentage && seq_mas_len < percentage
      end
    end
    seqs
  end

  # First sequence of the cluster flagged as master, or nil.
  def get_master(cluster)
    cluster.find { |seq| seq.master }
  end

  # Master sequence of every cluster, in cluster order.
  def get_all_master
    @clusters.map { |cluster| get_master(cluster) }
  end

  # First sequence of the cluster whose db is 'sp', or nil.
  def get_sp(cluster)
    cluster.find { |seq| seq.db == 'sp' }
  end

  # Fills @clusters from the CD-HIT report, building one Seq per member from
  # the records loaded by hash_fasta.
  def cd_hit_clusters(clust_file)
    require 'bio-cd-hit-report' # loaded here, as in the original, so the gem is only needed when a report is parsed
    report = Bio::CdHitReport.new(clust_file)
    report.each_cluster do |cluster|
      clust = cluster.data.map do |member|
        name, master = parse_member(member)
        hash_seq = @sequence_hash_fasta[name]
        Seq.new(hash_seq[NAME], hash_seq[COMMENTS], hash_seq[SEQ_FASTA], master)
      end
      @clusters << clust
    end
  end

  # Parses one member line of the report.
  # Returns [name, master]; master is true when the line is marked with '*'.
  # The argument is no longer modified, and only the first comma splits the
  # line so a name containing commas is kept whole.
  def parse_member(member)
    cleaned = member.gsub('...', '').gsub('>', '')
    fields = cleaned.split(',', 2)
    data = fields[1].split(' ', 2)
    master = (data[1] == '*')
    return data[0], master
  end

  # Loads the FASTA file into a hash: truncated name => [name, comments, seq].
  def hash_fasta(file)
    sequence_hash_fasta = {}
    fqr = FastaQualFile.new(file)
    begin
      fqr.each do |name, seq_fasta, comments|
        # Cd-hit cuts the sequence name to 20 characters (including '>'),
        # so 'name[0..18]' is used as hash key.
        sequence_hash_fasta[name[0..18]] = [name, comments, seq_fasta]
      end
    ensure
      fqr.close
    end
    sequence_hash_fasta
  end
end
|
149
|
+
|
150
|
+
#Example
|
151
|
+
#cdhit=Cdhit.new('all_rodents.fasta','rodents_cln.clstr')
|
152
|
+
#cdhit.master_to_sp_seq
|
153
|
+
#cdhit.master_fasta('all_rodents_red')
|
154
|
+
|
@@ -1,78 +1,336 @@
|
|
1
|
-
|
2
1
|
require 'scbi_blast'
|
2
|
+
require 'fl_analysis' #Allow call 'analiza_orf_y_fl'
|
3
|
+
require 'common_functions'
|
3
4
|
|
4
5
|
module ChimericSeqs
|
5
|
-
|
6
|
-
|
6
|
+
BEG = 0
|
7
|
+
STOP = 1
|
8
|
+
HIT = 2
|
9
|
+
# Detects whether seq is a chimera (several non-overlapping homology zones in
# its BLAST result) and, when requested, splits it into one fragment per zone.
#
# seq         - sequence object being analysed.
# blast_query - BLAST query result for seq.
# options     - hash; :chimera holds option letters ('r' confirms the zones,
#               'c' cuts the sequence), :ident_thresold and :min_nucleotides
#               are thresholds.
# db_name     - database name handed to the fragment analysis.
# db_path     - database path used to confirm the zones.
#
# Returns nil when the sequence is not a chimera, otherwise the array of
# fragment sequences (empty when nothing was cut).
def search_chimeras(seq, blast_query, options, db_name, db_path)
  # DETECTION
  #----------------------
  homology_zones = []
  if blast_query.hits.length > 1
    homology_zones = define_homology_zones(blast_query, options, seq.seq_fasta)
  end

  # CONFIRMATION
  #----------------------
  if homology_zones.length > 1 && options[:chimera].include?('r')
    confirm_chimeras(homology_zones, db_path, options[:ident_thresold]) # Check whether the proteins are different or not
  end
  num_homology_zones = homology_zones.length

  # SPLICING
  #--------------------
  return nil unless num_homology_zones > 1 # Sequence isn't a chimera

  # Cut positions are computed after the confirmation step: confirm_chimeras
  # may delete zones, and positions computed beforehand would be indexed
  # against the old zone list. They are also computed before the loop below
  # because fragment_chimera rewrites the q_len of the hsps it processes.
  cut_positions = set_cut_positions(homology_zones)

  new_seqs = []
  seq.format_chimera!
  homology_zones.each_with_index do |hom_zone, i|
    seq.hit << hom_zone[HIT].first.dup # Save the hit before it is modified, for output purposes
    hit_limits = get_limits(hom_zone[HIT]) # Beginning and end of the hit on the query; hsps can be unsorted or antisense
    if options[:chimera].include?('c') && hit_limits[STOP] - hit_limits[BEG] > options[:min_nucleotides]
      new_seqs << fragment_chimera(blast_query, seq, hom_zone[HIT], i, hit_limits, num_homology_zones, options, db_name, cut_positions[i])
      seq.warnings('SOLVED')
    end
  end
  new_seqs
end
|
45
|
+
|
46
|
+
# Computes, for every homology zone, the [start, end] coordinates (inclusive)
# of the fragment of the query that will hold it.
#
# The cut between two consecutive zones is placed halfway between the end of
# the first and the beginning of the second. Every fragment starts one
# position after the previous cut; the last fragment previously started on
# the cut itself and therefore shared one position with the fragment before.
# The last fragment ends at q_len - 1 of the first hsp of the last zone.
#
# homology_zones - array of [beg, stop, hsps] sorted by beg.
# Returns an array with one [start, end] pair per zone.
def set_cut_positions(homology_zones)
  cut_positions = []
  fragment_start = 0
  homology_zones.each_cons(2) do |previous_zone, zone|
    cut_position = previous_zone[STOP] + (zone[BEG] - previous_zone[STOP]) / 2
    cut_positions << [fragment_start, cut_position]
    fragment_start = cut_position + 1
  end
  cut_positions << [fragment_start, homology_zones.last[HIT].first.q_len - 1]
  cut_positions
end
|
62
|
+
|
63
|
+
# Removes from homology_zones the zones whose subject proteins are too similar
# to each other to be considered different.
#
# The subject sequences are fetched with blastdbcmd, compared with
# clustal-omega (see do_clustal) and the resulting identity matrix is handed
# to search_ident_prots; the zones it reports are deleted in place.
def confirm_chimeras(homology_zones, db_path, ident_thresold)
  accessions = homology_zones.map { |zone| zone[HIT].first.acc }
  fasta = %x[blastdbcmd -db #{db_path} -entry #{accessions.join(',')}]
  # clustal-omega needs at least 3 sequences to print the distance matrix,
  # so a dummy one is appended
  fasta << ">remove\nALGO\n"

  matrix = do_clustal(fasta)
  matrix.shift # header line
  matrix.pop   # row of the dummy sequence

  distances = matrix.map do |row|
    columns = row.split
    columns.pop   # column of the dummy sequence
    columns.shift # protein name
    columns.map! { |value| value.to_f }
  end

  to_delete = search_ident_prots(homology_zones, distances, ident_thresold)
  delete_zones(to_delete, homology_zones)
end
|
48
83
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
84
|
+
|
85
|
+
# Looks for pairs of different zones whose value in the distances matrix
# reaches ident_thresold.
#
# homology_zones - only its length is used (size of the matrix).
# distances      - square matrix (array of arrays of numbers).
# Returns the indexes of the zones taking part in such a pair, without
# duplicates, in order of discovery.
def search_ident_prots(homology_zones, distances, ident_thresold)
  total = homology_zones.length
  flagged = []
  (0...total).each do |row|
    (0...total).each do |column|
      next if row == column
      next if distances[row][column] < ident_thresold

      flagged.push(row, column)
    end
  end
  flagged.uniq
end
|
100
|
+
|
101
|
+
|
102
|
+
# Builds the sequence fragment that holds one homology zone of a chimera and
# runs the full-length analysis on it.
#
# query              - BLAST query of the whole sequence (duplicated, not modified).
# seq                - whole sequence (duplicated, not modified).
# hit                - array of hsps of the zone; their limits, frame and q_len
#                      are modified in place.
# hit_position       - index of the zone (0 is the first one).
# hit_limits         - not used by this method.
# num_homology_zones - total number of zones.
# options, db_name   - handed to analiza_orf_y_fl.
# cut_positions      - [start, end] of the fragment on the whole sequence.
#
# Returns the new fragment sequence. The removed TRUE/FALSE constants
# (gone since Ruby 3.2) were replaced by true/false.
def fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions)
  # Prepare new seq and query
  #----------------------------
  query_bak = query.dup
  query_bak.hits = hit # Here, hit is an array of hsps
  query_bak.query_def += "_split_#{hit_position}"
  seq_bak = seq.dup
  seq_bak.reset_classification
  seq_bak.clean_warnings
  seq_bak.seq_name += "_split_#{hit_position}"
  seq_bak.clean_orfs
  seq_bak.save_fasta = true
  seq_bak.ignore = false

  # Cut sequence and move hit/hsps limits
  #----------------------------------------
  if hit_position == 0 # First zone
    limit = 0
    if hit.first.q_frame < 0 # Hit reversed
      hit.first.q_frame = -1
    end
  else # Middle & last zone
    limit = cut_positions[BEG]
    hit_move_limits(hit, -limit, 0) # Redefine hit limits on the new sequence after the cut
    if hit.first.q_frame >= 0
      hit.first.q_frame = 1
    elsif hit_position < num_homology_zones - 1
      # The last zone keeps its original frame because it is composed of the
      # hit and the terminal sequence (here the hit is reversed).
      hit.first.q_frame = -1
    end
  end
  if hit_position == num_homology_zones - 1 # Last zone
    seq_bak.seq_fasta = seq.seq_fasta[cut_positions[BEG]..seq.fasta_length - 1]
  else # Beginning & middle zone
    seq_bak.seq_fasta = seq.seq_fasta[limit..cut_positions[STOP]]
  end
  seq_length = seq_bak.seq_fasta.length
  query_bak.full_query_length = seq_length
  seq_bak.fasta_length = seq_length
  hit_set_q_len(hit, seq_length)

  # Full length analysis of the fragment
  #----------------------------------------
  analiza_orf_y_fl(seq_bak, query_bak.hits, options, db_name)

  seq_bak
end
|
149
|
+
|
150
|
+
# Groups the hits of the query into homology zones on the query sequence.
#
# Each zone is [beg, stop, hsps]. A hit that falls on an existing zone (see
# hit_is_in?) widens that zone; otherwise it opens a new zone. Zones are then
# sorted by their beginning and the ones reported by overlapping_zones are
# removed with delete_zones.
#
# options and query_fasta are not used by this method.
def define_homology_zones(query, options, query_fasta)
  # Hit limits
  #---------------------
  hsp_groups = cluster_query_hits(query) # Hsp packages
  limits = define_hit_limits(hsp_groups)

  # Homology zones
  #------------------------
  first_limit = limits.first
  zones = [[first_limit[BEG], first_limit[STOP], hsp_groups.first]]

  limits.each_with_index do |limit, index|
    matched = false
    zones.each do |zone|
      next unless hit_is_in?(zone[BEG], zone[STOP], limit)

      # Widen the homology zone when the hit falls on it
      zone[BEG] = [zone[BEG], limit[BEG]].min
      zone[STOP] = [zone[STOP], limit[STOP]].max
      matched = true
    end
    zones << [limit[BEG], limit[STOP], hsp_groups[index]] unless matched
  end
  zones.sort! { |left, right| left[BEG] <=> right[BEG] }

  # Delete overlapping homology zones
  #------------------------------------
  delete_zones(overlapping_zones(zones), zones)

  zones
end
|
186
|
+
|
187
|
+
# Returns, for every hit (array of hsps), the limits computed by get_limits,
# in the same order.
def define_hit_limits(hits)
  hits.map { |hsps| get_limits(hsps) }
end
|
194
|
+
|
195
|
+
# Returns [lowest, highest] query coordinate among the q_beg/q_end values of
# all the hsps in hit (hsps may be unsorted or reversed).
def get_limits(hit)
  coordinates = hit.flat_map { |hsp| [hsp.q_beg, hsp.q_end] }
  #  BEG               END
  [coordinates.min, coordinates.max]
end
|
202
|
+
|
203
|
+
# Returns [lowest, highest] subject coordinate among the s_beg/s_end values of
# all the hsps in hit.
def get_limits_s(hit)
  coordinates = hit.flat_map { |hsp| [hsp.s_beg, hsp.s_end] }
  #  BEG               END
  [coordinates.min, coordinates.max]
end
|
210
|
+
|
211
|
+
# Packs the hits of the query by accession.
# Returns an array of arrays: one array per distinct acc (in order of first
# appearance) holding every hit with that acc, in their original order.
def cluster_query_hits(query)
  packages = []
  query.hits.each do |hit|
    package = packages.find { |members| members.first.acc == hit.acc }
    if package
      package << hit
    else
      packages << [hit]
    end
  end
  packages
end
|
225
|
+
|
226
|
+
# Removes from zones (in place) the elements at the given indexes.
#
# overlapping_zones - array of indexes into zones, in any order.
# zones             - array modified in place.
#
# Indexes are removed from highest to lowest: deleting in the order received
# shifted the remaining elements, so later indexes pointed at the wrong zone
# (e.g. deleting [0, 1] removed the elements 0 and 2).
# Returns overlapping_zones, as before.
def delete_zones(overlapping_zones, zones)
  overlapping_zones.uniq.sort.reverse_each do |index|
    zones.delete_at(index)
  end
  overlapping_zones
end
|
231
|
+
|
232
|
+
# Returns the indexes of the zones that overlap their predecessor (a zone
# beginning before the previous zone stops) together with the indexes of
# those predecessors, without duplicates. Zones are expected sorted by BEG.
def overlapping_zones(zones)
  indexes = []
  (1...zones.length).each do |current|
    previous = current - 1
    indexes.push(current, previous) if zones[current][BEG] < zones[previous][STOP]
  end
  indexes.uniq
end
|
245
|
+
|
246
|
+
# Tells whether hit ([beg, stop]) falls on the zone h_beg..h_end: either the
# hit begins inside the zone, or the hit begins at or before the zone and
# stops after the zone beginning.
#
# Returns true or false (the TRUE/FALSE constants used before were removed
# in Ruby 3.2).
def hit_is_in?(h_beg, h_end, hit)
  begins_inside = h_beg <= hit[BEG] && h_end > hit[BEG]
  covers_start = hit[BEG] <= h_beg && hit[STOP] > h_beg
  begins_inside || covers_start
end
|
254
|
+
|
255
|
+
# Returns every hit of the query whose acc equals the acc of ref_hit, in
# their original order.
def get_hits(query, ref_hit)
  wanted = ref_hit.acc
  query.hits.select { |hit| hit.acc == wanted }
end
|
264
|
+
|
265
|
+
|
266
|
+
# Returns the smallest gap between consecutive zones (beginning of a zone
# minus the stop of the previous one), or nil with fewer than two zones.
def min_distance_between_homology_zones(homology_zones)
  gaps = homology_zones.each_cons(2).map do |previous_zone, zone|
    zone[BEG] - previous_zone[STOP]
  end
  gaps.min
end
|
278
|
+
|
279
|
+
# Returns a new array holding a shallow copy (dup) of every hit of the query.
def duplicate_hits(query)
  query.hits.map { |hit| hit.dup }
end
|
286
|
+
|
287
|
+
# Overwrites the query and subject limits of hit. Returns s_end.
def set_limits(hit, q_beg, q_end, s_beg, s_end)
  hit.q_beg, hit.q_end = q_beg, q_end
  hit.s_beg, hit.s_end = s_beg, s_end
  s_end
end
|
293
|
+
|
294
|
+
# Shifts the query limits of hit by q_add and its subject limits by s_add.
# For objects whose class name is 'ExoBlastHit' with a non-empty
# q_frameshift list, the first element of every frameshift entry is shifted
# by q_add too (the list is modified in place and returned); otherwise nil
# is returned.
def move_limits(hit, q_add, s_add)
  hit.q_beg = hit.q_beg + q_add
  hit.q_end = hit.q_end + q_add
  hit.s_beg = hit.s_beg + s_add
  hit.s_end = hit.s_end + s_add

  # Only this kind of hit carries frameshift positions
  return unless hit.class.to_s == 'ExoBlastHit' && !hit.q_frameshift.empty?

  hit.q_frameshift.map! do |frameshift|
    [frameshift.first + q_add, frameshift.last]
  end
end
|
305
|
+
|
306
|
+
# Applies move_limits to a hit given either as an Array of hsps (every
# element is moved) or as a single object whose class name is 'Hit'.
# Any other kind of object is left untouched.
def hit_move_limits(hit, q_add, s_add)
  case hit.class.to_s
  when 'Array'
    hit.each { |hsp| move_limits(hsp, q_add, s_add) }
  when 'Hit'
    move_limits(hit, q_add, s_add)
  end
end
|
316
|
+
|
317
|
+
# Sets q_len on every hsp of hit (an array of hsps). Returns hit.
def hit_set_q_len(hit, q_len)
  hit.each { |hsp| hsp.q_len = q_len }
end
|
322
|
+
|
323
|
+
# Runs clustal-omega (clustalo) on the given FASTA text and returns the lines
# it prints on standard output, which is where the command line below sends
# the distance matrix.
#
# seq_fasta - FASTA formatted string written to the standard input of clustalo.
# Returns an array of lines.
#
# The TRUE constant used before was removed in Ruby 3.2; true is used now.
def do_clustal(seq_fasta)
  cmd = 'clustalo -i - -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
  clustal_matrix = nil
  IO.popen(cmd, 'w+') do |clustal|
    clustal.sync = true
    clustal.write(seq_fasta)
    clustal.close_write # Signals end of input so clustalo starts working
    clustal_matrix = clustal.readlines
    clustal.close_read
  end
  clustal_matrix
end
|
77
335
|
|
78
336
|
end
|