gene_assembler 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/GeneAssembler +233 -0
- data/bin/phytozome_scan +60 -0
- data/gene_assembler.gemspec +25 -0
- data/lib/gene_assembler.rb +5 -0
- data/lib/gene_assembler/blast_type_parser.rb +41 -0
- data/lib/gene_assembler/contig.rb +643 -0
- data/lib/gene_assembler/dataset.rb +532 -0
- data/lib/gene_assembler/exonerate_result.rb +230 -0
- data/lib/gene_assembler/gff_contig.rb +67 -0
- data/lib/gene_assembler/gff_dataset.rb +152 -0
- data/lib/gene_assembler/gff_feature.rb +175 -0
- data/lib/gene_assembler/gff_frameshift.rb +6 -0
- data/lib/gene_assembler/gff_go.rb +13 -0
- data/lib/gene_assembler/gff_hit.rb +53 -0
- data/lib/gene_assembler/gff_hsp.rb +6 -0
- data/lib/gene_assembler/gff_localization.rb +6 -0
- data/lib/gene_assembler/gff_master_feature.rb +5 -0
- data/lib/gene_assembler/gff_parser.rb +35 -0
- data/lib/gene_assembler/gff_snp.rb +21 -0
- data/lib/gene_assembler/gff_stop.rb +6 -0
- data/lib/gene_assembler/go.rb +13 -0
- data/lib/gene_assembler/hit.rb +191 -0
- data/lib/gene_assembler/hsp.rb +100 -0
- data/lib/gene_assembler/other_functions.rb +228 -0
- data/lib/gene_assembler/parser.rb +25 -0
- data/lib/gene_assembler/parser_blast.rb +12 -0
- data/lib/gene_assembler/parser_exonerate.rb +16 -0
- data/lib/gene_assembler/rebuild.rb +975 -0
- data/lib/gene_assembler/report.rb +13 -0
- data/lib/gene_assembler/report_gff.rb +30 -0
- data/lib/gene_assembler/snp.rb +13 -0
- data/lib/gene_assembler/version.rb +3 -0
- metadata +149 -0
@@ -0,0 +1,532 @@
|
|
1
|
+
require 'contig'
|
2
|
+
|
3
|
+
class Dataset
|
4
|
+
attr_accessor :type, :contigs, :clusters, :references_hash
|
5
|
+
# Builds an empty dataset with no contigs and no clusters.
#
# type - label for the dataset; it is stored in @type but not read by any
#        method of this class.
def initialize(type)
  @type = type
  @contigs, @clusters = [], []
  # Placeholder value; load_references later replaces it with a Hash.
  @references_hash = ''
end
|
11
|
+
|
12
|
+
# Creates a Contig called +name+, appends it to @contigs and returns the
# new contig so the caller can keep filling it in.
def add_contig(name)
  new_contig = Contig.new(name)
  @contigs.push(new_contig)
  new_contig
end
|
17
|
+
|
18
|
+
# Appends contigs to @contigs.
#
# add_contigs - a contig or an Array of contigs.
# limit       - 0 (default) appends everything and flattens @contigs;
#               a positive value appends at most that many contigs;
#               a negative value appends all of them without flattening.
#
# With a non-zero limit, a single (non-Array) contig is now appended instead
# of being silently dropped, and nil adds nothing.
#
# Returns @contigs.
def transfer_contigs(add_contigs, limit = 0)
  if limit == 0
    @contigs << add_contigs
    @contigs.flatten!
  else
    candidates = case add_contigs
                 when nil then []
                 when Array then add_contigs
                 else [add_contigs]
                 end
    candidates = candidates.first(limit) if limit > 0
    @contigs.concat(candidates)
  end
  @contigs
end
|
33
|
+
|
34
|
+
# Appends +cluster+ (an array of contigs) to @clusters and returns @clusters.
def transfer_cluster(cluster)
  @clusters.push(cluster)
end
|
37
|
+
|
38
|
+
# Removes every entry of @clusters that is equal to +cluster+.
# Returns the removed cluster, or nil when nothing matched.
def delete_cluster(cluster)
  removed = @clusters.delete(cluster)
  removed
end
|
41
|
+
|
42
|
+
# Removes the cluster stored at position +ind+ of @clusters.
# Returns the removed cluster, or nil when the index is out of range.
def delete_cluster_at(ind)
  removed = @clusters.delete_at(ind)
  removed
end
|
45
|
+
|
46
|
+
# Iterators

# Yields every contig of @contigs in order and returns @contigs.
# When called without a block it returns an Enumerator instead of raising.
def each_contig(&block)
  @contigs.each(&block)
end
|
52
|
+
|
53
|
+
# Yields every contig of @contigs together with its position and returns
# @contigs. When called without a block it returns an Enumerator instead of
# raising.
def each_contig_with_index(&block)
  @contigs.each_with_index(&block)
end
|
58
|
+
|
59
|
+
# Yields every cluster of @clusters in order and returns @clusters.
# When called without a block it returns an Enumerator instead of raising.
def each_cluster(&block)
  @clusters.each(&block)
end
|
64
|
+
|
65
|
+
# Yields every cluster of @clusters together with its position and returns
# @clusters. When called without a block it returns an Enumerator instead of
# raising.
def each_cluster_with_index(&block)
  @clusters.each_with_index(&block)
end
|
70
|
+
|
71
|
+
# Returns the number of contigs held in @contigs (an Integer, despite the
# trailing question mark in the method name).
def n_contigs?
  @contigs.size
end
|
75
|
+
|
76
|
+
# Empties the dataset by pointing @contigs at a brand-new array; the
# previous array object is left untouched.
def clr_contigs
  @contigs = Array.new
end
|
79
|
+
|
80
|
+
# Returns true when the dataset holds no clusters at all, false otherwise.
# A cluster that is itself an empty array still counts as a cluster.
def clusters_empty?
  @clusters.empty?
end
|
92
|
+
|
93
|
+
# Returns how many contigs the dataset currently holds.
def contig_count
  @contigs.size
end
|
97
|
+
|
98
|
+
# Returns how many clusters the dataset currently holds.
def cluster_count
  @clusters.size
end
|
102
|
+
|
103
|
+
# Restores attributes that were lost on the contigs of this dataset (the
# original note says the exonerate results lack them) by copying them from
# the contigs of +dataset+ (the blast results).
#
# For every contig of self that has a same-named contig in +dataset+, the
# length and seq are copied, and every hit of the contig receives the
# s_length of the donor contig's first hit. Contigs without a namesake are
# left untouched.
#
# dataset - object responding to each_contig.
def attrib_recover(dataset)
  # Index donors by name once; only the first contig with a given name is
  # kept, matching the first-match semantics of a linear search.
  donors = {}
  dataset.each_contig do |donor|
    donors[donor.name] = donor unless donors.key?(donor.name)
  end

  each_contig do |contig|
    donor = donors[contig.name]
    next if donor.nil?
    contig.length = donor.length
    contig.seq = donor.seq
    contig.each_hit do |hit|
      hit.s_length = donor.first_hit.s_length
    end
  end
end
|
117
|
+
|
118
|
+
# Asks every contig of the dataset to correct its HSPs, forwarding
# +blast_coor_type+ unchanged to Contig#correct_hsps.
def correct_hsp_contigs(blast_coor_type)
  each_contig do |entry|
    entry.correct_hsps(blast_coor_type)
  end
end
|
123
|
+
|
124
|
+
# Groups the contigs of @contigs by the name of their first hit (the subject
# id) and appends one array per distinct name to @clusters.
#
# Clusters appear in order of first occurrence of the name, and contigs keep
# their relative order inside each cluster.
#
# Returns @clusters.
def clustering
  groups = @contigs.group_by { |contig| contig.first_hit.name }
  @clusters.concat(groups.values)
end
|
143
|
+
|
144
|
+
# Prints a report about @clusters when the global $verbose flag is set:
# first, for each cluster, the first-hit name and contig name of every
# member between two dotted rulers; then, for each cluster, a MAP banner
# followed by Contig#draw for every member. Prints nothing otherwise.
def info_clusters
  return unless $verbose

  each_cluster do |members|
    puts '............................'
    members.each { |member| puts "#{member.first_hit.name}\t#{member.name}" }
    puts "............................"
  end

  each_cluster do |members|
    puts "\n********************MAP*************************\n"
    members.each { |member| member.draw }
  end

  puts "\n"
end
|
163
|
+
|
164
|
+
# Runs the battery of filters over @contigs.
#
# A contig is dropped when it is mixed, gapped, truncated or has an HSP
# shorter than 15 (nucleotides, per the original note). Contigs with a
# single HSP are set aside instead of dropped. Whatever passes every check
# becomes the new @contigs, and is listed on stdout when $verbose is set.
#
# Returns the array of single-HSP contigs that were set aside.
def filtering
  kept = []
  single_hsp = []
  each_contig do |contig|
    next if contig.mixed?
    if contig.is_one_hsp?
      single_hsp << contig
      next
    end
    next if contig.is_gapped? || contig.is_truncated? || contig.hsp_minor_than?(15)

    kept << contig
    puts "#{contig.first_hit.name}\t#{contig.name}" if $verbose
  end
  @contigs = kept
  single_hsp
end
|
188
|
+
|
189
|
+
# Loads sequences into the contigs of the dataset.
#
# hash - maps contig name => sequence String.
#
# Every contig receives an upper-cased copy of its sequence; the strings
# stored in +hash+ are not modified. A contig whose name is missing from
# +hash+ gets a nil seq instead of raising.
def load_seq(hash)
  each_contig do |contig|
    sequence = hash[contig.name]
    contig.seq = sequence.nil? ? nil : sequence.upcase
  end
end
|
195
|
+
|
196
|
+
# Calls Contig#rev_comp_if_hit on every contig of the dataset; the contig
# itself decides whether its sequence gets reverse-complemented.
def rev_comp
  each_contig do |entry|
    entry.rev_comp_if_hit
  end
end
|
201
|
+
|
202
|
+
# Runs Contig#stop_codon_search on every contig of the dataset.
def parse_stops
  each_contig do |entry|
    entry.stop_codon_search
  end
end
|
207
|
+
|
208
|
+
# Writes the contigs of the dataset to +fasta_file+ in FASTA format: a
# ">name" header line followed by the sequence, one record per contig.
#
# fasta_file - path of the file to create (overwritten if it exists).
#
# The block form of File.open guarantees the handle is closed even when a
# write raises. Returns nil.
def fasta(fasta_file)
  File.open(fasta_file, 'w') do |out|
    each_contig do |contig|
      out.print ">#{contig.name}\n"
      out.puts contig.seq
    end
  end
  nil
end
|
216
|
+
|
217
|
+
# Runs the battery of filters over every cluster of @clusters and, when
# $verbose is set, prints a report per cluster.
#
# Inside each cluster a contig is rejected when it is (checked in this
# order) mixed, truncated, single-HSP or gapped. Single-HSP contigs are also
# collected for later processing. Each cluster is replaced by the array of
# its accepted contigs, which may be empty.
#
# Returns the array of single-HSP contigs collected across all clusters.
def filtering_clust
  surviving_clusters = []
  single_hsp = []

  each_cluster do |members|
    puts "\n********************CLUSTER*************************\n" if $verbose
    accepted = []
    rejected = []

    members.each do |contig|
      reason =
        if contig.mixed? then :mixed
        elsif contig.is_truncated? then :truncated
        elsif contig.is_one_hsp? then :one_hsp
        elsif contig.is_gapped? then :gapped
        end

      if reason.nil?
        accepted << contig
      else
        rejected << ["#{contig.first_hit.name}\t#{contig.name}"]
        # Single-HSP contigs are kept aside for later processing.
        single_hsp << contig if reason == :one_hsp
      end
    end

    if $verbose
      # Every HSP carries the same score, so it describes the whole alignment.
      accepted.each do |contig|
        puts "#{contig.first_hit.name}\t#{contig.name}\t\t\tsc:#{contig.first_hit.first_hsp.score}"
      end
      puts ',,,,,,,,,,,,,REJECTED,,,,,,,,,,,,,'
      rejected.each { |entry| puts entry }
      puts "\n= = = = = = = = = =MAP= = = = = = = = = = = =\n"
      accepted.each { |contig| contig.draw }
    end

    surviving_clusters << accepted
  end

  @clusters = surviving_clusters
  single_hsp
end
|
262
|
+
|
263
|
+
# Sorts, in place, the contigs inside every cluster of @clusters by
# delegating each cluster to sort_cluster.
def sort_cont_clust
  each_cluster do |members|
    sort_cluster(members)
  end
  # Sorting of the clusters themselves is disabled:
  # @clusters=sort_clusters(@clusters)
end
|
269
|
+
|
270
|
+
# Sorts the contigs of +cluster+ in place, in ascending order of the subject
# start (s_beg) of the first HSP of their first hit.
# Returns the same, now sorted, array.
def sort_cluster(cluster)
  cluster.sort! do |left, right|
    left.first_hit.first_hsp.s_beg <=> right.first_hit.first_hsp.s_beg
  end
end
|
273
|
+
|
274
|
+
def load_references(references_file) # Carga en @references_hash todas las referencias en forma de objetos contig
|
275
|
+
hash={}
|
276
|
+
if File.exists?(references_file)
|
277
|
+
File.open(references_file, 'r').each do |line|
|
278
|
+
fields=line.split
|
279
|
+
contig_name=fields[0]
|
280
|
+
if !fields[1].nil?
|
281
|
+
structures=fields[1].split('|')
|
282
|
+
all_models=[]
|
283
|
+
structures.each do |structure|
|
284
|
+
contig=Contig.new(contig_name)
|
285
|
+
contig.add_hit(contig_name, 0, 1,:nt)
|
286
|
+
if structure.nil?
|
287
|
+
break
|
288
|
+
end
|
289
|
+
hsps=structure.split(';')
|
290
|
+
s_end=0
|
291
|
+
nt_add=0
|
292
|
+
hsps.each do |hsp|
|
293
|
+
coords=hsp.split('-')
|
294
|
+
q_beg=coords[0].to_i
|
295
|
+
q_end=coords[1].to_i
|
296
|
+
s_beg=s_end+1
|
297
|
+
exon_length=q_end-q_beg+nt_add
|
298
|
+
s_end=s_end+(exon_length/3)
|
299
|
+
nt_add=exon_length.modulo(3)
|
300
|
+
contig.first_hit.add_hsp(q_beg, q_end, s_beg, s_end, 0, 0, 0, 0)
|
301
|
+
end
|
302
|
+
contig.length=contig.first_hit.last_hsp.q_end
|
303
|
+
all_models << contig
|
304
|
+
end
|
305
|
+
hash[contig_name]=all_models
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end
|
309
|
+
@references_hash=hash
|
310
|
+
end
|
311
|
+
|
312
|
+
# Moves into this dataset the clusters of +dataset+ whose gene (name of the
# first hit of the first contig) is not represented here yet.
#
# * When self holds no clusters, every cluster of +dataset+ is transferred
#   and +dataset+ is cleared.
# * Otherwise each cluster of +dataset+ is compared against the clusters of
#   self; clusters with an unseen gene are copied (shallow dup) into self
#   and removed from +dataset+.
#
# Empty or nil clusters carry no gene name, so they are skipped on both
# sides instead of raising.
#
# dataset - object responding to each_cluster, each_cluster_with_index,
#           clear_clusters and delete_cluster_at.
def missing_cluster_transfer(dataset)
  if clusters_empty?
    dataset.each_cluster { |clust| transfer_cluster(clust) }
    dataset.clear_clusters
  else
    missing = []
    missing_positions = []
    dataset.each_cluster_with_index do |uni_cluster, ind|
      next if uni_cluster.nil? || uni_cluster.empty?
      gene = uni_cluster.first.first_hit.name
      already_present = false
      each_cluster do |cluster|
        next if cluster.nil? || cluster.empty?
        if cluster.first.first_hit.name == gene
          already_present = true
          break
        end
      end
      next if already_present
      missing << uni_cluster
      missing_positions << ind
    end
    missing.each { |clust| transfer_cluster(clust.dup) }
    # Delete from the highest index down so earlier deletions do not shift
    # the positions that are still pending.
    missing_positions.sort.reverse_each { |ind| dataset.delete_cluster_at(ind) }
  end
end
|
343
|
+
|
344
|
+
# Drops every cluster by pointing @clusters at a brand-new empty array; the
# previous array object is left untouched.
def clear_clusters
  @clusters = Array.new
end
|
347
|
+
|
348
|
+
# Adds factor * (number of introns of the contig) to the score of every HSP
# of each contig's first hit.
#
# factor - numeric weight applied per intron.
def score_correction(factor)
  each_contig do |contig|
    introns = contig.n_intron
    contig.first_hit.each_hsp do |hsp|
      hsp.score = hsp.score + factor * introns
    end
  end
end
|
356
|
+
|
357
|
+
# Computes the shift needed to avoid negative indices in the gff (per the
# original note).
#
# Every contig is compared against a reference: +contig_base+ when it is
# given, otherwise the previous contig of the dataset (so the first contig
# is not compared). When the two overlap (compare returns an index > -1)
# the offset between the overlapping HSPs is obtained through coord_prot,
# and the most negative offset seen is remembered.
#
# contig_base - reference contig, or nil to chain the contigs together.
#
# Returns that most negative offset with its sign flipped (0 when no offset
# was negative).
def correct_left_side_contigs(contig_base)
  has_base = !contig_base.nil?
  reference = contig_base
  # Without a base the first contig has nothing to be compared with.
  first_compared = has_base ? 0 : 1
  lowest = 0

  each_contig_with_index do |contig, idx|
    if idx >= first_compared
      pos_in_reference, _ex = contig.compare(reference)
      if pos_in_reference > -1
        pos_in_contig, _ex = reference.compare(contig)
        offset = coord_prot(reference.hsp_at(pos_in_reference), contig.hsp_at(pos_in_contig))
        lowest = offset if offset < lowest
      end
    end
    reference = has_base ? contig_base : contig
  end

  lowest * -1
end
|
387
|
+
|
388
|
+
# Aligns the contigs of the dataset against each other, or against a
# reference, by shifting their coordinates.
#
# contig_base     - reference contig, or nil to chain each contig to the
#                   previous one.
# mod_contig_base - accepted so that the two-argument call made by
#                   multiple_align_contigs does not raise; it is not used
#                   here.
#
# Without a reference, each contig (from the second one on) is compared with
# the previous contig: when they do not overlap the shift grows by the
# previous contig's length, otherwise by the offset coord_prot reports for
# the overlapping HSPs. The shift accumulates along the chain and every
# contig is moved by it.
#
# With a reference, every contig is compared with +contig_base+; only the
# contigs that overlap it are moved, and the shift is reset after each one.
def align_contigs(contig_base, mod_contig_base = false)
  with_reference = !contig_base.nil?
  last_contig = contig_base
  # Without a reference the first contig has nothing to be compared with.
  first_aligned = with_reference ? 0 : 1
  add = 0
  align = true

  each_contig_with_index do |contig, i|
    if i >= first_aligned
      # Shift of this contig relative to the previous one / the reference.
      overlap_exon_with_last, _ex = contig.compare(last_contig)
      if overlap_exon_with_last == -1
        if with_reference
          align = false
        else
          add += last_contig.length
        end
      else
        overlap_exon_current, _ex = last_contig.compare(contig)
        add += coord_prot(last_contig.hsp_at(overlap_exon_with_last), contig.hsp_at(overlap_exon_current))
        align = true if with_reference
      end
    end

    # Move the contig when there is no reference or it aligned against it.
    if align || !with_reference
      contig.modified_coordenates(add)
      contig.length += add
    end

    if with_reference
      last_contig = contig_base
      add = 0 # the shift is per contig when a reference is used
    else
      last_contig = contig
    end
  end
end
|
430
|
+
|
431
|
+
# Aligns the contigs of the dataset against several reference contigs.
#
# array_contig_base - Array of reference contigs.
# mod_contig_base   - kept for interface compatibility; it is not used.
#
# For each reference the left-side correction is computed with
# correct_left_side_contigs and the contigs are aligned against it with
# align_contigs (called with the single argument it declares). The largest
# correction found is then applied, when positive, both to the references
# and to every contig of the dataset.
#
# Returns that largest correction (0 when none was needed).
def multiple_align_contigs(array_contig_base, mod_contig_base = false)
  correct = 0
  array_contig_base.each do |contig_base|
    local_correct = correct_left_side_contigs(contig_base)
    correct = local_correct if local_correct > correct
    align_contigs(contig_base)
  end

  # Apply the global shift on top of the local shifts done above.
  if correct > 0
    array_contig_base.each do |contig|
      contig.modified_coordenates(correct)
      contig.length += correct
    end
    each_contig do |contig|
      contig.modified_coordenates(correct)
      contig.length += correct
    end
  end

  correct
end
|
456
|
+
|
457
|
+
# Transfers into this dataset up to +limit+ contigs from the cluster of
# +dataset+ that belongs to the same gene as +cluster+ (same name on the
# first hit of the first contig), after marking the first hit of every
# contig of that cluster as 'pseudogene'.
#
# dataset      - object responding to each_cluster (the uni-hsp dataset, per
#                the original note).
# cluster      - Array of contigs for the gene being worked on; nothing is
#                done when it is nil or empty.
# new_hit_type - NOTE(review): currently ignored; the hit type is always
#                set to 'pseudogene'. Confirm whether it should be used.
# limit        - forwarded to transfer_contigs.
def transfer_n_contigs_def_hit_type(dataset, cluster, new_hit_type, limit)
  return if cluster.nil? || cluster.empty?

  gene = cluster.first.first_hit.name
  dataset.each_cluster do |dat_cluster|
    next if dat_cluster.nil? || dat_cluster.empty?
    next unless dat_cluster.first.first_hit.name == gene

    dat_cluster.each { |contig| contig.first_hit.type = 'pseudogene' }
    transfer_contigs(dat_cluster, limit)
  end
end
|
473
|
+
|
474
|
+
# Moves into the clusters of this dataset the contigs of +dataset+ (the
# uni-hsp dataset, per the original note) that belong to the same gene but
# do not align with any contig already present in the cluster.
#
# Two clusters describe the same gene when the first hit of their first
# contig has the same name. A contig of +dataset+ aligns when its compare
# against some contig of the cluster returns a position > -1. Contigs that
# do not align are appended to the cluster of self and deleted from the
# cluster of +dataset+.
#
# Empty or nil clusters carry no gene name and are skipped on both sides.
#
# dataset - object responding to each_cluster.
def missing_contigs_transfer(dataset)
  each_cluster_with_index do |self_cluster, s|
    next if self_cluster.nil? || self_cluster.empty?

    dataset.each_cluster do |dataset_cluster|
      next if dataset_cluster.nil? || dataset_cluster.empty?
      next unless self_cluster.first.first_hit.name == dataset_cluster.first.first_hit.name

      unaligned = dataset_cluster.reject do |dataset_contig|
        self_cluster.any? do |self_contig|
          position, _n_exones = dataset_contig.compare(self_contig)
          position > -1
        end
      end

      # Collected first, moved afterwards, so neither cluster is modified
      # while it is being scanned.
      unaligned.each do |contig|
        transfer_contig_to_cluster(contig, s)
        dataset_cluster.delete(contig)
      end
    end
  end
end
|
505
|
+
|
506
|
+
# Appends +contig+ to the cluster stored at position +n_cluster+ of
# @clusters and returns that cluster.
def transfer_contig_to_cluster(contig, n_cluster)
  @clusters[n_cluster].push(contig)
end
|
509
|
+
|
510
|
+
# Writes, for every contig whose first HSP starts within the first 10
# positions of the subject (s_beg <= 10), the region before the alignment.
#
# file  - path of a tab-separated report: gene name, contig name and the
#         query start (q_beg) of the first HSP, one line per contig.
# fasta - path of a file receiving, per contig, the gene name on one line
#         and the contig sequence from position 0 up to and including q_beg
#         on the next. Contigs without a sequence are left out of this file.
#         NOTE(review): the name line is written without a leading '>',
#         unlike the records written by #fasta; confirm this is intended.
#
# Both files are opened in block form so they are closed even when an error
# interrupts the writing. nil and empty clusters are skipped. Returns nil.
def generate_file_5_prime(file, fasta)
  File.open(file, 'w') do |prime5_file|
    File.open(fasta, 'w') do |fasta_file|
      each_cluster do |cluster|
        next if cluster.nil? || cluster.empty?

        gene_name = cluster.first.first_hit.name
        cluster.each do |contig|
          first_hsp = contig.first_hit.first_hsp
          next unless first_hsp.s_beg <= 10

          prime5_end = first_hsp.q_beg
          prime5_file.puts "#{gene_name}\t#{contig.name}\t#{prime5_end}"
          # Guard the contig sequence itself, not just the slice result.
          seq = contig.seq.nil? ? nil : contig.seq[0..prime5_end]
          fasta_file.puts "#{gene_name}\n#{seq}" unless seq.nil?
        end
      end
    end
  end
  nil
end
|
531
|
+
|
532
|
+
end
|