gene_assembler 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/GeneAssembler +233 -0
- data/bin/phytozome_scan +60 -0
- data/gene_assembler.gemspec +25 -0
- data/lib/gene_assembler.rb +5 -0
- data/lib/gene_assembler/blast_type_parser.rb +41 -0
- data/lib/gene_assembler/contig.rb +643 -0
- data/lib/gene_assembler/dataset.rb +532 -0
- data/lib/gene_assembler/exonerate_result.rb +230 -0
- data/lib/gene_assembler/gff_contig.rb +67 -0
- data/lib/gene_assembler/gff_dataset.rb +152 -0
- data/lib/gene_assembler/gff_feature.rb +175 -0
- data/lib/gene_assembler/gff_frameshift.rb +6 -0
- data/lib/gene_assembler/gff_go.rb +13 -0
- data/lib/gene_assembler/gff_hit.rb +53 -0
- data/lib/gene_assembler/gff_hsp.rb +6 -0
- data/lib/gene_assembler/gff_localization.rb +6 -0
- data/lib/gene_assembler/gff_master_feature.rb +5 -0
- data/lib/gene_assembler/gff_parser.rb +35 -0
- data/lib/gene_assembler/gff_snp.rb +21 -0
- data/lib/gene_assembler/gff_stop.rb +6 -0
- data/lib/gene_assembler/go.rb +13 -0
- data/lib/gene_assembler/hit.rb +191 -0
- data/lib/gene_assembler/hsp.rb +100 -0
- data/lib/gene_assembler/other_functions.rb +228 -0
- data/lib/gene_assembler/parser.rb +25 -0
- data/lib/gene_assembler/parser_blast.rb +12 -0
- data/lib/gene_assembler/parser_exonerate.rb +16 -0
- data/lib/gene_assembler/rebuild.rb +975 -0
- data/lib/gene_assembler/report.rb +13 -0
- data/lib/gene_assembler/report_gff.rb +30 -0
- data/lib/gene_assembler/snp.rb +13 -0
- data/lib/gene_assembler/version.rb +3 -0
- metadata +149 -0
@@ -0,0 +1,532 @@
|
|
1
|
+
require 'contig'
|
2
|
+
|
3
|
+
class Dataset
|
4
|
+
attr_accessor :type, :contigs, :clusters, :references_hash
|
5
|
+
def initialize(type) #Carga un objeto blast para generar los objetos contig que inician esta clase
|
6
|
+
@type=type #Definido pero no se usa
|
7
|
+
@contigs=[]
|
8
|
+
@clusters=[]
|
9
|
+
@references_hash=''
|
10
|
+
end
|
11
|
+
|
12
|
+
def add_contig(name)
|
13
|
+
c=Contig.new(name)
|
14
|
+
@contigs << c
|
15
|
+
return c
|
16
|
+
end
|
17
|
+
|
18
|
+
def transfer_contigs(add_contigs,limit=0)
|
19
|
+
if limit==0
|
20
|
+
@contigs << add_contigs
|
21
|
+
@contigs.flatten!
|
22
|
+
else
|
23
|
+
if add_contigs.class.to_s=='Array'
|
24
|
+
add_contigs.each_with_index do |contig,i|
|
25
|
+
if i==limit
|
26
|
+
break
|
27
|
+
end
|
28
|
+
@contigs << contig
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def transfer_cluster(cluster)
|
35
|
+
@clusters << cluster
|
36
|
+
end
|
37
|
+
|
38
|
+
def delete_cluster(cluster)
|
39
|
+
@clusters.delete(cluster)
|
40
|
+
end
|
41
|
+
|
42
|
+
def delete_cluster_at(ind)
|
43
|
+
@clusters.delete_at(ind)
|
44
|
+
end
|
45
|
+
|
46
|
+
#iterador
|
47
|
+
def each_contig
|
48
|
+
@contigs.each do |contig|
|
49
|
+
yield contig
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def each_contig_with_index
|
54
|
+
@contigs.each_with_index do |contig,i|
|
55
|
+
yield contig,i
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def each_cluster
|
60
|
+
@clusters.each do |cluster|
|
61
|
+
yield cluster
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def each_cluster_with_index
|
66
|
+
@clusters.each_with_index do |cluster,i|
|
67
|
+
yield cluster,i
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def n_contigs?
|
72
|
+
n=@contigs.length
|
73
|
+
return n
|
74
|
+
end
|
75
|
+
|
76
|
+
def clr_contigs # Vacia @contigs
|
77
|
+
@contigs=[]
|
78
|
+
end
|
79
|
+
|
80
|
+
def clusters_empty?
|
81
|
+
empty=TRUE
|
82
|
+
i=0
|
83
|
+
each_cluster{|cl|
|
84
|
+
i+=1
|
85
|
+
if i>0
|
86
|
+
empty=FALSE
|
87
|
+
break
|
88
|
+
end
|
89
|
+
}
|
90
|
+
return empty
|
91
|
+
end
|
92
|
+
|
93
|
+
def contig_count
|
94
|
+
count=@contigs.length
|
95
|
+
return count
|
96
|
+
end
|
97
|
+
|
98
|
+
def cluster_count
|
99
|
+
count=@clusters.length
|
100
|
+
return count
|
101
|
+
end
|
102
|
+
|
103
|
+
def attrib_recover(dataset) #Reponer atributos en el Dataset del exonerate que se han perdido en el proceso (exonerate no los tiene), se recuperan del blast
|
104
|
+
each_contig{|self_contig|
|
105
|
+
dataset.each_contig{|dataset_contig|
|
106
|
+
if self_contig.name==dataset_contig.name
|
107
|
+
self_contig.length=dataset_contig.length
|
108
|
+
self_contig.seq=dataset_contig.seq
|
109
|
+
self_contig.each_hit{|hit|
|
110
|
+
hit.s_length=dataset_contig.first_hit.s_length
|
111
|
+
}
|
112
|
+
break
|
113
|
+
end
|
114
|
+
}
|
115
|
+
}
|
116
|
+
end
|
117
|
+
|
118
|
+
def correct_hsp_contigs(blast_coor_type)
|
119
|
+
each_contig {|contig|
|
120
|
+
contig.correct_hsps(blast_coor_type)
|
121
|
+
}
|
122
|
+
end
|
123
|
+
|
124
|
+
def clustering # Compara el subject_id entre todos los contig y agrupa en un array aquellos con mismo s_i. Cada array se guarda en el array 'clusters'
|
125
|
+
finished_clusters=[]
|
126
|
+
each_contig{|contig|
|
127
|
+
clust=[]
|
128
|
+
if finished_clusters.include?(contig.first_hit.name)
|
129
|
+
next
|
130
|
+
end
|
131
|
+
each_contig{|contig2|
|
132
|
+
if contig.first_hit.name==contig2.first_hit.name
|
133
|
+
clust << contig2
|
134
|
+
contig2=nil
|
135
|
+
end
|
136
|
+
}
|
137
|
+
finished_clusters << contig.first_hit.name
|
138
|
+
if !clust.empty?
|
139
|
+
@clusters << clust
|
140
|
+
end
|
141
|
+
}
|
142
|
+
end
|
143
|
+
|
144
|
+
def info_clusters # Muestra informacion sobre @Clusters, muestra contig, la proteina a la q pertenece y un diagrama del alineamiento en aa
|
145
|
+
if $verbose
|
146
|
+
each_cluster{|cl|
|
147
|
+
puts '............................'
|
148
|
+
cl.each do |c|
|
149
|
+
puts "#{c.first_hit.name}\t#{c.name}"
|
150
|
+
end
|
151
|
+
puts "............................"
|
152
|
+
}
|
153
|
+
|
154
|
+
each_cluster{|clust|
|
155
|
+
puts "\n********************MAP*************************\n"
|
156
|
+
clust.each do |contig|
|
157
|
+
contig.draw
|
158
|
+
end
|
159
|
+
}
|
160
|
+
puts "\n"
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
def filtering #Bateria de filtros
|
165
|
+
putative_contigs=[]
|
166
|
+
uni_hsp=[]
|
167
|
+
each_contig{ |contig|
|
168
|
+
if contig.mixed?
|
169
|
+
next
|
170
|
+
elsif contig.is_one_hsp? #Apartamos contigs uni-hsp
|
171
|
+
uni_hsp << contig
|
172
|
+
next
|
173
|
+
elsif contig.is_gapped?
|
174
|
+
next
|
175
|
+
elsif contig.is_truncated?
|
176
|
+
next
|
177
|
+
elsif contig.hsp_minor_than?(15) #En nt
|
178
|
+
next
|
179
|
+
else putative_contigs << contig
|
180
|
+
if $verbose
|
181
|
+
puts "#{contig.first_hit.name}\t#{contig.name}"
|
182
|
+
end
|
183
|
+
end
|
184
|
+
}
|
185
|
+
@contigs=putative_contigs
|
186
|
+
return uni_hsp
|
187
|
+
end
|
188
|
+
|
189
|
+
def load_seq(hash) #Carga secuencias en @contigs
|
190
|
+
each_contig{|contig|
|
191
|
+
contig.seq=hash[contig.name]
|
192
|
+
contig.seq.upcase!
|
193
|
+
}
|
194
|
+
end
|
195
|
+
|
196
|
+
def rev_comp #Realiza la secuencia reverso complementaria en @contigs y @uni_hsp
|
197
|
+
each_contig{|contig|
|
198
|
+
contig.rev_comp_if_hit
|
199
|
+
}
|
200
|
+
end
|
201
|
+
|
202
|
+
def parse_stops
|
203
|
+
each_contig{|contig|
|
204
|
+
contig.stop_codon_search
|
205
|
+
}
|
206
|
+
end
|
207
|
+
|
208
|
+
def fasta(fasta_file) #Crea un archivo fasta a partir de @contigs
|
209
|
+
temp=File.open(fasta_file, 'w')
|
210
|
+
each_contig{|contig|
|
211
|
+
temp.print ">#{contig.name}\n"
|
212
|
+
temp.puts contig.seq
|
213
|
+
}
|
214
|
+
temp.close
|
215
|
+
end
|
216
|
+
|
217
|
+
def filtering_clust # Bateria de filtros q se aplica sobre @clusters. tb muestra informacion
|
218
|
+
gene_clusters=[]
|
219
|
+
uni_hsp=[]
|
220
|
+
each_cluster{|clust|
|
221
|
+
if $verbose
|
222
|
+
puts "\n********************CLUSTER*************************\n"
|
223
|
+
end
|
224
|
+
putative_ex=[]
|
225
|
+
trash_ex=[]
|
226
|
+
clust.each do |contig|
|
227
|
+
temp=[]
|
228
|
+
if contig.mixed?
|
229
|
+
temp << "#{contig.first_hit.name}\t#{contig.name}"
|
230
|
+
trash_ex << temp
|
231
|
+
elsif contig.is_truncated?
|
232
|
+
temp << "#{contig.first_hit.name}\t#{contig.name}"
|
233
|
+
trash_ex << temp
|
234
|
+
elsif contig.is_one_hsp?
|
235
|
+
temp << "#{contig.first_hit.name}\t#{contig.name}"
|
236
|
+
trash_ex << temp
|
237
|
+
uni_hsp << contig#Se guardan los contig uni-hsp, para procesado posterior
|
238
|
+
elsif contig.is_gapped?
|
239
|
+
temp << "#{contig.first_hit.name}\t#{contig.name}"
|
240
|
+
trash_ex << temp
|
241
|
+
else putative_ex << contig
|
242
|
+
end
|
243
|
+
end
|
244
|
+
if $verbose
|
245
|
+
putative_ex.each do |contig|
|
246
|
+
puts "#{contig.first_hit.name}\t#{contig.name}\t\t\tsc:#{contig.first_hit.first_hsp.score}" #el score de cada hsp es el mismo, por lo que realmente pertenece al alineamiento entero
|
247
|
+
end
|
248
|
+
puts ',,,,,,,,,,,,,REJECTED,,,,,,,,,,,,,'
|
249
|
+
trash_ex.each do |contig|
|
250
|
+
puts contig
|
251
|
+
end
|
252
|
+
puts "\n= = = = = = = = = =MAP= = = = = = = = = = = =\n"
|
253
|
+
putative_ex.each do |contig|
|
254
|
+
contig.draw
|
255
|
+
end
|
256
|
+
end
|
257
|
+
gene_clusters << putative_ex
|
258
|
+
}
|
259
|
+
@clusters=gene_clusters
|
260
|
+
return uni_hsp
|
261
|
+
end
|
262
|
+
|
263
|
+
def sort_cont_clust #Ordenar contigs dentro de @clusters de menor a mayor en base a su primer hsp
|
264
|
+
each_cluster{|cluster|
|
265
|
+
cluster=sort_cluster(cluster)
|
266
|
+
}
|
267
|
+
#@clusters=sort_clusters(@clusters)
|
268
|
+
end
|
269
|
+
|
270
|
+
def sort_cluster(cluster)#Ordena los elementos de cluster(contigs) en base a su posicion en el subject
|
271
|
+
cluster.sort!{|e1,e2| e1.first_hit.first_hsp.s_beg<=>e2.first_hit.first_hsp.s_beg}
|
272
|
+
end
|
273
|
+
|
274
|
+
def load_references(references_file) # Carga en @references_hash todas las referencias en forma de objetos contig
|
275
|
+
hash={}
|
276
|
+
if File.exists?(references_file)
|
277
|
+
File.open(references_file, 'r').each do |line|
|
278
|
+
fields=line.split
|
279
|
+
contig_name=fields[0]
|
280
|
+
if !fields[1].nil?
|
281
|
+
structures=fields[1].split('|')
|
282
|
+
all_models=[]
|
283
|
+
structures.each do |structure|
|
284
|
+
contig=Contig.new(contig_name)
|
285
|
+
contig.add_hit(contig_name, 0, 1,:nt)
|
286
|
+
if structure.nil?
|
287
|
+
break
|
288
|
+
end
|
289
|
+
hsps=structure.split(';')
|
290
|
+
s_end=0
|
291
|
+
nt_add=0
|
292
|
+
hsps.each do |hsp|
|
293
|
+
coords=hsp.split('-')
|
294
|
+
q_beg=coords[0].to_i
|
295
|
+
q_end=coords[1].to_i
|
296
|
+
s_beg=s_end+1
|
297
|
+
exon_length=q_end-q_beg+nt_add
|
298
|
+
s_end=s_end+(exon_length/3)
|
299
|
+
nt_add=exon_length.modulo(3)
|
300
|
+
contig.first_hit.add_hsp(q_beg, q_end, s_beg, s_end, 0, 0, 0, 0)
|
301
|
+
end
|
302
|
+
contig.length=contig.first_hit.last_hsp.q_end
|
303
|
+
all_models << contig
|
304
|
+
end
|
305
|
+
hash[contig_name]=all_models
|
306
|
+
end
|
307
|
+
end
|
308
|
+
end
|
309
|
+
@references_hash=hash
|
310
|
+
end
|
311
|
+
|
312
|
+
def missing_cluster_transfer(dataset) #Busca que clusters estan vacios e intenta llenarlos con clusters de dataset
|
313
|
+
add=[]
|
314
|
+
delete=[]
|
315
|
+
if clusters_empty?
|
316
|
+
dataset.each_cluster{ |clust|
|
317
|
+
transfer_cluster(clust)
|
318
|
+
}
|
319
|
+
dataset.clear_clusters
|
320
|
+
else
|
321
|
+
dataset.each_cluster_with_index{|uni_cluster,ind|
|
322
|
+
is_cluster=FALSE
|
323
|
+
each_cluster{|cluster| #Se mira si existe cluster uni-hsp en cluster
|
324
|
+
if uni_cluster.first.first_hit.name==cluster.first.first_hit.name
|
325
|
+
is_cluster=TRUE
|
326
|
+
break
|
327
|
+
end
|
328
|
+
}
|
329
|
+
if !is_cluster #Caso de q no exista cluster, se transfiere cluster uni-hsp
|
330
|
+
add << uni_cluster
|
331
|
+
delete << ind
|
332
|
+
end
|
333
|
+
}
|
334
|
+
add.each do |clust|
|
335
|
+
transfer_cluster(clust.dup)
|
336
|
+
end
|
337
|
+
delete.sort!
|
338
|
+
delete.reverse_each do |ind|
|
339
|
+
dataset.delete_cluster_at(ind)
|
340
|
+
end
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
def clear_clusters
|
345
|
+
@clusters=[]
|
346
|
+
end
|
347
|
+
|
348
|
+
def score_correction(factor) #Suma al atributo score la operacion nº intrones*factor
|
349
|
+
each_contig{|contig|
|
350
|
+
n_intron=contig.n_intron
|
351
|
+
contig.first_hit.each_hsp{|hsp|
|
352
|
+
hsp.score+=factor*n_intron
|
353
|
+
}
|
354
|
+
}
|
355
|
+
end
|
356
|
+
|
357
|
+
def correct_left_side_contigs(contig_base)
|
358
|
+
last_contig=nil
|
359
|
+
limit=0
|
360
|
+
correct=0
|
361
|
+
## Alineamiento de los contig entre si para calcular desplazamiento
|
362
|
+
if !contig_base.nil?
|
363
|
+
limit=-1
|
364
|
+
last_contig=contig_base
|
365
|
+
end
|
366
|
+
each_contig_with_index do |contig,i| # Calculo del desplazamiento necesario para corregir indices negativos en el gff
|
367
|
+
if i>limit
|
368
|
+
overlap_exon_with_last,ex=contig.compare(last_contig)
|
369
|
+
if overlap_exon_with_last>-1
|
370
|
+
overlap_exon_current,ex=last_contig.compare(contig)
|
371
|
+
diference=coord_prot(last_contig.hsp_at(overlap_exon_with_last),contig.hsp_at(overlap_exon_current))
|
372
|
+
if diference<correct
|
373
|
+
correct=diference
|
374
|
+
end
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
if !contig_base.nil?
|
379
|
+
last_contig=contig_base
|
380
|
+
else
|
381
|
+
last_contig=contig
|
382
|
+
end
|
383
|
+
end
|
384
|
+
correct*=-1
|
385
|
+
return correct
|
386
|
+
end
|
387
|
+
|
388
|
+
def align_contigs(contig_base)
|
389
|
+
limit=0
|
390
|
+
las_contig=nil
|
391
|
+
if !contig_base.nil?
|
392
|
+
limit=-1
|
393
|
+
last_contig=contig_base
|
394
|
+
end
|
395
|
+
## Alineamiento de los contig entre si o contra una referencia
|
396
|
+
add=0
|
397
|
+
align=TRUE
|
398
|
+
each_contig_with_index do |contig,i|
|
399
|
+
if i>limit
|
400
|
+
#Calcular desplazamiento de un contig respecto al anterior en el gff
|
401
|
+
overlap_exon_with_last,ex=contig.compare(last_contig)
|
402
|
+
if overlap_exon_with_last==-1
|
403
|
+
if contig_base.nil?
|
404
|
+
add+=last_contig.length
|
405
|
+
else
|
406
|
+
align=FALSE
|
407
|
+
end
|
408
|
+
else
|
409
|
+
overlap_exon_current,ex=last_contig.compare(contig)
|
410
|
+
add+=coord_prot(last_contig.hsp_at(overlap_exon_with_last),contig.hsp_at(overlap_exon_current))
|
411
|
+
if !contig_base.nil?
|
412
|
+
align=TRUE
|
413
|
+
end
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
417
|
+
#Modificacion de contigs
|
418
|
+
if align || contig_base.nil? # Modificar si no existe referencia o el contig a alineado contra la referencia
|
419
|
+
contig.modified_coordenates(add)
|
420
|
+
contig.length+=add
|
421
|
+
end
|
422
|
+
if !contig_base.nil?
|
423
|
+
last_contig=contig_base
|
424
|
+
add=0 #Resetear desplazamiento en caso de usarse una referencia
|
425
|
+
else
|
426
|
+
last_contig=contig
|
427
|
+
end
|
428
|
+
end
|
429
|
+
end
|
430
|
+
|
431
|
+
def multiple_align_contigs(array_contig_base,mod_contig_base=FALSE)
|
432
|
+
correct=0
|
433
|
+
array_contig_base.each do |contig_base|
|
434
|
+
local_correct=correct_left_side_contigs(contig_base)
|
435
|
+
if local_correct>correct
|
436
|
+
correct=local_correct
|
437
|
+
end
|
438
|
+
self.align_contigs(contig_base,mod_contig_base)
|
439
|
+
end
|
440
|
+
# Correcion del modelo en base al desplazamiento general calculado para cada fragmento teniendo en cuenta el desplazamiento local realizado
|
441
|
+
array_contig_base.each do |contig|
|
442
|
+
if correct>0
|
443
|
+
contig.modified_coordenates(correct)
|
444
|
+
contig.length+=correct
|
445
|
+
end
|
446
|
+
end
|
447
|
+
self.each_contig {|contig|
|
448
|
+
if correct>0
|
449
|
+
contig.modified_coordenates(correct)
|
450
|
+
contig.length+=correct
|
451
|
+
end
|
452
|
+
}
|
453
|
+
|
454
|
+
return correct
|
455
|
+
end
|
456
|
+
|
457
|
+
def transfer_n_contigs_def_hit_type(dataset,cluster,new_hit_type,limit)
|
458
|
+
if !cluster.empty?||!cluster.nil?
|
459
|
+
dataset.each_cluster{|dat_cluster|
|
460
|
+
if dat_cluster.empty?||dat_cluster.nil?
|
461
|
+
next
|
462
|
+
end
|
463
|
+
if dat_cluster.first.first_hit.name==cluster.first.first_hit.name # Se busca en los clusters unihsp aquel q pertenece al gen q se esta trabajando
|
464
|
+
dat_cluster.each do |contig|
|
465
|
+
contig.first_hit.type='pseudogene'
|
466
|
+
end
|
467
|
+
transfer_contigs(dat_cluster,limit)
|
468
|
+
end
|
469
|
+
|
470
|
+
}
|
471
|
+
end
|
472
|
+
end
|
473
|
+
|
474
|
+
def missing_contigs_transfer(dataset) #dataset is uni_hsp. Se buscan contigs q no alineen con los de self
|
475
|
+
contigs_cluster=[]
|
476
|
+
self.each_cluster_with_index{|self_cluster,s|
|
477
|
+
dataset.each_cluster{|dataset_cluster|
|
478
|
+
if dataset_cluster.nil? ||dataset_cluster.empty?
|
479
|
+
next
|
480
|
+
end
|
481
|
+
if self_cluster.first.first_hit.name==dataset_cluster.first.first_hit.name #Mismo cluster
|
482
|
+
dataset_cluster.each do |dataset_contig|
|
483
|
+
align=FALSE
|
484
|
+
self_cluster.each do |self_contig|
|
485
|
+
position,n_exones=dataset_contig.compare(self_contig)
|
486
|
+
if position>-1
|
487
|
+
align=TRUE
|
488
|
+
break
|
489
|
+
end
|
490
|
+
end
|
491
|
+
if !align
|
492
|
+
contigs_cluster << dataset_contig
|
493
|
+
end
|
494
|
+
end
|
495
|
+
|
496
|
+
contigs_cluster.each do |contig|
|
497
|
+
self.transfer_contig_to_cluster(contig,s)
|
498
|
+
dataset_cluster.delete(contig)
|
499
|
+
end
|
500
|
+
contigs_cluster=[]
|
501
|
+
end
|
502
|
+
}
|
503
|
+
}
|
504
|
+
end
|
505
|
+
|
506
|
+
def transfer_contig_to_cluster(contig,n_cluster)
|
507
|
+
@clusters[n_cluster] << contig
|
508
|
+
end
|
509
|
+
|
510
|
+
def generate_file_5_prime(file, fasta)
|
511
|
+
prime5_file = File.open(file, 'w')
|
512
|
+
fasta_file = File.open(fasta, 'w')
|
513
|
+
each_cluster{ |cluster|
|
514
|
+
if !cluster.nil? && !cluster.empty?
|
515
|
+
gene_name = cluster.first.first_hit.name
|
516
|
+
cluster.each do |contig|
|
517
|
+
if contig.first_hit.first_hsp.s_beg <= 10
|
518
|
+
prime5_end = contig.first_hit.first_hsp.q_beg
|
519
|
+
prime5_file.puts "#{gene_name}\t#{contig.name}\t#{prime5_end}"
|
520
|
+
seq = contig.seq[0..prime5_end]
|
521
|
+
if !seq.nil?
|
522
|
+
fasta_file.puts "#{gene_name}\n#{seq}"
|
523
|
+
end
|
524
|
+
end
|
525
|
+
end
|
526
|
+
end
|
527
|
+
}
|
528
|
+
prime5_file.close
|
529
|
+
fasta_file.close
|
530
|
+
end
|
531
|
+
|
532
|
+
end
|