gene_assembler 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/GeneAssembler +233 -0
- data/bin/phytozome_scan +60 -0
- data/gene_assembler.gemspec +25 -0
- data/lib/gene_assembler.rb +5 -0
- data/lib/gene_assembler/blast_type_parser.rb +41 -0
- data/lib/gene_assembler/contig.rb +643 -0
- data/lib/gene_assembler/dataset.rb +532 -0
- data/lib/gene_assembler/exonerate_result.rb +230 -0
- data/lib/gene_assembler/gff_contig.rb +67 -0
- data/lib/gene_assembler/gff_dataset.rb +152 -0
- data/lib/gene_assembler/gff_feature.rb +175 -0
- data/lib/gene_assembler/gff_frameshift.rb +6 -0
- data/lib/gene_assembler/gff_go.rb +13 -0
- data/lib/gene_assembler/gff_hit.rb +53 -0
- data/lib/gene_assembler/gff_hsp.rb +6 -0
- data/lib/gene_assembler/gff_localization.rb +6 -0
- data/lib/gene_assembler/gff_master_feature.rb +5 -0
- data/lib/gene_assembler/gff_parser.rb +35 -0
- data/lib/gene_assembler/gff_snp.rb +21 -0
- data/lib/gene_assembler/gff_stop.rb +6 -0
- data/lib/gene_assembler/go.rb +13 -0
- data/lib/gene_assembler/hit.rb +191 -0
- data/lib/gene_assembler/hsp.rb +100 -0
- data/lib/gene_assembler/other_functions.rb +228 -0
- data/lib/gene_assembler/parser.rb +25 -0
- data/lib/gene_assembler/parser_blast.rb +12 -0
- data/lib/gene_assembler/parser_exonerate.rb +16 -0
- data/lib/gene_assembler/rebuild.rb +975 -0
- data/lib/gene_assembler/report.rb +13 -0
- data/lib/gene_assembler/report_gff.rb +30 -0
- data/lib/gene_assembler/snp.rb +13 -0
- data/lib/gene_assembler/version.rb +3 -0
- metadata +149 -0
@@ -0,0 +1,643 @@
|
|
1
|
+
require 'hit'
|
2
|
+
require 'snp'
|
3
|
+
require 'go'
|
4
|
+
|
5
|
+
|
6
|
+
class Contig
|
7
|
+
attr_accessor :name, :seq,:type, :length, :hits, :completed, :q_frameshift, :s_frameshift, :stops, :mod_coord
|
8
|
+
# Builds an empty contig identified by +name+.
#
# Every collection (hits, SNPs, GO terms, localizations, frameshifts, stops)
# starts empty; +seq+, +length+ and +completed+ start as empty strings and
# +type+ as nil.
def initialize(name)
  @name = name
  @seq = ''
  @type = nil
  @length = ''
  @hits = []
  @snps = []
  @gos = []
  @completed = ''
  @localization = []
  @q_frameshift = []
  @s_frameshift = []
  @stops = []
  # Tells whether the contig coordinates have already been shifted.
  # Uses the +false+ literal: the deprecated FALSE constant is not available
  # on current Ruby versions.
  @mod_coord = false
end
|
23
|
+
|
24
|
+
# Sets the contig length.
#
# A String (or String subclass) is converted with +to_i+; any other value is
# stored as given.
def length=(length)
  @length = length.is_a?(String) ? length.to_i : length
end
|
31
|
+
|
32
|
+
# Returns the first stored hit, or nil when the contig has no hits.
def first_hit
  @hits.first
end
|
40
|
+
|
41
|
+
# Returns the last stored hit, or nil when the contig has no hits.
def last_hit
  @hits.last
end
|
48
|
+
|
49
|
+
# Returns how many hits the contig holds.
def hit_count
  @hits.length
end
|
56
|
+
|
57
|
+
# Appends +localization+ to the contig's localization list and returns
# that list.
def add_localization(localization)
  @localization.push(localization)
end
|
60
|
+
|
61
|
+
# Yields every stored localization in insertion order.
def each_localization
  @localization.each { |entry| yield entry }
end
|
66
|
+
|
67
|
+
# Yields every stored localization together with its zero-based position.
def each_localization_with_index
  @localization.each_with_index { |entry, position| yield entry, position }
end
|
72
|
+
|
73
|
+
# Yields every stored stop position.
def each_stop
  @stops.each { |stop_pos| yield stop_pos }
end
|
78
|
+
|
79
|
+
# Yields every stored stop position together with its zero-based index.
def each_stop_with_index
  @stops.each_with_index { |stop_pos, position| yield stop_pos, position }
end
|
84
|
+
|
85
|
+
# Wraps the arguments in a new GO object, stores it on the contig and
# returns it.
def add_go(go, name, obsolete)
  term = GO.new(go, name, obsolete)
  @gos.push(term)
  term
end
|
90
|
+
|
91
|
+
# Yields every GO object stored on the contig.
def each_go
  @gos.each { |term| yield term }
end
|
96
|
+
|
97
|
+
# Creates a SNP at +position+, stores it on the contig and returns it.
def add_snp(position)
  variant = SNP.new(position)
  @snps.push(variant)
  variant
end
|
102
|
+
|
103
|
+
# Creates a Hit from the given attributes, appends it to the contig's hits
# and returns it.
def add_hit(hit_name, s_length, reversed, type)
  new_hit = Hit.new(hit_name, s_length, reversed, type)
  @hits.push(new_hit)
  new_hit
end
|
108
|
+
|
109
|
+
# Returns true when the contig holds at least one hit, false otherwise.
def has_hit?
  !@hits.empty?
end
|
116
|
+
|
117
|
+
# Yields every hit of the contig in stored order.
def each_hit
  @hits.each { |stored_hit| yield stored_hit }
end
|
122
|
+
|
123
|
+
# Yields every hit of the contig together with its zero-based position.
def each_hit_with_index
  @hits.each_with_index { |stored_hit, position| yield stored_hit, position }
end
|
128
|
+
|
129
|
+
# Returns the number of hits. Despite the trailing "?" the result is an
# Integer, not a boolean.
def n_hits?
  @hits.size
end
|
133
|
+
|
134
|
+
# Yields every SNP stored on the contig.
def each_snp
  @snps.each { |variant| yield variant }
end
|
139
|
+
|
140
|
+
# Yields every SNP stored on the contig together with its zero-based index.
def each_snp_with_index
  @snps.each_with_index { |variant, position| yield variant, position }
end
|
145
|
+
|
146
|
+
# Asks every hit to sort its own HSPs in place (via Hit#hsps_sort!).
# The order of the hits themselves is left untouched.
def hits_sort!
  each_hit do |current|
    current.hsps_sort!
  end
end
|
151
|
+
|
152
|
+
# Shifts every coordinate tracked by the contig by +add+ and flags the
# contig as modified.
#
# Each hit shifts its own HSPs; the contig-level stop and query-frameshift
# positions are shifted exactly once, regardless of how many hits there are
# (they used to be shifted once per hit).
#
# Returns the query end of the last HSP of the last hit. Raises
# NoMethodError when the contig has no hits, because there is no last hit.
def modified_coordenates(add)
  @mod_coord = true
  each_hit { |hit| hit.modified_coordenates(add) }
  stop_modified_coordenates(add)
  frameshift_modified_coordenates(add)
  last_hit.last_hsp.q_end
end
|
161
|
+
|
162
|
+
# Adds +add+ to every stored stop position, in place.
# Returns the number of stop positions.
def stop_modified_coordenates(add)
  @stops.map! { |stop_pos| stop_pos + add }
  @stops.length
end
|
167
|
+
|
168
|
+
# Adds +add+ to every stored query-frameshift position, in place.
# Returns the number of frameshift positions.
def frameshift_modified_coordenates(add)
  @q_frameshift.map! { |shift_pos| shift_pos + add }
  @q_frameshift.length
end
|
173
|
+
|
174
|
+
# Yields every stored query-frameshift position.
def each_q_frameshift
  @q_frameshift.each { |shift_pos| yield shift_pos }
end
|
179
|
+
|
180
|
+
# Prints a one-line ASCII sketch of the first hit's HSPs laid out on the
# subject coordinates.
#
# For each HSP, the distance from the previous HSP's subject end decides the
# separator: '/' for 0..2 (adjacent HSPs), a run of '-' followed by '|' for a
# larger gap, '&' for an overlap. The 1-based HSP number is then printed
# centred in a field sized from the HSP's subject span. The line ends with
# '|-' plus one '-' per subject position left after the last HSP.
def draw
  previous_end = 0
  # NOTE(review): once an overlap is seen this negative correction is reused
  # for every later HSP as well (it is never reset) - confirm that is intended.
  overlap_adjust = 0
  first_hit.each_hsp_with_index do |hsp, idx|
    distance = hsp.s_beg - previous_end
    if distance < 0
      print '&'
      overlap_adjust = distance
    elsif distance <= 2
      print '/'
    else
      print '-' * distance
      print '|'
    end
    label = (idx + 1).to_s
    print label.center(hsp.s_end - hsp.s_beg + overlap_adjust - 1)
    previous_end = hsp.s_end
  end
  print '|-'
  tail = first_hit.s_length - previous_end
  tail = 0 if tail < 0
  print '-' * tail
  puts "\n"
end
|
209
|
+
|
210
|
+
# Prints one tab-separated line per HSP with its query and subject
# coordinates, prefixed by "<hit number>.<hsp number>)" (both 1-based) and
# followed by the contig name, length and coordinate-modified flag.
def indices
  each_hit_with_index do |hit, hit_idx|
    hit.each_hsp_with_index do |hsp, hsp_idx|
      puts "#{hit_idx+1}.#{hsp_idx+1})\t#{hsp.q_beg}\t#{hsp.q_end}\t#{hsp.s_beg}\t#{hsp.s_end}\t#{@name}\t#{@length}\t#{@mod_coord}"
    end
  end
end
|
217
|
+
|
218
|
+
# Returns an array with the size of every HSP/exon measured on the subject:
# the absolute difference between +s_end+ and +s_beg+, for all hits in order.
def exones_s
  sizes = []
  each_hit do |hit|
    hit.each_hsp { |hsp| sizes.push((hsp.s_end - hsp.s_beg).abs) }
  end
  sizes
end
|
228
|
+
|
229
|
+
# Returns an array with the size of every HSP/exon measured on the query:
# the absolute difference between +q_end+ and +q_beg+, for all hits in order.
def exones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp { |hsp| sizes.push((hsp.q_end - hsp.q_beg).abs) }
  end
  sizes
end
|
239
|
+
|
240
|
+
# Returns an array with the size of every intron measured on the query: for
# each pair of consecutive HSPs of a hit, the absolute distance between the
# first one's +q_end+ and the next one's +q_beg+.
#
# The following HSP is looked up on the hit being iterated (it used to be
# looked up on the first hit even while walking later hits). A contig
# without hits yields an empty array.
def intrones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp_with_index do |hsp, idx|
      following = hit.hsp_at(idx + 1)
      break unless following # last HSP of this hit: no intron after it
      sizes << (following.q_beg - hsp.q_end).abs
    end
  end
  sizes
end
|
254
|
+
|
255
|
+
# Returns the number of hits stored on the contig.
# NOTE(review): the name suggests an intron count, but the value is one per
# hit - confirm against callers before relying on it as an intron count.
def n_intron
  @hits.size
end
|
262
|
+
|
263
|
+
# Returns the sum of all exon sizes reported by +exones_q+ (0 when there are
# none).
def exon_acumulative
  exones_q.inject(0) { |total, exon_size| total + exon_size }
end
|
271
|
+
|
272
|
+
# Functions that check the validity of a contig
|
273
|
+
# Checks whether the HSPs of the first hit are out of order on the query.
#
# Walks the HSPs comparing each +q_beg+ with the previous one and records the
# direction of the step (1 when not decreasing, 0 when decreasing). Returns
# true as soon as the direction differs from the previous step's direction;
# the comparison only starts at the third HSP, so the first step merely sets
# the reference direction. Returns false otherwise.
def mixed?
  previous_beg = nil
  previous_direction = 0
  @hits.first.hsps.each_with_index do |hsp, idx|
    direction = 0
    unless previous_beg.nil?
      direction = hsp.q_beg - previous_beg >= 0 ? 1 : 0
      return true if direction != previous_direction && idx > 1
    end
    previous_direction = direction
    previous_beg = hsp.q_beg
  end
  false
end
|
295
|
+
|
296
|
+
# Returns true when the first hit consists of exactly one HSP, false
# otherwise.
def is_one_hsp?
  first_hit.hsp_count == 1
end
|
303
|
+
|
304
|
+
# Checks whether the first hit has internal gaps on the subject (protein)
# that could point to a partially or completely missing exon.
#
# gap - largest distance tolerated between the +s_end+ of one HSP and the
#       +s_beg+ of the next (defaults to 3; the original comment gives the
#       unit as amino acids and advises a minimum of 1).
#
# Overlapping HSPs give a negative distance and therefore never count as a
# gap. A pair whose first HSP has a non-positive +s_end+ is skipped.
# Returns true or false.
def is_gapped?(gap = 3)
  @hits.first.hsps.each_cons(2).any? do |previous, current|
    previous.s_end > 0 && current.s_beg - previous.s_end > gap
  end
end
|
320
|
+
|
321
|
+
# Checks whether the exons at the contig borders look truncated.
#
# Truncated at the start: the first HSP of the first hit begins at query
# position 1 while its subject start is greater than 1.
# Truncated at the end: the last HSP of the first hit ends exactly at the
# contig length.
# Returns true when either condition holds, false otherwise.
def is_truncated?
  hit = first_hit
  leading = hit.first_hsp
  truncated_start = leading.s_beg > 1 && leading.q_beg == 1
  truncated_end = hit.last_hsp.q_end == @length
  truncated_start || truncated_end
end
|
336
|
+
|
337
|
+
# Returns true when any hit reports (through Hit#hsp_minor_than?) an HSP
# shorter than +hsp_length+; the original comment gives the unit as
# nucleotides. Returns false otherwise.
def hsp_minor_than?(hsp_length)
  each_hit_with_index do |hit, i|
    # NOTE(review): this guard compares the *hit* index with the hit's HSP
    # count and is only false for the first hit when it has no HSPs; it is
    # kept as written - confirm the intended condition.
    next unless i > 0 || i < hit.hsp_count
    return true if hit.hsp_minor_than?(hsp_length)
  end
  false
end
|
349
|
+
|
350
|
+
# Forwards +blast_coor_type+ to Hit#correct_hsps on every hit of the contig.
def correct_hsps(blast_coor_type)
  each_hit do |current|
    current.correct_hsps(blast_coor_type)
  end
end
|
355
|
+
#-------------------------------------------------------------------------
|
356
|
+
|
357
|
+
# Aligns this contig against +contig+ by comparing their HSPs pairwise
# (via HSP#compare, which yields a coverage value).
#
# For every HSP of self, the HSPs of +contig+ are scanned in order while
# counting their position across all of its hits:
# * the position with the best coverage seen so far is remembered;
# * every pair with coverage above 0.2 increments the exon counter;
# * as soon as a match is already recorded and a later pair has zero
#   coverage, the whole search stops.
#
# Returns [exon_match, exones]: the position (0-based, within +contig+) of
# the best matching HSP, or -1 when nothing overlapped, and the number of
# pairs counted as shared exons.
def compare(contig)
  exon_match = -1
  exones = 0
  match_found = false
  each_hit do |self_hit|
    break if match_found # a definitive match ends the search

    self_hit.each_hsp do |self_hsp|
      break if match_found

      best_coverage = 0
      contig_hsp_count = 0
      contig.each_hit do |contig_hit|
        contig_hit.each_hsp do |contig_hsp|
          coverage = self_hsp.compare(contig_hsp)
          if coverage > best_coverage # keep this match until a better one shows up
            exon_match = contig_hsp_count
            best_coverage = coverage
          end
          exones += 1 if coverage > 0.2
          if coverage == 0 && exon_match > -1
            # A match exists and the following exons no longer overlap.
            match_found = true
            break
          end
          contig_hsp_count += 1
        end
        break if match_found
      end
    end
  end
  return exon_match, exones
end
|
399
|
+
|
400
|
+
# Reverse-complements the contig sequence when its first hit is flagged as
# reversed, then clears that flag so the operation is not applied twice.
# Does nothing when the first hit is not reversed.
def rev_comp_if_hit
  hit = first_hit
  return unless hit.reversed

  rev_comp
  hit.reversed = false
end
|
406
|
+
|
407
|
+
# Replaces the contig sequence with its reverse complement and returns it.
#
# The sequence is upcased first; A/T and G/C are swapped, any other
# character (e.g. N) is kept as is, and the result is reversed.
def rev_comp
  @seq = @seq.upcase.tr('ATGC', 'TACG').reverse
end
|
427
|
+
|
428
|
+
# For every hit: reverses its coordinates relative to the contig length
# (Hit#rev_coord) and then re-sorts its HSPs (Hit#hsps_sort!).
def rev_coord
  each_hit do |current|
    current.rev_coord(@length)
    current.hsps_sort!
  end
end
|
434
|
+
|
435
|
+
# Looks for a start codon (ATG) for the first HSP of the first hit and, when
# one is accepted, moves the HSP start onto it.
#
# The search only runs when the HSP starts within the first 10 subject
# positions. ATG occurrences are scanned left to right using 1-based
# positions:
# * an ATG exactly at +q_beg+ is accepted and ends the scan;
# * an ATG beyond +q_beg+ + 2 ends the scan;
# * any earlier ATG whose distance to +q_end+ is a multiple of 3 becomes the
#   current candidate (a later one replaces it).
#
# When a candidate exists the HSP gets +s_beg+ = 1 and +q_beg+ = candidate.
# Returns true in that case, false otherwise - including when the sequence
# holds no (further) ATG, which previously raised NoMethodError because the
# position was incremented before the nil check.
def start_codon_search
  exists = false
  first_hsp = @hits.first.hsps.first
  q_beg = first_hsp.q_beg
  q_end = first_hsp.q_end
  if first_hsp.s_beg <= 10
    index = 0
    search_from = 0
    loop do
      pos = @seq.index('ATG', search_from)
      break if pos.nil?

      find = pos + 1 # 1-based position of the codon
      if find == q_beg
        index = find
        break
      end
      break if find > q_beg - 1 + 3

      index = find if (find - q_end).modulo(3) == 0
      search_from = pos + 3 # resume right after this codon
    end
    if index > 0
      first_hsp.s_beg = 1
      first_hsp.q_beg = index
      exists = true
    end
  end
  exists
end
|
480
|
+
|
481
|
+
# Searches the contig sequence for the first in-frame occurrence of +codon+.
#
# codon - the codon string to look for.
# ends  - with two arguments: 0-based index where the search starts; the
#         frame reference is then +ends+ - 1.
# beg   - optional third argument: when given, +ends+ becomes the frame
#         reference itself and the search starts at +beg+ instead.
#
# An occurrence is in frame when its distance to the reference is a multiple
# of 3; out-of-frame occurrences are skipped and the search resumes one
# position later.
# Returns the 0-based index of the match, or nil when there is none.
def stop_codon(codon, ends, *beg)
  reference = ends - 1
  unless beg.empty?
    reference = ends
    ends = beg.first
  end
  loop do
    pos = @seq.index(codon, ends)
    return nil if pos.nil?
    return pos if (pos - reference).abs.modulo(3) == 0

    ends = pos + 1
  end
end
|
506
|
+
|
507
|
+
# Returns the intron borders of the first hit as an array of
# [previous HSP q_end, next HSP q_beg] pairs, one per pair of consecutive
# HSPs. The array is empty when the first hit has a single HSP.
def coor_intrones
  borders = []
  return borders unless first_hit.hsp_count > 1

  previous = nil
  first_hit.each_hsp_with_index do |hsp, idx|
    borders << [previous.q_end, hsp.q_beg] if idx > 0
    previous = hsp
  end
  borders
end
|
522
|
+
|
523
|
+
# Scans the coding part of the contig for stop codons (TAG, TAA, TGA).
#
# The sequence is read from the query start of the first HSP to the query
# end of the last HSP of the first hit (1-based). Positions strictly inside
# an intron (see +coor_intrones+; the exon borders themselves are not
# skipped) are ignored, and the remaining bases are grouped into codons of
# three. The 1-based position of the first base of every stop codon is
# appended to @stops.
# Returns true when at least one stop codon was found, false otherwise.
def stop_codon_search
  exists = false
  homology_start = first_hit.first_hsp.q_beg
  homology_end = first_hit.last_hsp.q_end
  intrones = coor_intrones
  codon = ''
  position = nil
  @seq.each_char.with_index(1) do |base, n|
    next if n < homology_start # comparison starts at the first exon
    break if n > homology_end
    next if intrones.any? { |intron| n > intron[0] && n < intron[1] }

    codon += base
    if codon.length == 3
      if codon == 'TAG' || codon == 'TAA' || codon == 'TGA'
        @stops << position
        exists = true
      end
      codon = ''
    elsif codon.length == 1
      position = n # remember where the current codon starts
    end
  end
  exists
end
|
570
|
+
|
571
|
+
# Returns one GFF "exon" line per HSP of the first hit, in genomic (query)
# coordinates.
#
# id     - value written in the first GFF column.
# parent - name used for the ID, Parent and Name attributes.
# add    - offset added to both the start and end coordinate of every exon.
def gff(id, parent, add)
  lines = []
  first_hit.each_hsp do |hsp|
    lines.push("#{id}\t.\texon\t#{hsp.q_beg+add}\t#{hsp.q_end+add}\t.\t+\t.\tID=#{parent}_exon;Parent=#{parent};Name=#{parent}_exon")
  end
  lines
end
|
584
|
+
|
585
|
+
# Returns one GFF "protein_match" line per HSP of the first hit, in protein
# (subject) coordinates.
#
# id        - value written in the first GFF column and in the Name attribute.
# prot_name - name used for the ID and Parent attributes.
def gff_prot(id, prot_name)
  lines = []
  first_hit.each_hsp do |hsp|
    lines.push("#{id}\t.\tprotein_match\t#{hsp.s_beg}\t#{hsp.s_end}\t.\t+\t.\tID=#{prot_name}_prot;Parent=#{prot_name};Name=#{id}_prot")
  end
  lines
end
|
595
|
+
|
596
|
+
# Appends every hit of +contig+ to this contig. The hit objects themselves
# are shared, not copied.
def transfer_contig_hits(contig)
  contig.each_hit do |foreign_hit|
    transfer_hit(foreign_hit)
  end
end
|
601
|
+
|
602
|
+
# Appends an existing hit object to this contig's hits and returns the hit
# list.
def transfer_hit(hit)
  @hits.push(hit)
end
|
605
|
+
|
606
|
+
# Collects overlap values for the contig into one flat array.
#
# For every hit the result of Hit#hsp_overlap is added; from the second hit
# on, the value of Hit#overlap_with(previous hit) is added as well, but only
# when it is negative.
def overlap
  collected = []
  previous = nil
  each_hit_with_index do |hit, idx|
    collected << hit.hsp_overlap
    if idx > 0
      between_hits = hit.overlap_with(previous)
      collected << between_hits if between_hits < 0
    end
    previous = hit
  end
  collected.flatten!
  collected
end
|
622
|
+
|
623
|
+
# Returns the HSP found at +position+ when the HSPs of all hits are numbered
# consecutively from 0 in hit order, or nil when there is no such HSP.
def hsp_at(position)
  counter = 0
  each_hit do |hit|
    hit.each_hsp do |hsp|
      return hsp if position == counter

      counter += 1
    end
  end
  nil
end
|
642
|
+
|
643
|
+
end #class
|