gene_assembler 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/GeneAssembler +233 -0
- data/bin/phytozome_scan +60 -0
- data/gene_assembler.gemspec +25 -0
- data/lib/gene_assembler.rb +5 -0
- data/lib/gene_assembler/blast_type_parser.rb +41 -0
- data/lib/gene_assembler/contig.rb +643 -0
- data/lib/gene_assembler/dataset.rb +532 -0
- data/lib/gene_assembler/exonerate_result.rb +230 -0
- data/lib/gene_assembler/gff_contig.rb +67 -0
- data/lib/gene_assembler/gff_dataset.rb +152 -0
- data/lib/gene_assembler/gff_feature.rb +175 -0
- data/lib/gene_assembler/gff_frameshift.rb +6 -0
- data/lib/gene_assembler/gff_go.rb +13 -0
- data/lib/gene_assembler/gff_hit.rb +53 -0
- data/lib/gene_assembler/gff_hsp.rb +6 -0
- data/lib/gene_assembler/gff_localization.rb +6 -0
- data/lib/gene_assembler/gff_master_feature.rb +5 -0
- data/lib/gene_assembler/gff_parser.rb +35 -0
- data/lib/gene_assembler/gff_snp.rb +21 -0
- data/lib/gene_assembler/gff_stop.rb +6 -0
- data/lib/gene_assembler/go.rb +13 -0
- data/lib/gene_assembler/hit.rb +191 -0
- data/lib/gene_assembler/hsp.rb +100 -0
- data/lib/gene_assembler/other_functions.rb +228 -0
- data/lib/gene_assembler/parser.rb +25 -0
- data/lib/gene_assembler/parser_blast.rb +12 -0
- data/lib/gene_assembler/parser_exonerate.rb +16 -0
- data/lib/gene_assembler/rebuild.rb +975 -0
- data/lib/gene_assembler/report.rb +13 -0
- data/lib/gene_assembler/report_gff.rb +30 -0
- data/lib/gene_assembler/snp.rb +13 -0
- data/lib/gene_assembler/version.rb +3 -0
- metadata +149 -0
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 TODO: Write your name
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# GeneAssembler
|
2
|
+
|
3
|
+
GeneAssembler builds gene models using fragmented genome information. Use it with techniques like gene capture.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'gene_assembler'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install gene_assembler
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Run `GeneAssembler -f input.fasta -d protein_db.fasta -r reference -e evalue -c n_cpu`; use `GeneAssembler -h` to list every option.
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it ( https://github.com/[my-github-username]/gene_assembler/fork )
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/GeneAssembler
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH=File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, "../lib/"))
|
5
|
+
$: << File.expand_path(File.join(ROOT_PATH, "../lib/gene_assembler/"))
|
6
|
+
|
7
|
+
require 'optparse'
|
8
|
+
require 'scbi_fasta'
|
9
|
+
require 'parser_blast'
|
10
|
+
require 'parser_exonerate'
|
11
|
+
require 'dataset'
|
12
|
+
require 'rebuild'
|
13
|
+
require 'other_functions'
|
14
|
+
|
15
|
+
|
16
|
+
# INPUT PARSING
#################################################################################################
# Default value of every command-line option; each switch below overrides one entry.
options = {
  fasta: 'contigsMC.fasta',
  db: 'dual_prot.fasta',
  reference: 'reference',
  evalue: 1.0e-3,
  verbose: false,
  overwrite: false,
  web: false,
  index: false,
  rebuild: true,
  cpu: 1
}

optparse = OptionParser.new do |opts|
  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: GeneAssembler -f input.fasta -e evalue -c n_cpu \n\n"

  opts.on('-f', '--fasta FILE', 'Fasta input file') do |file|
    options[:fasta] = file
  end

  opts.on('-d', '--database FILE', 'Blast database') do |db|
    options[:db] = db
  end

  opts.on('-r', '--reference FILE', 'Gene models reference file') do |ref|
    options[:reference] = ref
  end

  # Float coercion makes OptionParser reject a malformed value instead of
  # silently turning it into 0.0 as String#to_f would.
  opts.on('-e', '--evalue EVALUE', Float,
          'e value threshold to consider as reliable the orthologue sequence. Default=1.0e-3') do |evalue|
    options[:evalue] = evalue
  end

  opts.on('-v', '--verbose', 'Default=0') do
    options[:verbose] = true
    # The global is set as well as the option entry; it is not read in this part of the script.
    $verbose = true
  end

  opts.on('-o', '--overwrite', 'Default=FALSE') do
    options[:overwrite] = true
  end

  opts.on('-w', '--web', 'Default=FALSE') do
    options[:web] = true
  end

  opts.on('-i', '--index', 'Default=FALSE') do
    options[:index] = true
  end

  # Giving -n switches the rebuild option off (it is on by default).
  opts.on('-n', '--nrebuild', 'Default=1') do
    options[:rebuild] = false
  end

  # Integer coercion validates the thread count before it reaches the blastx command line.
  opts.on('-c', '--cpu CPU', Integer, 'Default=1') do |cpu|
    options[:cpu] = cpu
  end

  # This displays the help screen
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end # End opts

# parse options and remove from ARGV
optparse.parse!
|
84
|
+
|
85
|
+
# I/O FILES
#####################################################################################
# Check that the input files exist before doing any work.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
unless File.exist?(options[:fasta]) && File.exist?(options[:db])
  puts "File #{options[:fasta]} or #{options[:db]} doesn't exists"
  Process.exit(-1)
end

path = {}
# Directories path
path[:db] = File.join(Dir.pwd, 'db') # Database folder
path[:temp] = File.join(Dir.pwd, 'temp') # Temp folder
path[:local] = File.join(Dir.pwd, 'temp/local')
# NOTE(review): the :ouput_files spelling is kept as is because `path` is passed to
# Rebuild.new later in this script, which may read this key.
path[:ouput_files] = File.join(Dir.pwd, 'output_files') # Output folder

# Create work directories. At this point `path` holds only the four directories above;
# :temp is inserted before :local, so the parent exists when temp/local is created.
path.each_value do |directory|
  Dir.mkdir(directory) unless File.exist?(directory)
end

# Files path
path[:fasta] = File.join(path[:ouput_files], 'gene_models.fasta')
path[:gff] = File.join(path[:ouput_files], 'gene_capture.gff')
path[:error] = File.join(path[:ouput_files], 'Error.log')
path[:html] = File.join(path[:ouput_files], 'Index.html')
path[:prime] = File.join(path[:ouput_files], '5_prime_data.txt')
path[:fasta_prime] = File.join(path[:ouput_files], '5_prime_region.fasta')
path[:db_prot] = options[:db] # NOTE(review): marked "???" by the original author; same value as :exonerate_db
db_basename = File.basename(options[:db])
path[:blast_db] = File.join(path[:db], db_basename)
path[:blast_output] = File.join(path[:temp], db_basename + '.blast')
path[:exonerate_db] = options[:db]
path[:exonerate_output] = File.join(path[:temp], db_basename + '.ex')
path[:exonerate_input_fasta] = File.join(path[:db], db_basename + '.fasta')
path[:reference] = options[:reference]

# Links path
path[:gbrowse_link] = 'http://10.247.129.19/cgi-bin/gbrowse/ostra2/?name=Sequence:'

# Delete the gff report of a previous run so a new one is created.
# File.delete is a core method; the original FileUtils.rm call had no `require 'fileutils'` in this script.
File.delete(path[:gff]) if File.exist?(path[:gff])
|
128
|
+
|
129
|
+
# BLASTING
#######################################################################################

# Creating blast db
#---------------------------------------
# The protein database is (re)built when its .psq file is missing or --overwrite was given.
if !File.exist?(path[:blast_db] + '.psq') || options[:overwrite]
  puts 'Creating DB '
  # Argument-list form of system: no shell is started, so file names containing
  # spaces or shell metacharacters are passed to the tool verbatim.
  if system('makeblastdb', '-in', options[:db], '-out', path[:blast_db], '-dbtype', 'prot', '-parse_seqids') # Protein
    puts 'DB created'
  else
    warn 'makeblastdb failed or could not be executed'
  end
end

# Do blast
#---------------------------------------
if !File.exist?(path[:blast_output]) || options[:overwrite]
  puts 'Start blastx'
  blast_outfmt = '7 qseqid sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore ' \
                 'score qframe sframe qseq sseq qlen slen stitle'
  blastx_ok = system('blastx',
                     '-query', options[:fasta],
                     '-db', path[:blast_db],
                     '-outfmt', blast_outfmt,
                     '-evalue', options[:evalue].to_s,
                     '-max_target_seqs', '1',
                     '-out', path[:blast_output],
                     '-num_threads', options[:cpu].to_s)
  if blastx_ok
    puts "Blastx has finished"
  else
    warn 'blastx failed or could not be executed'
  end
end

# Parsing blast (blast to class)
#-------------------------------------------
puts 'Parsing blast_prot:'
store_blast = ParserBlast.new('contig', 'nucleotide_match', path[:blast_output])
store_prot_blast = store_blast.dataset
store_prot_blast.correct_hsp_contigs('s') # Subject
store_prot_blast.correct_hsp_contigs('q') # Query
puts 'End parsing'

# Save relationship contig-protein for debugging
#-------------------------------------------------
if options[:index]
  # Block form closes the index file even if writing a line raises.
  File.open(File.join(path[:ouput_files], 'contig_index'), 'w') do |index|
    store_prot_blast.each_contig do |contig|
      index.puts contig.name + "\t" + contig.first_hit.name
    end
  end
end

# Filtering results
#-----------------------------------------------
puts 'FILTERING BLAST'
store_prot_uni_hsp = Dataset.new(:prot)
store_prot_uni_hsp.transfer_contigs(store_prot_blast.filtering)
puts "FILTERING BLAST FINISHED\n"
|
175
|
+
|
176
|
+
# EXONERATING
###########################################################################################

# Loading sequences for exonerate fasta
#---------------------------------------
puts 'Loading sequences for exonerate fasta'
seqs = fasta_hash(options[:fasta])
store_prot_blast.load_seq(seqs) # Load the contig sequences into the class
store_prot_uni_hsp.load_seq(seqs)
seqs = nil # Drop the reference; the sequences are not used again below
store_prot_blast.rev_comp # Reverse-complement every contig whose sequence is not on the + strand
store_prot_uni_hsp.rev_comp

# Do exonerate
#---------------------------------------
puts 'START EXONERATE'
if !File.exist?(path[:exonerate_output]) || options[:overwrite]
  store_prot_blast.fasta(path[:exonerate_input_fasta])
  # Argument-list form of system (no shell); `out:` redirects exonerate's stdout
  # to the report file, replacing the shell `>` redirection.
  exonerate_ok = system('exonerate',
                        '-q', options[:db],
                        '-t', path[:exonerate_input_fasta],
                        '-Q', 'protein',
                        '-T', 'dna',
                        '-m', 'protein2genome',
                        '--percent', '1',
                        '--showalignment', '0',
                        '--showvulgar',
                        '--useaatla', '1',
                        out: path[:exonerate_output])
  warn 'exonerate failed or could not be executed' unless exonerate_ok
end
puts 'EXONERATE FINISHED'

# Parsing exonerate (exonerate to class)
#-------------------------------------------
store_exonerate = ParserExonerate.new('contig', 'nucleotide_match', path[:exonerate_output])
store_prot_exonerate = store_exonerate.dataset
store_prot_exonerate.attrib_recover(store_prot_blast)
# Intron penalty correction for exonerate (per the original author, a bug in the
# program prevents doing it from exonerate itself)
store_prot_exonerate.score_correction(30)

# Filtering results
#-----------------------------------------------
puts 'FILTERING EXONERATE'
store_prot_uni_hsp.transfer_contigs(store_prot_exonerate.filtering)
puts "Filtering exonerate finished\n"

# CLUSTERING
#################################################################################
store_prot_exonerate.clustering # Cluster the contigs that passed the filters
store_prot_exonerate.info_clusters
store_prot_uni_hsp.clustering # Cluster the uni-hsp contigs

# Rescue of missed genes and contigs (store_prot_uni_hsp to store_prot_exonerate)
#----------------------------------------------------------------------------
store_prot_exonerate.missing_cluster_transfer(store_prot_uni_hsp)
store_prot_exonerate.missing_contigs_transfer(store_prot_uni_hsp)

# GENE REBUILD
######################################################################################
puts 'Gene rebuild start'
store_prot_exonerate.load_references(path[:reference])
store_prot_exonerate.sort_cont_clust # Sort contigs in ascending order based on their first hsp
store_prot_uni_hsp.sort_cont_clust # Sort the uni-hsp contigs
store_prot_exonerate.generate_file_5_prime(path[:prime], path[:fasta_prime])
rebuild = Rebuild.new(store_prot_exonerate, store_prot_uni_hsp, path)
rebuild.rebuild(options)
puts "\nGene rebuild finished"
|
233
|
+
|
data/bin/phytozome_scan
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
ROOT_PATH=File.dirname(__FILE__)
|
4
|
+
$: << File.expand_path(File.join(ROOT_PATH, "../lib/"))
|
5
|
+
$: << File.expand_path(File.join(ROOT_PATH, "../lib/gene_assembler/"))
|
6
|
+
|
7
|
+
|
8
|
+
require 'gff_parser'
|
9
|
+
# Input: a gff file (ARGV[0]) and a blast result with outfmt 6 (ARGV[1]).

# Load Arabidopsis gene features (only CDS); file used: NCBI_Chr1.tbl downloaded from TAIR.
gff = Gff_parser.new(ARGV[0]).dataset

# Load the relationship between a pool of proteins and the Arabidopsis gene pool;
# file used: tblastn report with outfmt 6. The second column has two parts joined
# by '+': the first is the gene name and the second is the mRNA name. The mRNA
# name is used as the gene name.
model_hash = {}
# File.foreach closes the report when done (the original File.open(...).each never closed it).
File.foreach(ARGV[1]) do |line|
  fields = line.split
  next if fields.length < 2 # blank or truncated line: nothing to record

  mrna_part = fields[1].split('+')[1]
  next if mrna_part.nil? # subject id without the expected 'gene+mRNA' form

  (model_hash[fields[0]] ||= []) << mrna_part.gsub('id', '')
end

# Write crossover between blast and gff
max_exones = 100
# Histogram of CDS counts per mRNA (index = count - 1); only read by the
# commented-out debug output at the end.
genes_exones = Array.new(max_exones, 0)
# Block form closes the 'references' file even if an error is raised while writing.
File.open('references', 'w') do |ref|
  model_hash.each do |protein_name, mrna_names|
    ref.print protein_name + "\t"
    mrna_names.each do |mrna_name|
      mrna = gff.feature(mrna_name)
      next if mrna.nil?

      exons = mrna.cds
      next if exons.empty? # no CDS segments: there is no origin to compute

      # `.to_i` turns the nil of a slot beyond max_exones into 0, so mRNAs with
      # more than max_exones segments no longer raise.
      slot = exons.length - 1
      genes_exones[slot] = genes_exones[slot].to_i + 1

      reverse_strand = mrna.strand == '-' # Detect reverse structures
      # Coordinates are written relative to `origin`; on the reverse strand each
      # segment is read end-first.
      origin = (reverse_strand ? exons.first.last : exons.first.first) - 1
      exons.each do |exon|
        # Work on a copy: the original reversed and shifted the arrays returned by
        # `cds` in place. NOTE(review): if `cds` returns the same arrays on every
        # call, an mRNA referenced by several proteins was printed with corrupted
        # coordinates from its second occurrence on — confirm against the gff classes.
        coords = reverse_strand ? exon.reverse : exon
        ref.print "#{(coords[0] - origin).abs}-#{(coords[1] - origin).abs};"
      end
      ref.print '|'
    end
    ref.puts
  end
end
#puts 'GENES-EXONES'
#puts genes_exones.inspect
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'gene_assembler/version'
|
5
|
+
|
6
|
+
# Gem metadata for gene_assembler. The version comes from GeneAssembler::VERSION,
# loaded by the `require 'gene_assembler/version'` line above.
Gem::Specification.new do |spec|
  spec.name          = "gene_assembler"
  spec.version       = GeneAssembler::VERSION
  spec.authors       = ["Pedro Seoane"]
  spec.email         = ["seoanezonjic@hotmail.es"]
  spec.summary       = %q{This gem builds gene models using fragmented genome information.}
  # Fixed the "siftware" typo in the published description.
  spec.description   = %q{Use this software with techniques like genecapture}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables are the files under bin/.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'scbi_blast', '>= 0.0.43'
  spec.add_runtime_dependency 'scbi_fasta', '>= 0.1.9'
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'parser'
|
2
|
+
|
3
|
+
# Parser for blast-like reports: reads a result file and loads every query that
# has at least one hit into the dataset as a contig with its hits and hsps.
# `create_dataset` and `parse_file` are not defined in this class; they are
# expected to come from Parser or from a subclass.
class BlastTypeParser < Parser

  # contig_type - type assigned to every contig that is created.
  # hit_type    - type passed to each hit that is created.
  # file        - path of the report to parse.
  # all         - stored in @all; not read inside this class.
  def initialize(contig_type, hit_type, file, all = false)
    @file = file
    @dataset = create_dataset
    @all = all
    data = parse_file(file)
    load_dataset(data, contig_type, hit_type)
  end

  # Loads the blast data into contig/hit/hsp classes.
  def load_dataset(data, contig_type, hit_type)
    data.querys.each do |item|
      next if item.hits.empty? # Discard queries that gave no match

      contig = @dataset.add_contig(item.query_def) # query_def -> name of the query (our contig)
      contig.length = item.full_query_length # full_query_length -> length of the query
      contig.type = contig_type
      populate_extra_atributes(contig, item)

      last_hit_name = ''
      hit = nil
      item.hits.each do |ht| # Classify the blast hits into hits and hsps
        # A new hit starts whenever the subject changes. The nil check also
        # covers a first subject id that equals the empty initial name.
        if hit.nil? || ht.subject_id != last_hit_name # Hit
          hit = contig.add_hit(ht.subject_id, ht.full_subject_length, ht.q_frame, hit_type)
        end
        # +1 because the blast parser gem subtracts 1 from every coordinate
        hsp = hit.add_hsp(ht.q_beg + 1, ht.q_end + 1, ht.s_beg + 1, ht.s_end + 1,
                          ht.align_len, ht.score, ht.ident, ht.gaps)
        hsp.type = 'match_part'
        last_hit_name = ht.subject_id
      end
      contig.hits_sort!
    end
  end

  # Hook called once per contig before its hits are loaded; intentionally empty
  # here. NOTE(review): presumably overridden by subclasses, so the method name
  # (including its spelling) is left unchanged.
  def populate_extra_atributes(contig, item)

  end

end
|