bio-ngs 0.3.2.alpha.01
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +39 -0
- data/Gemfile.lock +81 -0
- data/LICENSE.txt +28 -0
- data/README.rdoc +240 -0
- data/Rakefile +60 -0
- data/VERSION +1 -0
- data/bin/biongs +35 -0
- data/bio-ngs.gemspec +215 -0
- data/ext/mkrf_conf.rb +87 -0
- data/lib/bio-ngs.rb +54 -0
- data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
- data/lib/bio/appl/ngs/blast.rb +36 -0
- data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
- data/lib/bio/appl/ngs/cufflinks.rb +489 -0
- data/lib/bio/appl/ngs/fastx.rb +170 -0
- data/lib/bio/appl/ngs/samtools.rb +118 -0
- data/lib/bio/appl/ngs/sff_extract.rb +23 -0
- data/lib/bio/appl/ngs/tophat.rb +158 -0
- data/lib/bio/ngs/converter.rb +100 -0
- data/lib/bio/ngs/core_ext.rb +12 -0
- data/lib/bio/ngs/db.rb +66 -0
- data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
- data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
- data/lib/bio/ngs/db/models.rb +1 -0
- data/lib/bio/ngs/db/models/homology.rb +8 -0
- data/lib/bio/ngs/db/models/ontology.rb +16 -0
- data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
- data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
- data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
- data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
- data/lib/bio/ngs/ext/versions.yaml +73 -0
- data/lib/bio/ngs/graphics.rb +189 -0
- data/lib/bio/ngs/homology.rb +102 -0
- data/lib/bio/ngs/ontology.rb +103 -0
- data/lib/bio/ngs/quality.rb +64 -0
- data/lib/bio/ngs/record.rb +50 -0
- data/lib/bio/ngs/task.rb +46 -0
- data/lib/bio/ngs/utils.rb +176 -0
- data/lib/development_tasks.rb +34 -0
- data/lib/enumerable.rb +37 -0
- data/lib/tasks/bwa.thor +126 -0
- data/lib/tasks/convert.thor +454 -0
- data/lib/tasks/history.thor +51 -0
- data/lib/tasks/homology.thor +121 -0
- data/lib/tasks/ontology.thor +93 -0
- data/lib/tasks/project.thor +51 -0
- data/lib/tasks/quality.thor +142 -0
- data/lib/tasks/rna.thor +126 -0
- data/lib/tasks/sff_extract.thor +9 -0
- data/lib/templates/README.tt +43 -0
- data/lib/templates/db.tt +6 -0
- data/lib/wrapper.rb +225 -0
- data/spec/converter_qseq_spec.rb +56 -0
- data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
- data/spec/quality_spec.rb +40 -0
- data/spec/sff_extract_spec.rb +98 -0
- data/spec/spec_helper.rb +55 -0
- data/spec/tophat_spec.rb +99 -0
- data/spec/utils_spec.rb +22 -0
- data/test/conf/test_db.yml +4 -0
- data/test/data/blastoutput.xml +69 -0
- data/test/data/gene-GO.json +1 -0
- data/test/data/goa_uniprot +27 -0
- data/test/data/goslim_goa.obo +1763 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-ngs.rb +17 -0
- data/test/test_db.rb +21 -0
- data/test/test_homology.rb +102 -0
- data/test/test_ngs.rb +21 -0
- data/test/test_ontology.rb +74 -0
- data/test/test_utils.rb +29 -0
- metadata +460 -0
Binary file
|
Binary file
|
@@ -0,0 +1,73 @@
|
|
1
|
+
common:
|
2
|
+
libgtextutils:
|
3
|
+
version: 0.6
|
4
|
+
url: http://hannonlab.cshl.edu/fastx_toolkit/libgtextutils-0.6.tar.bz2
|
5
|
+
basename: libgtextutils-0.6
|
6
|
+
suffix: tar.bz2
|
7
|
+
desc: ""
|
8
|
+
type: source
|
9
|
+
fastx:
|
10
|
+
version: 0.0.13
|
11
|
+
url: http://hannonlab.cshl.edu/fastx_toolkit/fastx_toolkit-0.0.13.tar.bz2
|
12
|
+
basename: fastx_toolkit-0.0.13
|
13
|
+
suffix: tar.bz2
|
14
|
+
desc: "Fastx-toolkit version 0.0.13 requires libgtextutils-0.6 (available here for download). A recent g++ compiler (tested with GNU G++ 4.1.2 and later). The fasta_clipping_histogram tool requires two perl modules: PerlIO::gzip and GD::Graph::bars. The fastx_barcode_splitter tool requires GNU sed. The fastq_quality_boxplot tool requires gnuplot version 4.2 or newer."
|
15
|
+
type: source
|
16
|
+
linux:
|
17
|
+
cufflinks:
|
18
|
+
version: 1.1.0
|
19
|
+
url: http://cufflinks.cbcb.umd.edu/downloads/cufflinks-1.1.0.Linux_x86_64.tar.gz
|
20
|
+
basename: cufflinks-1.1.0.Linux_x86_64
|
21
|
+
suffix: tar.gz
|
22
|
+
desc: ""
|
23
|
+
type: binary
|
24
|
+
tophat:
|
25
|
+
version: 1.3.2
|
26
|
+
url: http://tophat.cbcb.umd.edu/downloads/tophat-1.3.2.Linux_x86_64.tar.gz
|
27
|
+
basename: tophat-1.3.2.Linux_x86_64
|
28
|
+
suffix: tar.gz
|
29
|
+
desc: ""
|
30
|
+
type: binary
|
31
|
+
bowtie:
|
32
|
+
version: 0.12.7
|
33
|
+
url: http://sourceforge.net/projects/bowtie-bio/files/bowtie/0.12.7/bowtie-0.12.7-linux-x86_64.zip/download
|
34
|
+
basename: bowtie-0.12.7-linux-x86_64
|
35
|
+
suffix: zip
|
36
|
+
desc: ""
|
37
|
+
type: binary
|
38
|
+
# sra:
|
39
|
+
# version:
|
40
|
+
# url: http://trace.ncbi.nlm.nih.gov/Traces/sra/static/sratoolkit.2.1.0-centos_linux64.tar.gz
|
41
|
+
# basename: sratoolkit.2.1.0-centos_linux64
|
42
|
+
# suffix: tar.gz
|
43
|
+
# desc: ""
|
44
|
+
# type: binary
|
45
|
+
osx:
|
46
|
+
cufflinks:
|
47
|
+
version: 1.1.0
|
48
|
+
url: http://cufflinks.cbcb.umd.edu/downloads/cufflinks-1.1.0.OSX_x86_64.tar.gz
|
49
|
+
basename: cufflinks-1.1.0.OSX_x86_64
|
50
|
+
suffix: tar.gz
|
51
|
+
desc: ""
|
52
|
+
type: binary
|
53
|
+
tophat:
|
54
|
+
version: 1.3.2
|
55
|
+
url: http://tophat.cbcb.umd.edu/downloads/tophat-1.3.2.OSX_x86_64.tar.gz
|
56
|
+
basename: tophat-1.3.2.OSX_x86_64
|
57
|
+
suffix: tar.gz
|
58
|
+
desc: ""
|
59
|
+
type: binary
|
60
|
+
bowtie:
|
61
|
+
version: 0.12.7
|
62
|
+
url: http://sourceforge.net/projects/bowtie-bio/files/bowtie/0.12.7/bowtie-0.12.7-macos-10.5-x86_64.zip/download
|
63
|
+
basename: bowtie-0.12.7-macos-10.5-x86_64
|
64
|
+
suffix: zip
|
65
|
+
desc: ""
|
66
|
+
type: binary
|
67
|
+
# sra:
|
68
|
+
# version:
|
69
|
+
# url: http://trace.ncbi.nlm.nih.gov/Traces/sra/static/sratoolkit.2.1.0-mac64.tar.gz
|
70
|
+
# basename: sratoolkit.2.1.0-mac64
|
71
|
+
# suffix: tar.gz
|
72
|
+
# desc: ""
|
73
|
+
# type: binary
|
@@ -0,0 +1,189 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2011
|
5
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'rubyvis'
|
11
|
+
|
12
|
+
module Bio
  module Ngs
    # SVG chart generators built on the Rubyvis drawing API (reached through
    # the +pv+ shortcut and the +Rubyvis+ constant, both provided by the
    # 'rubyvis' library required by this file).
    class Graphics

      # Plain (x, y) pair used by draw_area. A core Struct is used so the
      # method does not depend on 'ostruct' having been loaded elsewhere.
      AreaPoint = Struct.new(:x, :y)

      # Draws an area chart of +data+ and renders it as SVG.
      #
      # Params:
      # data::        Array of numeric values; the n-th value is plotted at x = n (1-based)
      # width::       width of the plotting panel
      # height::      height of the plotting panel
      # out::         optional output file name; when nil the SVG is printed on stdout
      # title_label:: optional text drawn at the top of the chart (default: empty)
      # n_ticks::     optional number of ticks requested for the Y axis (default: 10)
      # x_padding::   optional amount subtracted from +width+ for the label panel (default: 0)
      # xlabel::      text drawn at the bottom of the chart
      # ylabel::      text drawn, rotated, on the left of the chart
      #
      # The three new optional parameters sit between +out+ and +xlabel+, so
      # existing calls with five arguments (no +out+) or six arguments (with
      # +out+) bind exactly as before.
      def self.draw_area(data, width, height, out = nil, title_label = "", n_ticks = 10, x_padding = 0, xlabel, ylabel)
        # Leave some headroom above the highest value.
        max = data.max + 10
        points = data.each_with_index.map { |value, index| AreaPoint.new(index + 1, value) }

        x = pv.Scale.linear(points, lambda { |d| d.x }).range(0, width)
        y = pv.Scale.linear(0, max).range(0, height)

        # The root panel. Calls inside the block use explicit parentheses
        # wherever a local variable has the same name as the panel property.
        vis = pv.Panel.new() do
          width(width)
          height(height)
          bottom 20
          left 50
          right 10
          top 5

          # Y-axis and ticks
          rule do
            data(y.ticks(n_ticks))
            bottom(y)
            stroke_style { |d| d != 0 ? "#eee" : "#000" }
            label(:anchor => "left") {
              text y.tick_format
            }
          end

          # X-axis and ticks.
          rule do
            data(x.ticks())
            visible { |d| d != 0 }
            left(x)
            bottom(-5)
            height(5)
            label(:anchor => 'bottom') {
              text(x.tick_format)
            }
          end

          # The area with top line.
          area do |a|
            a.data points
            a.bottom(1)
            a.left { |d| x.scale(d.x) }
            a.height { |d| y.scale(d.y) }
            a.fill_style("rgb(121,173,210)")
            a.line(:anchor => 'top') {
              line_width(3)
            }
          end
        end

        # Panel holding the title and the axis labels.
        panel = vis.add(Rubyvis::Panel).
          width(width - x_padding).
          height(height)

        panel.anchor('top').add(Rubyvis::Label).
          font("20px sans-serif").
          text(title_label)

        panel.anchor('bottom').add(Rubyvis::Label).text(xlabel)
        panel.anchor('left').add(Rubyvis::Label).
          text_angle(1.5 * Math::PI).
          text(ylabel)

        vis.render()

        if out
          File.open(out, "w") { |f| f.write(vis.to_svg) }
        else
          puts vis.to_svg
        end
      end

      # Draws a bubble chart (Rubyvis Pack layout) and writes it to +fileout+
      # as SVG, followed by a newline.
      #
      # Params:
      # fileout:: output file name
      # dataset:: Hash mapping a label to a numeric value; every pair becomes
      #           one child node whose value is used as the bubble size
      # panel_w:: overall width; the drawing area is 10 smaller (5 margin per side)
      # panel_h:: overall height; the drawing area is 10 smaller (5 margin per side)
      def self.bubble_chart(fileout, dataset = {}, panel_w = 600, panel_h = 800)
        colors = Rubyvis::Colors.category10()
        stroke = Rubyvis::Colors.category10().by(lambda { |n| n.parent_node })

        vis = Rubyvis::Panel.new.
          width(panel_w - 10).
          height(panel_h - 10).
          bottom(5).
          left(5).
          right(5).
          top(5)

        # Build a one-level tree: an anonymous root with one child per entry.
        root = Rubyvis::Dom::Node.new
        dataset.each_pair do |name, value|
          child = Rubyvis::Dom::Node.new(value)
          child.node_name = name
          root.append_child(child)
        end
        nodes = root.nodes()

        pack = vis.add(pv.Layout.Pack).
          nodes(nodes).
          size(lambda { |n| n.node_value })

        # Only nodes that have a parent (i.e. not the root) are drawn.
        pack.node.add(Rubyvis::Dot).
          visible(lambda { |n| n.parent_node }).
          fill_style(lambda { |n|
            colors.scale(n.parent_node).
              brighter((n.node_value) / 5.0)
          }).
          stroke_style(stroke)

        pack.node_label.add(Rubyvis::Label).
          visible(lambda { |n| n.parent_node }).
          text(lambda { |n| n.node_name })

        vis.render()
        File.open(fileout, "w") { |f| f.write vis.to_svg + "\n" }
      end

      # Draws a horizontal bar chart and writes it to +fileout+ as SVG,
      # followed by a newline.
      #
      # Params:
      # labels::  Array of bar labels, matched to +data+ by index
      # data::    Array of numeric values, one per bar
      # fileout:: output file name
      # width::   width of the plotting panel
      # height::  height of the plotting panel
      def self.bar_charts(labels, data, fileout, width = 500, height = 300)
        x = pv.Scale.linear(0, data.max).range(0, width)
        y = pv.Scale.ordinal(pv.range(data.size)).split_banded(0, height, 4 / 5.0)

        # The root panel.
        vis = pv.Panel.new().
          width(width).
          height(height).
          bottom(20).
          left(100).
          right(10).
          top(5)

        # The bars.
        bar = vis.add(pv.Bar).
          data(data).
          top(lambda { y.scale(self.index) }).
          height(y.range_band).
          left(0).
          width(x)

        # The value label.
        bar.anchor("right").add(pv.Label).
          text_style("white").
          text(lambda { |d| "%0.1f" % d })

        # The variable label.
        bar.anchor("left").add(pv.Label).
          text_margin(5).
          text_align("right").
          text(lambda { labels[self.index] })

        # X-axis ticks.
        vis.add(pv.Rule).
          data(x.ticks(5)).
          left(x).
          stroke_style(lambda { |d| d != 0 ? "rgba(255,255,255,.3)" : "#000" }).
          add(pv.Rule).
          bottom(0).
          height(5).
          stroke_style("#000").
          anchor("bottom").add(pv.Label).text(x.tick_format)

        # X-axis label
        vis.anchor("top").add(Rubyvis::Label).text("Number of sequences")

        vis.render()
        File.open(fileout, "w") { |out| out.write vis.to_svg + "\n" }
      end

    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
# Copyright:: Copyright (C) 2011
|
4
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
#
|
8
|
+
|
9
|
+
module Bio
  module Ngs
    # Import/export helpers for homology data (BLAST results and GO
    # annotations) backed by the :homology database.
    class Homology

      # Number of rows accumulated before they are flushed with insert_many.
      BATCH_SIZE = 1000

      BLAST_INSERT_SQL = "INSERT INTO blast_outputs(query_id,target_id,target_description,evalue,identity,positive) VALUES(?,?,?,?,?,?)"

      GOA_INSERT_SQL = "INSERT INTO go_annotations(db,entry_id,symbol,qualifier,go_id,db_ref,evidence,additional_identifier,aspect,name,synonym,molecule_type,taxon_id,date,assigned_by) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"

      # Method to import a Blast XML output file into the blast_outputs table.
      # Params: XML Blast file, YAML file for db connection (optional)
      #
      # The target id stored is the second '|'-separated field of the hit id.
      # Rows are written in batches of BATCH_SIZE.
      def self.blast_import(file, yaml_file = nil)
        db = Bio::Ngs::Db.new :homology, yaml_file
        inserts = []
        each_blast_hit(file) do |query_def, hit, evalue, identity, positive|
          inserts << [query_def, hit.hit_id.split('|')[1], hit.hit_def, evalue, identity, positive]
          if inserts.size == BATCH_SIZE
            db.insert_many(:blast_outputs, BLAST_INSERT_SQL, inserts)
            # A fresh array is bound instead of clearing the old one, so the
            # batch handed to insert_many is never mutated afterwards.
            inserts = []
          end
        end
        db.insert_many(:blast_outputs, BLAST_INSERT_SQL, inserts) if inserts.size > 0
      end

      # Method to convert a Blast XML output file into a tab-separated text
      # file with one header line and one line per hit.
      # Params: XML Blast file, output file name
      # Returns nil.
      def self.blast2text(file_in, file_out)
        # Block form guarantees the handle is closed even if parsing fails.
        File.open(file_out, "w") do |out|
          out.write("Query ID\tTarget ID\tTarget Description\tE-value\tIdentity\tPositive\n")
          each_blast_hit(file_in) do |query_def, hit, evalue, identity, positive|
            out.write([query_def, hit.hit_id, hit.hit_def, evalue, identity, positive].join("\t") + "\n")
          end
        end
        nil
      end

      # Method to import a GO Annotation file into the go_annotations table.
      # Params: GOA file, YAML file for db connection (optional)
      #
      # Lines starting with "!" are skipped; only the first 15 tab-separated
      # columns of every other line are stored.
      def self.goa_import(file, yaml_file = nil)
        db = Bio::Ngs::Db.new :homology, yaml_file
        inserts = []
        # File.foreach closes the file once iteration ends.
        File.foreach(file) do |line|
          next if line.start_with? "!"
          inserts << line.chomp.split("\t")[0..14]
          if inserts.size == BATCH_SIZE
            db.insert_many(:go_annotations, GOA_INSERT_SQL, inserts)
            inserts = []
          end
        end
        db.insert_many(:go_annotations, GOA_INSERT_SQL, inserts) if inserts.size > 0
      end

      # Method to export the associations among genes and GO and store them
      # into a JSON file that can be imported into the Ontology db.
      # Params: file to write JSON data, library name (optional),
      #         YAML file for db connection (optional)
      def self.go_annotation_to_json(file_out, library = nil, yaml_file = nil)
        # The returned object is not used here; the call is kept because it
        # presumably sets up the database connection -- TODO confirm in db.rb.
        Bio::Ngs::Db.new :homology, yaml_file
        ontologies = BlastOutput.find(:all).map do |result|
          ontology = Bio::Ngs::Ontology.new result.query_id
          ontology.go = result.go_annotations.map { |goa| goa.go_id }
          ontology.library = library
          ontology
        end
        File.open(file_out, "w") { |f| f.write ontologies.to_json }
      end

      # Iterates over every hit of a Blast XML file and yields
      # (query definition, hit, mean e-value, identity %, positive %).
      # Identity and positive are the sums over all HSPs of the hit divided by
      # the summed alignment length, times 100; the e-value is the arithmetic
      # mean over the HSPs.
      def self.each_blast_hit(file)
        Bio::Blast::XmlIterator.new(file).to_enum.each do |iter|
          iter.each do |hit|
            identity = 0.0
            positive = 0.0
            evalues = []
            length = 0
            hit.each do |hsp|
              identity += hsp.identity.to_f
              positive += hsp.positive.to_f
              evalues << hsp.evalue
              length += hsp.align_len
            end
            identity = (identity / length) * 100
            positive = (positive / length) * 100
            evalue = evalues.inject { |sum, el| sum + el }.to_f / evalues.size
            yield iter.query_def, hit, evalue, identity, positive
          end
        end
      end
      private_class_method :each_blast_hit

    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
# Copyright:: Copyright (C) 2011
|
4
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
#
|
8
|
+
|
9
|
+
module Bio
  module Ngs
    # A gene together with its GO terms and the library it belongs to, plus
    # class-level helpers to load GO data into the :ontology database.
    class Ontology

      # Number of rows accumulated before they are flushed with insert_many.
      BATCH_SIZE = 1000

      GO_INSERT_SQL = "INSERT INTO go(go_id,name,namespace,is_a) VALUES(?,?,?,?)"

      # Method to import a GO OBO file into the go table.
      # Params: GO OBO file, YAML file for db connection (optional)
      #
      # Every "[Term]" stanza becomes one row (go_id, name, namespace, is_a).
      # Rows are written in batches of BATCH_SIZE.
      def self.go_import(file, yaml_file = nil)
        db = Bio::Ngs::Db.new :ontology, yaml_file
        inserts = []
        # Block form closes the OBO file when the import ends or fails.
        File.open(file) do |obo|
          obo.each do |line|
            next unless line.start_with? "[Term]"
            # Read the rest of the stanza, i.e. up to the next blank line.
            block = obo.gets("\n\n")
            next if block.nil? # "[Term]" header at end of file with no body
            inserts << parse_go_term(block)
            if inserts.size == BATCH_SIZE
              db.insert_many(:go, GO_INSERT_SQL, inserts)
              inserts = []
            end
          end
        end
        db.insert_many(:go, GO_INSERT_SQL, inserts) if inserts.size > 0
      end

      # Turns the body of one "[Term]" stanza into a row
      # [go_id, name, namespace, is_a]. Missing tags yield nil so the row
      # always matches the four placeholders of GO_INSERT_SQL. The is_a value
      # is the space-joined list of parent ids, each kept exactly as it
      # appears before the "!" comment marker.
      def self.parse_go_term(block)
        go_id = name = namespace = nil
        is_a = []
        block.split("\n").each do |elem|
          # sub (not gsub): only the leading tag is stripped, so a value that
          # happens to contain the tag text again is left intact.
          if elem.start_with? "id: "
            go_id = elem.sub("id: ", "")
          elsif elem.start_with? "name: "
            name = elem.sub("name: ", "")
          elsif elem.start_with? "is_a"
            is_a << elem.sub("is_a: ", "").split("!").first
          elsif elem.start_with? "namespace: "
            namespace = elem.sub("namespace: ", "")
          end
        end
        [go_id, name, namespace, is_a.join(" ")]
      end
      private_class_method :parse_go_term

      # Method to load the Gene-GO associations from a JSON file into the Ontology db
      # Params: JSON file name, YAML file for db connection (optional)
      #
      # The JSON document is expected to be an array of objects with the keys
      # "gene_id", "go" and "library".
      def self.load_go_genes(file, yaml_file = nil)
        # The returned object is not used here; the call is kept because it
        # presumably sets up the database connection -- TODO confirm in db.rb.
        Bio::Ngs::Db.new :ontology, yaml_file
        # NOTE(review): JSON.load is meant for trusted input; consider
        # JSON.parse if the file can come from an untrusted source.
        list = JSON.load File.read(file)
        ontologies = Bio::Ngs::OntologyCollection.new
        list.each do |gene|
          ontologies << Bio::Ngs::Ontology.new(gene["gene_id"], gene["go"], gene["library"])
        end
        ontologies.to_db(yaml_file)
      end

      attr_accessor :gene_id, :go, :library

      # Constructor for Bio::Ngs::Ontology instances
      # Params: gene identifier, list of GO ids (optional), library name (optional)
      def initialize(gene_id, go = [], library = nil)
        @gene_id = gene_id
        @go = go
        @library = library
      end

      # Method to store a single Bio::Ngs::Ontology object into the Ontology db
      # Params: YAML file for db connection (optional)
      # Raises RuntimeError when the go table is empty.
      def to_db(yaml_file = nil)
        # The Db object is created before the first query; assumes Db.new is
        # what establishes the connection used by Go/Gene -- TODO confirm.
        Bio::Ngs::Db.new :ontology, yaml_file
        raise RuntimeError, "You must initialize the Ontology db with biongs ontology:db:init" if Go.count == 0
        g = Gene.create(:gene_id => @gene_id, :library => @library)
        Go.where({:go_id => @go}).all.each do |go|
          g.gene_gos.create(:go_id => go.id)
        end
      end

    end

    # Class to handle collection of Bio::Ngs::Ontology objects.
    # It provides a method to store all the gene-GO associations into the Ontology db
    class OntologyCollection < Array

      # Stores every gene of the collection and its GO associations with two
      # bulk inserts (genes, then gene_gos).
      # Params: YAML file for db connection (optional)
      # Raises ArgumentError when an element is not a Bio::Ngs::Ontology.
      #
      # Gene row ids are the 1-based positions in the collection, and GO ids
      # that are not present in the go table are silently skipped.
      # NOTE(review): explicit ids look like they assume an empty genes
      # table -- confirm against callers.
      def to_db(yaml_file = nil)
        db = Bio::Ngs::Db.new :ontology, yaml_file
        genes = []
        ontologies = []
        # Lookup table: GO accession => primary key in the go table.
        go = {}
        Go.find_by_sql("SELECT id, go_id FROM go").each { |g| go[g.go_id] = g.id }
        self.each_with_index do |gene, index|
          unless gene.is_a?(Bio::Ngs::Ontology)
            raise ArgumentError, "OntologyCollection can store only Bio::Ngs::Ontology objects!"
          end
          genes << [index + 1, gene.gene_id, gene.library]
          gene.go.each { |o| ontologies << [index + 1, go[o]] if go[o] }
        end
        db.insert_many(:genes, "INSERT INTO genes(id,gene_id,library) VALUES(?,?,?)", genes)
        db.insert_many(:gene_gos, "INSERT INTO gene_gos(gene_id,go_id) VALUES(?,?)", ontologies)
      end

    end

  end
end
|