bio-ngs 0.3.2.alpha.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +39 -0
- data/Gemfile.lock +81 -0
- data/LICENSE.txt +28 -0
- data/README.rdoc +240 -0
- data/Rakefile +60 -0
- data/VERSION +1 -0
- data/bin/biongs +35 -0
- data/bio-ngs.gemspec +215 -0
- data/ext/mkrf_conf.rb +87 -0
- data/lib/bio-ngs.rb +54 -0
- data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
- data/lib/bio/appl/ngs/blast.rb +36 -0
- data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
- data/lib/bio/appl/ngs/cufflinks.rb +489 -0
- data/lib/bio/appl/ngs/fastx.rb +170 -0
- data/lib/bio/appl/ngs/samtools.rb +118 -0
- data/lib/bio/appl/ngs/sff_extract.rb +23 -0
- data/lib/bio/appl/ngs/tophat.rb +158 -0
- data/lib/bio/ngs/converter.rb +100 -0
- data/lib/bio/ngs/core_ext.rb +12 -0
- data/lib/bio/ngs/db.rb +66 -0
- data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
- data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
- data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
- data/lib/bio/ngs/db/models.rb +1 -0
- data/lib/bio/ngs/db/models/homology.rb +8 -0
- data/lib/bio/ngs/db/models/ontology.rb +16 -0
- data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
- data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
- data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
- data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
- data/lib/bio/ngs/ext/versions.yaml +73 -0
- data/lib/bio/ngs/graphics.rb +189 -0
- data/lib/bio/ngs/homology.rb +102 -0
- data/lib/bio/ngs/ontology.rb +103 -0
- data/lib/bio/ngs/quality.rb +64 -0
- data/lib/bio/ngs/record.rb +50 -0
- data/lib/bio/ngs/task.rb +46 -0
- data/lib/bio/ngs/utils.rb +176 -0
- data/lib/development_tasks.rb +34 -0
- data/lib/enumerable.rb +37 -0
- data/lib/tasks/bwa.thor +126 -0
- data/lib/tasks/convert.thor +454 -0
- data/lib/tasks/history.thor +51 -0
- data/lib/tasks/homology.thor +121 -0
- data/lib/tasks/ontology.thor +93 -0
- data/lib/tasks/project.thor +51 -0
- data/lib/tasks/quality.thor +142 -0
- data/lib/tasks/rna.thor +126 -0
- data/lib/tasks/sff_extract.thor +9 -0
- data/lib/templates/README.tt +43 -0
- data/lib/templates/db.tt +6 -0
- data/lib/wrapper.rb +225 -0
- data/spec/converter_qseq_spec.rb +56 -0
- data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
- data/spec/quality_spec.rb +40 -0
- data/spec/sff_extract_spec.rb +98 -0
- data/spec/spec_helper.rb +55 -0
- data/spec/tophat_spec.rb +99 -0
- data/spec/utils_spec.rb +22 -0
- data/test/conf/test_db.yml +4 -0
- data/test/data/blastoutput.xml +69 -0
- data/test/data/gene-GO.json +1 -0
- data/test/data/goa_uniprot +27 -0
- data/test/data/goslim_goa.obo +1763 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-ngs.rb +17 -0
- data/test/test_db.rb +21 -0
- data/test/test_homology.rb +102 -0
- data/test/test_ngs.rb +21 -0
- data/test/test_ontology.rb +74 -0
- data/test/test_utils.rb +29 -0
- metadata +460 -0
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
common:
|
|
2
|
+
libgtextutils:
|
|
3
|
+
version: 0.6
|
|
4
|
+
url: http://hannonlab.cshl.edu/fastx_toolkit/libgtextutils-0.6.tar.bz2
|
|
5
|
+
basename: libgtextutils-0.6
|
|
6
|
+
suffix: tar.bz2
|
|
7
|
+
desc: ""
|
|
8
|
+
type: source
|
|
9
|
+
fastx:
|
|
10
|
+
version: 0.0.13
|
|
11
|
+
url: http://hannonlab.cshl.edu/fastx_toolkit/fastx_toolkit-0.0.13.tar.bz2
|
|
12
|
+
basename: fastx_toolkit-0.0.13
|
|
13
|
+
suffix: tar.bz2
|
|
14
|
+
desc: "Fastx-toolkit version 0.0.13 requires libgtextutils-0.6 (available here for download). A recent g++ compiler (tested with GNU G++ 4.1.2 and later). The fasta_clipping_histogram tool requires two perl modules: PerlIO::gzip and GD::Graph::bars. The fastx_barcode_splitter tool requires GNU sed. The fastq_quality_boxplot tool requires gnuplot version 4.2 or newer."
|
|
15
|
+
type: source
|
|
16
|
+
linux:
|
|
17
|
+
cufflinks:
|
|
18
|
+
version: 1.1.0
|
|
19
|
+
url: http://cufflinks.cbcb.umd.edu/downloads/cufflinks-1.1.0.Linux_x86_64.tar.gz
|
|
20
|
+
basename: cufflinks-1.1.0.Linux_x86_64
|
|
21
|
+
suffix: tar.gz
|
|
22
|
+
desc: ""
|
|
23
|
+
type: binary
|
|
24
|
+
tophat:
|
|
25
|
+
version: 1.3.2
|
|
26
|
+
url: http://tophat.cbcb.umd.edu/downloads/tophat-1.3.2.Linux_x86_64.tar.gz
|
|
27
|
+
basename: tophat-1.3.2.Linux_x86_64
|
|
28
|
+
suffix: tar.gz
|
|
29
|
+
desc: ""
|
|
30
|
+
type: binary
|
|
31
|
+
bowtie:
|
|
32
|
+
version: 0.12.7
|
|
33
|
+
url: http://sourceforge.net/projects/bowtie-bio/files/bowtie/0.12.7/bowtie-0.12.7-linux-x86_64.zip/download
|
|
34
|
+
basename: bowtie-0.12.7-linux-x86_64
|
|
35
|
+
suffix: zip
|
|
36
|
+
desc: ""
|
|
37
|
+
type: binary
|
|
38
|
+
# sra:
|
|
39
|
+
# version:
|
|
40
|
+
# url: http://trace.ncbi.nlm.nih.gov/Traces/sra/static/sratoolkit.2.1.0-centos_linux64.tar.gz
|
|
41
|
+
# basename: sratoolkit.2.1.0-centos_linux64
|
|
42
|
+
# suffix: tar.gz
|
|
43
|
+
# desc: ""
|
|
44
|
+
# type: binary
|
|
45
|
+
osx:
|
|
46
|
+
cufflinks:
|
|
47
|
+
version: 1.1.0
|
|
48
|
+
url: http://cufflinks.cbcb.umd.edu/downloads/cufflinks-1.1.0.OSX_x86_64.tar.gz
|
|
49
|
+
basename: cufflinks-1.1.0.OSX_x86_64
|
|
50
|
+
suffix: tar.gz
|
|
51
|
+
desc: ""
|
|
52
|
+
type: binary
|
|
53
|
+
tophat:
|
|
54
|
+
version: 1.3.2
|
|
55
|
+
url: http://tophat.cbcb.umd.edu/downloads/tophat-1.3.2.OSX_x86_64.tar.gz
|
|
56
|
+
basename: tophat-1.3.2.OSX_x86_64
|
|
57
|
+
suffix: tar.gz
|
|
58
|
+
desc: ""
|
|
59
|
+
type: binary
|
|
60
|
+
bowtie:
|
|
61
|
+
version: 0.12.7
|
|
62
|
+
url: http://sourceforge.net/projects/bowtie-bio/files/bowtie/0.12.7/bowtie-0.12.7-macos-10.5-x86_64.zip/download
|
|
63
|
+
basename: bowtie-0.12.7-macos-10.5-x86_64
|
|
64
|
+
suffix: zip
|
|
65
|
+
desc: ""
|
|
66
|
+
type: binary
|
|
67
|
+
# sra:
|
|
68
|
+
# version:
|
|
69
|
+
# url: http://trace.ncbi.nlm.nih.gov/Traces/sra/static/sratoolkit.2.1.0-mac64.tar.gz
|
|
70
|
+
# basename: sratoolkit.2.1.0-mac64
|
|
71
|
+
# suffix: tar.gz
|
|
72
|
+
# desc: ""
|
|
73
|
+
# type: binary
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2011
|
|
5
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
require 'rubyvis'
|
|
11
|
+
|
|
12
|
+
module Bio
|
|
13
|
+
module Ngs
|
|
14
|
+
class Graphics
|
|
15
|
+
|
|
16
|
+
# Draws +data+ as a filled area chart with a top line and outputs it as SVG.
#
# Params:
#   data   - collection of numbers; each value becomes the y coordinate of a
#            point whose x coordinate is its 1-based position in the collection
#   width  - width given to the root panel and upper bound of the x scale range
#   height - height given to the root panel and upper bound of the y scale range
#   out    - optional output path; when nil the SVG is printed with puts
#   xlabel - text of the label anchored at the bottom
#   ylabel - text of the rotated label anchored on the left
#
# NOTE(review): n_ticks, x_padding and title_label are referenced below but are
# not defined anywhere in this method -- presumably they were meant to be
# arguments or constants; confirm before relying on this chart.
# NOTE(review): OpenStruct is used although only 'rubyvis' is required in the
# visible part of this file -- presumably 'ostruct' is loaded elsewhere; confirm.
def self.draw_area(data,width,height,out=nil,xlabel,ylabel)
  # Leave 10 units of headroom above the largest value.
  max = data.max + 10
  points = data.each_with_index.map do |value, index|
    OpenStruct.new({:x=> index + 1, :y=> value})
  end
  x = pv.Scale.linear(points, lambda {|d| d.x}).range(0, width)
  y = pv.Scale.linear(0, max).range(0, height)

  # The root panel
  vis = pv.Panel.new() do
    width(width)
    height(height)
    bottom 20
    left 50
    right 10
    top 5

    # Y-axis and ticks
    rule do
      data(y.ticks(n_ticks))
      bottom(y)
      stroke_style {|d| d!=0 ? "#eee" : "#000"}
      # The former `puts y.inspect` debug print was removed here: it wrote to
      # stdout, which is also where the SVG goes when no output file is given.
      label(:anchor=>"left") {
        text y.tick_format
      }
    end

    # X-axis and ticks.
    rule do
      data(x.ticks())
      visible {|d| d!=0}
      left(x)
      bottom(-5)
      height(5)
      label(:anchor=>'bottom') {
        text(x.tick_format)
      }
    end

    # The area with top line.
    area do |a|
      a.data points
      a.bottom(1)
      a.left {|d| x.scale(d.x)}
      a.height {|d| y.scale(d.y)}
      a.fill_style("rgb(121,173,210)")
      a.line(:anchor=>'top') {
        line_width(3)
      }
    end
  end

  # Panel holding the title and the axis labels
  panel = vis.add(Rubyvis::Panel).
    width(width-x_padding).
    height(height)

  panel.anchor('top').add(Rubyvis::Label).
    font("20px sans-serif").
    text(title_label)

  panel.anchor('bottom').add(Rubyvis::Label).text(xlabel)
  panel.anchor('left').add(Rubyvis::Label).
    text_angle(1.5*Math::PI).
    text(ylabel)

  vis.render()

  if out
    File.open(out,"w") {|f| f.write(vis.to_svg) }
  else
    puts vis.to_svg
  end
end
|
|
95
|
+
|
|
96
|
+
# Writes a packed-circle ("bubble") chart of +dataset+ to +fileout+ as SVG.
#
# Params:
#   fileout - path of the SVG file to write (a trailing newline is appended)
#   dataset - name => value pairs; every pair becomes one child node whose
#             value is used as the bubble size
#   panel_w - overall width; the panel itself is 10 smaller, with 5 on each side
#   panel_h - overall height; the panel itself is 10 smaller, with 5 on each side
def self.bubble_chart(fileout,dataset = {}, panel_w = 600, panel_h = 800)
  fill_palette = Rubyvis::Colors.category10()
  stroke_palette = Rubyvis::Colors.category10().by(lambda {|n| n.parent_node})

  vis = Rubyvis::Panel.new.
    width(panel_w-10).
    height(panel_h-10).
    bottom(5).
    left(5).
    right(5).
    top(5)

  # Build a one-level tree: an anonymous root with one child per dataset entry.
  tree = Rubyvis::Dom::Node.new
  dataset.each_pair do |label, amount|
    leaf = Rubyvis::Dom::Node.new(amount)
    leaf.node_name = label
    tree.append_child(leaf)
  end
  all_nodes = tree.nodes()

  pack = vis.add(pv.Layout.Pack).
    nodes(all_nodes).
    size(lambda {|n| n.node_value})

  # Only nodes that have a parent are drawn, which hides the root itself.
  dots = pack.node.add(Rubyvis::Dot)
  dots.
    visible( lambda {|n| n.parent_node}).
    fill_style(lambda {|n|
      base = fill_palette.scale(n.parent_node)
      base.brighter((n.node_value) / 5.0)
    }).
    stroke_style(stroke_palette)

  captions = pack.node_label.add(Rubyvis::Label)
  captions.
    visible( lambda {|n| n.parent_node}).
    text(lambda {|n| n.node_name})

  vis.render()
  File.open(fileout,"w") {|f| f.write vis.to_svg+"\n"}
end
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Writes a horizontal bar chart of +data+ to +fileout+ as SVG.
#
# Params:
#   labels  - indexable collection; labels[i] is printed to the left of bar i
#   data    - numeric values, one bar each; the largest value spans the full width
#   fileout - path of the SVG file to write (a trailing newline is appended)
#   width   - panel width and upper bound of the value scale range
#   height  - panel height, split into one band per value
def self.bar_charts(labels, data, fileout, width = 500, height = 300)
  value_scale = pv.Scale.linear(0, data.max).range(0, width)
  band_scale = pv.Scale.ordinal(pv.range(data.size)).split_banded(0, height, 4/5.0)

  # The root panel.
  vis = pv.Panel.new().
    width(width).
    height(height).
    bottom(20).
    left(100).
    right(10).
    top(5)

  # The bars.
  bar = vis.add(pv.Bar).
    data(data).
    top(lambda {band_scale.scale(self.index)}).
    height(band_scale.range_band).
    left(0).
    width(value_scale)

  # The value label, printed in white at the right end of each bar.
  value_label = bar.anchor("right").add(pv.Label)
  value_label.
    text_style("white").
    text(lambda {|d| "%0.1f" % d})

  # The variable label, right-aligned against the left end of each bar.
  name_label = bar.anchor("left").add(pv.Label)
  name_label.
    text_margin(5).
    text_align("right").
    text(lambda { labels[self.index]})

  # X-axis ticks: a grid rule per tick plus a short tick mark with its label.
  grid = vis.add(pv.Rule).
    data(value_scale.ticks(5)).
    left(value_scale).
    stroke_style(lambda {|d| d!=0 ? "rgba(255,255,255,.3)" : "#000"})
  tick = grid.add(pv.Rule).
    bottom(0).
    height(5).
    stroke_style("#000")
  tick.anchor("bottom").add(pv.Label).text(value_scale.tick_format)

  # X-axis label
  vis.anchor("top").add(Rubyvis::Label).text("Number of sequences")

  vis.render()
  File.open(fileout,"w") {|out| out.write vis.to_svg+"\n"}
end
|
|
186
|
+
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
# Copyright:: Copyright (C) 2011
|
|
4
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
module Bio
|
|
10
|
+
module Ngs
|
|
11
|
+
class Homology
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Method to import a Blast XML output file into a BlastOutput table created according to ActiveRecord model
|
|
15
|
+
# Params: XML Blast file, YAML file for db connection (optional)
|
|
16
|
+
# Reads every hit of a Blast XML report and stores one row per hit in the
# blast_outputs table, in batches of 1000 rows.
#
# For each hit the HSP values are aggregated:
#   identity / positive - sum over the HSPs divided by the summed alignment
#                         length, times 100
#   evalue              - arithmetic mean of the HSP e-values
# A hit without HSPs is stored with 0.0 identity/positive and a nil e-value
# instead of NaN values.
#
# Params:
#   file      - Blast XML file handed to Bio::Blast::XmlIterator
#   yaml_file - optional YAML file handed to Bio::Ngs::Db
def self.blast_import(file,yaml_file=nil)
  db = Bio::Ngs::Db.new :homology,yaml_file
  sql = "INSERT INTO blast_outputs(query_id,target_id,target_description,evalue,identity,positive) VALUES(?,?,?,?,?,?)"
  inserts = []
  Bio::Blast::XmlIterator.new(file).to_enum.each do |iter|
    iter.each do |hit|
      identity = 0.0
      positive = 0.0
      evalues = []
      length = 0
      hit.each do |hsp|
        identity += hsp.identity.to_f
        positive += hsp.positive.to_f
        evalues << hsp.evalue
        length += hsp.align_len
      end
      # Guard the divisions: with no HSPs both divisors are zero.
      identity = length.zero? ? 0.0 : (identity / length)*100
      positive = length.zero? ? 0.0 : (positive / length)*100
      evalue = evalues.empty? ? nil : evalues.inject{ |sum, el| sum + el }.to_f / evalues.size
      # The second '|'-separated field of the hit id is stored as target id;
      # ids without a '|' are stored whole rather than as nil.
      target_id = hit.hit_id.split('|')[1] || hit.hit_id
      inserts << [iter.query_def,target_id,hit.hit_def,evalue,identity,positive]
      if inserts.size == 1000
        db.insert_many(:blast_outputs,sql,inserts)
        inserts = []
      end
    end
  end
  # Flush the last, partially filled batch.
  db.insert_many(:blast_outputs,sql,inserts) unless inserts.empty?
end
|
|
43
|
+
|
|
44
|
+
# Converts a Blast XML report into a tab-separated text file with one header
# line and one line per hit.
#
# For each hit the HSP values are aggregated exactly as for the database
# import: identity and positive are the summed values divided by the summed
# alignment length times 100, the e-value is the mean of the HSP e-values.
# A hit without HSPs is written with 0.0 identity/positive and an empty
# e-value column instead of NaN values.
#
# Params:
#   file_in  - Blast XML file handed to Bio::Blast::XmlIterator
#   file_out - path of the text file to write
#
# Returns nil.
def self.blast2text(file_in,file_out)
  # Block form so the output file is closed even when parsing raises.
  File.open(file_out,"w") do |out|
    out.write("Query ID\tTarget ID\tTarget Description\tE-value\tIdentity\tPositive\n")
    Bio::Blast::XmlIterator.new(file_in).to_enum.each do |iter|
      iter.each do |hit|
        identity = 0.0
        positive = 0.0
        evalues = []
        length = 0
        hit.each do |hsp|
          identity += hsp.identity.to_f
          positive += hsp.positive.to_f
          evalues << hsp.evalue
          length += hsp.align_len
        end
        # Guard the divisions: with no HSPs both divisors are zero.
        identity = length.zero? ? 0.0 : (identity / length)*100
        positive = length.zero? ? 0.0 : (positive / length)*100
        evalue = evalues.empty? ? nil : evalues.inject{ |sum, el| sum + el }.to_f / evalues.size
        out.write([iter.query_def,hit.hit_id,hit.hit_def,evalue,identity,positive].join("\t")+"\n")
      end
    end
  end
  nil
end
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Method to import a GO Annotation file into GoAnnotation table created according to ActiveRecord model
|
|
70
|
+
# Params: GOA file, YAML file for db connection (optional)
|
|
71
|
+
# Loads a tab-separated GO annotation file into the go_annotations table, in
# batches of 1000 rows.
#
# Lines starting with "!" and blank lines are skipped. Of every other line the
# first 15 tab-separated fields are stored, in file order, in the columns
# named by the INSERT statement below.
#
# Params:
#   file      - path of the annotation file
#   yaml_file - optional YAML file handed to Bio::Ngs::Db
def self.goa_import(file,yaml_file=nil)
  db = Bio::Ngs::Db.new :homology, yaml_file
  sql = "INSERT INTO go_annotations(db,entry_id,symbol,qualifier,go_id,db_ref,evidence,additional_identifier,aspect,name,synonym,molecule_type,taxon_id,date,assigned_by) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
  inserts = []
  # File.foreach closes the file when the iteration ends.
  File.foreach(file) do |line|
    next if line.start_with? "!"
    line = line.chomp
    # A blank line would otherwise become an empty row for a 15-value statement.
    next if line.strip.empty?
    inserts << line.split("\t")[0..14]
    if inserts.size == 1000
      db.insert_many(:go_annotations,sql,inserts)
      inserts = []
    end
  end
  # Flush the last, partially filled batch.
  db.insert_many(:go_annotations,sql,inserts) unless inserts.empty?
end
|
|
85
|
+
|
|
86
|
+
# Method to export the associations among genes and GO and store them into a JSON file that can be imported into the Ontology db
|
|
87
|
+
# Params: file to write JSON data, library name stored on every entry (optional), YAML file for db connection (optional)
|
|
88
|
+
# Dumps the gene/GO associations found in the homology db to a JSON file.
#
# One Bio::Ngs::Ontology is built per BlastOutput record: its gene id is the
# record's query_id, its GO list holds the go_id of each of the record's
# go_annotations, and its library is the +library+ argument.
#
# Params:
#   file_out  - path of the JSON file to write
#   library   - optional library name stored on every entry
#   yaml_file - optional YAML file handed to Bio::Ngs::Db
def self.go_annotation_to_json(file_out,library=nil,yaml_file=nil)
  # Instantiated for its side effects only; the object itself is not used.
  Bio::Ngs::Db.new :homology, yaml_file
  ontologies = BlastOutput.find(:all).map do |record|
    entry = Bio::Ngs::Ontology.new record.query_id
    entry.go = record.go_annotations.map {|goa| goa.go_id}
    entry.library = library
    entry
  end
  File.open(file_out,"w") {|f| f.write ontologies.to_json}
end
|
|
99
|
+
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#
|
|
2
|
+
#
|
|
3
|
+
# Copyright:: Copyright (C) 2011
|
|
4
|
+
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
module Bio
|
|
10
|
+
module Ngs
|
|
11
|
+
class Ontology
|
|
12
|
+
|
|
13
|
+
# Method to import a GO OBO file into Go table created according to ActiveRecord model
|
|
14
|
+
# Params: GO OBO file, YAML file for db connection
|
|
15
|
+
# Loads the [Term] stanzas of a GO OBO file into the go table, in batches of
# 1000 rows.
#
# For every line starting with "[Term]" the text up to the next blank line is
# read as one stanza. Its "id: ", "name: " and "namespace: " values are stored
# in the go_id, name and namespace columns; a missing tag is stored as nil so
# the remaining values keep their columns. For every "is_a" line the text
# before the first "!" is kept, and all of them are joined with a single space
# into the is_a column.
#
# Params:
#   file      - path of the OBO file
#   yaml_file - optional YAML file handed to Bio::Ngs::Db
def self.go_import(file,yaml_file=nil)
  db = Bio::Ngs::Db.new :ontology,yaml_file
  sql = "INSERT INTO go(go_id,name,namespace,is_a) VALUES(?,?,?,?)"
  inserts = []
  # Block form so the file is closed when the import ends or raises.
  File.open(file) do |obo|
    obo.each do |line|
      next unless line.start_with? "[Term]"
      block = obo.gets("\n\n")
      # gets returns nil when "[Term]" is the last line of the file.
      next if block.nil?
      go_id = nil
      name = nil
      namespace = nil
      is_a = []
      block.split("\n").each do |elem|
        # sub removes only the leading tag; the rest of the value is untouched.
        if elem.start_with? "id: "
          go_id = elem.sub("id: ","")
        elsif elem.start_with? "name: "
          name = elem.sub("name: ","")
        elsif elem.start_with? "is_a"
          is_a << elem.sub("is_a: ","").split("!").first
        elsif elem.start_with? "namespace: "
          namespace = elem.sub("namespace: ","")
        end
      end
      inserts << [go_id,name,namespace,is_a.join(" ")]
      if inserts.size == 1000
        db.insert_many(:go,sql,inserts)
        inserts = []
      end
    end
  end
  # Flush the last, partially filled batch.
  db.insert_many(:go,sql,inserts) unless inserts.empty?
end
|
|
45
|
+
|
|
46
|
+
# Method to load the Gene-GO associations from a JSON file into the Ontology db
|
|
47
|
+
# Params: JSON file name, YAML file for db connection (optional)
|
|
48
|
+
# Reads gene/GO associations from a JSON file and stores them in the
# Ontology db.
#
# The JSON document is iterated entry by entry; the "gene_id", "go" and
# "library" keys of each entry are used to build one Bio::Ngs::Ontology. All
# of them are collected in a Bio::Ngs::OntologyCollection, whose to_db does
# the actual storing.
#
# Params:
#   file      - path of the JSON file
#   yaml_file - optional YAML file handed to Bio::Ngs::Db and to to_db
def self.load_go_genes(file,yaml_file=nil)
  # Instantiated for its side effects only; the object itself is not used.
  Bio::Ngs::Db.new :ontology, yaml_file
  entries = JSON.load File.read(file)
  collection = Bio::Ngs::OntologyCollection.new
  entries.each do |entry|
    collection.push(Bio::Ngs::Ontology.new(entry["gene_id"],entry["go"],entry["library"]))
  end
  collection.to_db(yaml_file)
end
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
attr_accessor :gene_id, :go, :library
|
|
60
|
+
# Constructor for Bio::Ngs::Ontology instances
|
|
61
|
+
# Stores the three attributes exposed by the accessors of this class.
#
# Params:
#   gene_id - identifier of the gene
#   go      - GO identifiers associated with the gene (defaults to an empty list)
#   library - optional library name
def initialize(gene_id,go=[],library=nil)
  @gene_id, @go, @library = gene_id, go, library
end
|
|
66
|
+
|
|
67
|
+
# Method to store a single Bio::Ngs::Ontology object into the Ontology db
|
|
68
|
+
# Stores this gene and its GO associations in the Ontology db.
#
# A Gene record is created from @gene_id and @library, then one gene_gos
# record is created for every Go record whose go_id is listed in @go.
#
# The Bio::Ngs::Db object is now created before the Go table is queried; the
# emptiness check used to run first.
# NOTE(review): presumably Bio::Ngs::Db.new is what sets up the connection
# the models use -- confirm.
#
# Params:
#   yaml_file - optional YAML file handed to Bio::Ngs::Db
#
# Raises RuntimeError when the Go table is empty.
def to_db(yaml_file=nil)
  db = Bio::Ngs::Db.new :ontology,yaml_file
  raise RuntimeError,"You must initialize the Ontolgy db with biongs ontology:db:init" if Go.count == 0
  g = Gene.create(:gene_id => @gene_id, :library => @library)
  Go.where({:go_id => @go}).all.each do |go|
    # go.id is the record id of the Go row, not the GO accession.
    g.gene_gos.create(:go_id => go.id)
  end
end
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Class to handle collection of Bio::Ngs::Ontology objects.
|
|
81
|
+
# It provides a method to store all the gene-GO associations into the Ontology db
|
|
82
|
+
class OntologyCollection < Array

  # Stores every Bio::Ngs::Ontology of the collection in the Ontology db with
  # two bulk inserts: one row per gene into genes, and one row per known
  # gene/GO pair into gene_gos.
  #
  # GO identifiers that are not present in the go table are silently skipped.
  # Gene rows get their position in the collection (starting at 1) as id.
  # NOTE(review): those ids presumably assume an empty genes table -- confirm.
  #
  # Params:
  #   yaml_file - optional YAML file handed to Bio::Ngs::Db
  #
  # Raises ArgumentError when an element is not a Bio::Ngs::Ontology.
  def to_db(yaml_file=nil)
    db = Bio::Ngs::Db.new :ontology, yaml_file
    # Map every GO identifier to the record id of its row in the go table.
    go = {}
    Go.find_by_sql("SELECT id, go_id FROM go").each {|g| go[g.go_id] = g.id}
    genes = []
    ontologies = []
    each_with_index do |gene,index|
      # The comma after the class matters: without it Ruby calls a method
      # named ArgumentError and fails with NoMethodError instead.
      unless gene.is_a?(Bio::Ngs::Ontology)
        raise ArgumentError, "OntologyCollection can store only Bio::Ngs::Ontology objects!"
      end
      genes << [index+1,gene.gene_id,gene.library]
      gene.go.each {|o| ontologies << [index+1,go[o]] if go[o]}
    end
    # Skip empty batches, as the other import methods do.
    db.insert_many(:genes,"INSERT INTO genes(id,gene_id,library) VALUES(?,?,?)",genes) unless genes.empty?
    db.insert_many(:gene_gos,"INSERT INTO gene_gos(gene_id,go_id) VALUES(?,?)",ontologies) unless ontologies.empty?
  end

end
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
end
|
|
103
|
+
end
|