bio-ngs 0.3.2.alpha.01

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/.document +5 -0
  2. data/Gemfile +39 -0
  3. data/Gemfile.lock +81 -0
  4. data/LICENSE.txt +28 -0
  5. data/README.rdoc +240 -0
  6. data/Rakefile +60 -0
  7. data/VERSION +1 -0
  8. data/bin/biongs +35 -0
  9. data/bio-ngs.gemspec +215 -0
  10. data/ext/mkrf_conf.rb +87 -0
  11. data/lib/bio-ngs.rb +54 -0
  12. data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
  13. data/lib/bio/appl/ngs/blast.rb +36 -0
  14. data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
  15. data/lib/bio/appl/ngs/cufflinks.rb +489 -0
  16. data/lib/bio/appl/ngs/fastx.rb +170 -0
  17. data/lib/bio/appl/ngs/samtools.rb +118 -0
  18. data/lib/bio/appl/ngs/sff_extract.rb +23 -0
  19. data/lib/bio/appl/ngs/tophat.rb +158 -0
  20. data/lib/bio/ngs/converter.rb +100 -0
  21. data/lib/bio/ngs/core_ext.rb +12 -0
  22. data/lib/bio/ngs/db.rb +66 -0
  23. data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
  24. data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
  25. data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
  26. data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
  27. data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
  28. data/lib/bio/ngs/db/models.rb +1 -0
  29. data/lib/bio/ngs/db/models/homology.rb +8 -0
  30. data/lib/bio/ngs/db/models/ontology.rb +16 -0
  31. data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
  32. data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
  33. data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
  34. data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
  35. data/lib/bio/ngs/ext/versions.yaml +73 -0
  36. data/lib/bio/ngs/graphics.rb +189 -0
  37. data/lib/bio/ngs/homology.rb +102 -0
  38. data/lib/bio/ngs/ontology.rb +103 -0
  39. data/lib/bio/ngs/quality.rb +64 -0
  40. data/lib/bio/ngs/record.rb +50 -0
  41. data/lib/bio/ngs/task.rb +46 -0
  42. data/lib/bio/ngs/utils.rb +176 -0
  43. data/lib/development_tasks.rb +34 -0
  44. data/lib/enumerable.rb +37 -0
  45. data/lib/tasks/bwa.thor +126 -0
  46. data/lib/tasks/convert.thor +454 -0
  47. data/lib/tasks/history.thor +51 -0
  48. data/lib/tasks/homology.thor +121 -0
  49. data/lib/tasks/ontology.thor +93 -0
  50. data/lib/tasks/project.thor +51 -0
  51. data/lib/tasks/quality.thor +142 -0
  52. data/lib/tasks/rna.thor +126 -0
  53. data/lib/tasks/sff_extract.thor +9 -0
  54. data/lib/templates/README.tt +43 -0
  55. data/lib/templates/db.tt +6 -0
  56. data/lib/wrapper.rb +225 -0
  57. data/spec/converter_qseq_spec.rb +56 -0
  58. data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
  59. data/spec/quality_spec.rb +40 -0
  60. data/spec/sff_extract_spec.rb +98 -0
  61. data/spec/spec_helper.rb +55 -0
  62. data/spec/tophat_spec.rb +99 -0
  63. data/spec/utils_spec.rb +22 -0
  64. data/test/conf/test_db.yml +4 -0
  65. data/test/data/blastoutput.xml +69 -0
  66. data/test/data/gene-GO.json +1 -0
  67. data/test/data/goa_uniprot +27 -0
  68. data/test/data/goslim_goa.obo +1763 -0
  69. data/test/helper.rb +18 -0
  70. data/test/test_bio-ngs.rb +17 -0
  71. data/test/test_db.rb +21 -0
  72. data/test/test_homology.rb +102 -0
  73. data/test/test_ngs.rb +21 -0
  74. data/test/test_ontology.rb +74 -0
  75. data/test/test_utils.rb +29 -0
  76. metadata +460 -0
Binary file
Binary file
@@ -0,0 +1,73 @@
1
+ common:
2
+ libgtextutils:
3
+ version: 0.6
4
+ url: http://hannonlab.cshl.edu/fastx_toolkit/libgtextutils-0.6.tar.bz2
5
+ basename: libgtextutils-0.6
6
+ suffix: tar.bz2
7
+ desc: ""
8
+ type: source
9
+ fastx:
10
+ version: 0.0.13
11
+ url: http://hannonlab.cshl.edu/fastx_toolkit/fastx_toolkit-0.0.13.tar.bz2
12
+ basename: fastx_toolkit-0.0.13
13
+ suffix: tar.bz2
14
+ desc: "Fastx-toolkit version 0.0.13 requires libgtextutils-0.6 (available here for download). A recent g++ compiler (tested with GNU G++ 4.1.2 and later). The fasta_clipping_histogram tool requires two perl modules: PerlIO::gzip and GD::Graph::bars. The fastx_barcode_splitter tool requires GNU sed. The fastq_quality_boxplot tool requires gnuplot version 4.2 or newer."
15
+ type: source
16
+ linux:
17
+ cufflinks:
18
+ version: 1.1.0
19
+ url: http://cufflinks.cbcb.umd.edu/downloads/cufflinks-1.1.0.Linux_x86_64.tar.gz
20
+ basename: cufflinks-1.1.0.Linux_x86_64
21
+ suffix: tar.gz
22
+ desc: ""
23
+ type: binary
24
+ tophat:
25
+ version: 1.3.2
26
+ url: http://tophat.cbcb.umd.edu/downloads/tophat-1.3.2.Linux_x86_64.tar.gz
27
+ basename: tophat-1.3.2.Linux_x86_64
28
+ suffix: tar.gz
29
+ desc: ""
30
+ type: binary
31
+ bowtie:
32
+ version: 0.12.7
33
+ url: http://sourceforge.net/projects/bowtie-bio/files/bowtie/0.12.7/bowtie-0.12.7-linux-x86_64.zip/download
34
+ basename: bowtie-0.12.7-linux-x86_64
35
+ suffix: zip
36
+ desc: ""
37
+ type: binary
38
+ # sra:
39
+ # version:
40
+ # url: http://trace.ncbi.nlm.nih.gov/Traces/sra/static/sratoolkit.2.1.0-centos_linux64.tar.gz
41
+ # basename: sratoolkit.2.1.0-centos_linux64
42
+ # suffix: tar.gz
43
+ # desc: ""
44
+ # type: binary
45
+ osx:
46
+ cufflinks:
47
+ version: 1.1.0
48
+ url: http://cufflinks.cbcb.umd.edu/downloads/cufflinks-1.1.0.OSX_x86_64.tar.gz
49
+ basename: cufflinks-1.1.0.OSX_x86_64
50
+ suffix: tar.gz
51
+ desc: ""
52
+ type: binary
53
+ tophat:
54
+ version: 1.3.2
55
+ url: http://tophat.cbcb.umd.edu/downloads/tophat-1.3.2.OSX_x86_64.tar.gz
56
+ basename: tophat-1.3.2.OSX_x86_64
57
+ suffix: tar.gz
58
+ desc: ""
59
+ type: binary
60
+ bowtie:
61
+ version: 0.12.7
62
+ url: http://sourceforge.net/projects/bowtie-bio/files/bowtie/0.12.7/bowtie-0.12.7-macos-10.5-x86_64.zip/download
63
+ basename: bowtie-0.12.7-macos-10.5-x86_64
64
+ suffix: zip
65
+ desc: ""
66
+ type: binary
67
+ # sra:
68
+ # version:
69
+ # url: http://trace.ncbi.nlm.nih.gov/Traces/sra/static/sratoolkit.2.1.0-mac64.tar.gz
70
+ # basename: sratoolkit.2.1.0-mac64
71
+ # suffix: tar.gz
72
+ # desc: ""
73
+ # type: binary
@@ -0,0 +1,189 @@
1
+ #
2
+ #
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+ require 'rubyvis'
11
+
12
+ module Bio
13
+ module Ngs
14
+ class Graphics
15
+
16
# Method to draw an area chart (filled area with a line on top) as SVG
# Params: array of numeric values (the n-th value is plotted at x = n, counting from 1),
#         width and height of the plot, output file name (optional: when nil the SVG
#         is printed to STDOUT), label for the X axis, label for the Y axis
#
# NOTE(review): this method used to read three names that were never defined
# (n_ticks, x_padding, title_label) and therefore raised NameError. They are now
# plain locals holding conservative values; confirm the intended values before
# relying on the exact look of the chart.
def self.draw_area(data,width,height,out=nil,xlabel,ylabel)
  n_ticks = 5        # same tick count that bar_charts passes to Scale#ticks
  x_padding = 0      # the legend panel spans the whole plot width
  title_label = ""   # no title text is available from the arguments

  # Leave some headroom above the highest value
  max = data.max + 10
  series = data.each_with_index.map do |value, idx|
    OpenStruct.new({:x=> idx + 1, :y=> value})
  end
  x = pv.Scale.linear(series, lambda {|d| d.x}).range(0, width)
  y = pv.Scale.linear(0, max).range(0, height)

  # The root panel
  vis = pv.Panel.new() do
    width width
    height height
    bottom 20
    left 50
    right 10
    top 5

    # Y-axis and ticks
    rule do
      data y.ticks(n_ticks)
      bottom(y)
      stroke_style {|d| d!=0 ? "#eee" : "#000"}
      # A debugging "puts y.inspect" used to live here; it wrote to STDOUT and
      # so ended up mixed into the SVG whenever no output file was given.
      label(:anchor=>"left") {
        text y.tick_format
      }
    end

    # X-axis and ticks
    rule do
      data x.ticks()
      visible {|d| d!=0}
      left(x)
      bottom(-5)
      height(5)
      label(:anchor=>'bottom') {
        text(x.tick_format)
      }
    end

    # The area with top line
    area do |a|
      a.data series
      a.bottom(1)
      a.left {|d| x.scale(d.x)}
      a.height {|d| y.scale(d.y)}
      a.fill_style("rgb(121,173,210)")
      a.line(:anchor=>'top') {
        line_width(3)
      }
    end
  end

  # Panel holding the title and the axis labels
  panel = vis.add(Rubyvis::Panel).
    width(width-x_padding).
    height(height)

  panel.anchor('top').add(Rubyvis::Label).
    font("20px sans-serif").
    text(title_label)

  panel.anchor('bottom').add(Rubyvis::Label).text(xlabel)
  panel.anchor('left').add(Rubyvis::Label).
    text_angle(1.5*Math::PI).
    text(ylabel)

  vis.render()

  if out
    File.open(out,"w") {|f| f.write(vis.to_svg) }
  else
    puts vis.to_svg
  end
end
95
+
96
# Method to draw a bubble chart with Rubyvis' Pack layout and save it as SVG
# Params: output file name, hash of name => numeric value (one node per pair),
#         panel width and panel height (optional)
def self.bubble_chart(fileout,dataset = {}, panel_w = 600, panel_h = 800)
  palette = Rubyvis::Colors.category10()
  outline = Rubyvis::Colors.category10().by(lambda {|n| n.parent_node})

  # Root panel: 5px on every side are taken out of the requested size
  vis = Rubyvis::Panel.new.
    width(panel_w-10).
    height(panel_h-10).
    bottom(5).
    left(5).
    right(5).
    top(5)

  # One child node per dataset entry, all hanging from a single root node
  tree = Rubyvis::Dom::Node.new
  dataset.each_pair do |label,amount|
    leaf = Rubyvis::Dom::Node.new(amount)
    leaf.node_name = label
    tree.append_child(leaf)
  end
  flat_nodes = tree.nodes()

  pack = vis.add(pv.Layout.Pack).
    nodes(flat_nodes).
    size(lambda {|n| n.node_value})

  # Only nodes that have a parent are drawn, which leaves the root out
  pack.node.add(Rubyvis::Dot).
    visible(lambda {|n| n.parent_node}).
    fill_style(lambda {|n|
      palette.scale(n.parent_node).
        brighter((n.node_value) / 5.0)
    }).
    stroke_style(outline)

  pack.node_label.add(Rubyvis::Label).
    visible(lambda {|n| n.parent_node}).
    text(lambda {|n| n.node_name})

  vis.render()
  File.open(fileout,"w") do |svg_file|
    svg_file.write(vis.to_svg+"\n")
  end
end
134
+
135
+
136
# Method to draw a horizontal bar chart and save it as SVG
# Params: array of labels (one per bar), array of numeric values, output file name,
#         width and height of the plot (optional)
def self.bar_charts(labels, data, fileout, width = 500, height = 300)
  # Linear scale for the bar lengths, banded ordinal scale for the bar slots
  length_scale = pv.Scale.linear(0, data.max).range(0, width)
  slot_scale = pv.Scale.ordinal(pv.range(data.size)).split_banded(0, height, 4/5.0)

  # The root panel
  vis = pv.Panel.new().
    width(width).
    height(height).
    bottom(20).
    left(100).
    right(10).
    top(5)

  # The bars: one per value, stacked from the top in data order
  bar = vis.add(pv.Bar).
    data(data).
    top(lambda {slot_scale.scale(self.index)}).
    height(slot_scale.range_band).
    left(0).
    width(length_scale)

  # The value label, printed with one decimal at the right end of each bar
  bar.anchor("right").add(pv.Label).
    text_style("white").
    text(lambda {|d| "%0.1f" % d})

  # The variable label, taken from labels at the bar's index
  bar.anchor("left").add(pv.Label).
    text_margin(5).
    text_align("right").
    text(lambda { labels[self.index]})

  # X-axis ticks
  vis.add(pv.Rule).
    data(length_scale.ticks(5)).
    left(length_scale).
    stroke_style(lambda {|d| d!=0 ? "rgba(255,255,255,.3)" : "#000"}).
    add(pv.Rule).
    bottom(0).
    height(5).
    stroke_style("#000").
    anchor("bottom").add(pv.Label).text(length_scale.tick_format)

  # Caption anchored at the top of the panel
  vis.anchor("top").add(Rubyvis::Label).text("Number of sequences")

  vis.render()
  File.open(fileout,"w") do |svg_file|
    svg_file.write(vis.to_svg+"\n")
  end
end
186
+
187
+ end
188
+ end
189
+ end
@@ -0,0 +1,102 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+ #
8
+
9
+ module Bio
10
+ module Ngs
11
+ class Homology
12
+
13
+
14
# Method to import a Blast XML output file into the blast_outputs table (BlastOutput ActiveRecord model)
# Params: XML Blast file, YAML file for db connection (optional)
# One row is stored per hit. Identity and positive are percentages computed over
# all the HSPs of the hit (summed counts / summed alignment lengths), the e-value
# is the mean of the HSP e-values and the target id is the second "|"-separated
# field of the hit id. Rows are sent to the db in batches of 1000.
def self.blast_import(file,yaml_file=nil)
  db = Bio::Ngs::Db.new :homology,yaml_file
  sql = "INSERT INTO blast_outputs(query_id,target_id,target_description,evalue,identity,positive) VALUES(?,?,?,?,?,?)"
  batch = []
  Bio::Blast::XmlIterator.new(file).to_enum.each do |iteration|
    iteration.each do |hit|
      identical = 0.0
      positives = 0.0
      evalues = []
      aligned = 0
      hit.each do |hsp|
        identical += hsp.identity.to_f
        positives += hsp.positive.to_f
        evalues << hsp.evalue
        aligned += hsp.align_len
      end
      identity_pct = (identical / aligned)*100
      positive_pct = (positives / aligned)*100
      mean_evalue = evalues.inject{ |sum, el| sum + el }.to_f / evalues.size
      batch << [iteration.query_def,hit.hit_id.split('|')[1],hit.hit_def,mean_evalue,identity_pct,positive_pct]
      # Flush only when a full batch has been collected
      next unless batch.size == 1000
      db.insert_many(:blast_outputs,sql,batch)
      batch = []
    end
  end
  # Store whatever is left after the last full batch
  db.insert_many(:blast_outputs,sql,batch) unless batch.empty?
end
43
+
44
# Method to convert a Blast XML output file into a tab-separated text file
# Params: XML Blast file, name of the text file to write
# Writes a header line followed by one line per hit with: query definition, hit id,
# hit definition, mean e-value of the HSPs, identity and positive percentages
# (summed HSP counts / summed alignment lengths * 100).
# Returns nil.
def self.blast2text(file_in,file_out)
  # Block form so the output file is closed even when parsing raises
  File.open(file_out,"w") do |out|
    out.write("Query ID\tTarget ID\tTarget Description\tE-value\tIdentity\tPositive\n")
    Bio::Blast::XmlIterator.new(file_in).to_enum.each do |iter|
      iter.each do |hit|
        identity = 0.0
        positive = 0.0
        evalue = []
        length = 0
        hit.each do |hsp|
          identity += hsp.identity.to_f
          positive += hsp.positive.to_f
          evalue << hsp.evalue
          length += hsp.align_len
        end
        identity = (identity / length)*100
        positive = (positive / length)*100
        evalue = evalue.inject{ |sum, el| sum + el }.to_f / evalue.size
        out.write([iter.query_def,hit.hit_id,hit.hit_def,evalue,identity,positive].join("\t")+"\n")
      end
    end
  end
  # Same return value as the explicit close this method used to end with
  nil
end
67
+
68
+
69
# Method to import a GO Annotation file into the go_annotations table (GoAnnotation ActiveRecord model)
# Params: GOA file, YAML file for db connection (optional)
# Lines starting with "!" are skipped. Only the first 15 tab-separated columns of
# each line are stored. Rows are sent to the db in batches of 1000.
def self.goa_import(file,yaml_file=nil)
  db = Bio::Ngs::Db.new :homology, yaml_file
  sql = "INSERT INTO go_annotations(db,entry_id,symbol,qualifier,go_id,db_ref,evidence,additional_identifier,aspect,name,synonym,molecule_type,taxon_id,date,assigned_by) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
  inserts = []
  # File.foreach closes the file once the block is done, also on error
  File.foreach(file) do |line|
    next if line.start_with? "!"
    inserts << line.chomp.split("\t")[0..14]
    if inserts.size == 1000
      db.insert_many(:go_annotations,sql,inserts)
      inserts = []
    end
  end
  # Store whatever is left after the last full batch
  db.insert_many(:go_annotations,sql,inserts) if inserts.size > 0
end
85
+
86
# Method to export the associations among genes and GO and store them into a JSON file that can be imported into the Ontology db
# Params: file to write JSON data, library name stored on every entry (optional),
#         YAML file for db connection (optional)
# One Bio::Ngs::Ontology is built per BlastOutput record: the query id is used as
# gene id and the GO ids come from the record's go_annotations.
def self.go_annotation_to_json(file_out,library=nil,yaml_file=nil)
  # The Db object itself is not used afterwards; it is created before the models are queried
  Bio::Ngs::Db.new :homology, yaml_file
  ontologies = BlastOutput.find(:all).map do |result|
    entry = Bio::Ngs::Ontology.new result.query_id
    entry.go = result.go_annotations.map {|goa| goa.go_id}
    entry.library = library
    entry
  end
  File.open(file_out,"w") do |json_file|
    json_file.write ontologies.to_json
  end
end
99
+
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,103 @@
1
+ #
2
+ #
3
+ # Copyright:: Copyright (C) 2011
4
+ # Francesco Strozzi <francesco.strozzi@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+ #
8
+
9
+ module Bio
10
+ module Ngs
11
+ class Ontology
12
+
13
# Method to import a GO OBO file into the go table (Go ActiveRecord model)
# Params: GO OBO file, YAML file for db connection (optional)
# For every "[Term]" stanza one row [id, name, namespace, is_a] is stored, where
# is_a is the space-joined list of parent ids (the text before "!" on each
# "is_a: " line). A tag missing from the stanza is stored as nil, so the columns
# never shift. Rows are sent to the db in batches of 1000.
def self.go_import(file,yaml_file=nil)
  db = Bio::Ngs::Db.new :ontology,yaml_file
  sql = "INSERT INTO go(go_id,name,namespace,is_a) VALUES(?,?,?,?)"
  inserts = []
  # Block form so the OBO file is closed when done, also on error
  File.open(file) do |obo|
    obo.each do |line|
      next unless line.start_with? "[Term]"
      # The stanza body runs up to the next blank line
      block = obo.gets("\n\n")
      # A "[Term]" header on the very last line has no body to read
      next if block.nil?
      term = {}
      is_a = []
      block.split("\n").each do |elem|
        # sub (not gsub): strip the leading tag only, never text inside the value
        if elem.start_with? "id: "
          term[:id] = elem.sub("id: ","")
        elsif elem.start_with? "name: "
          term[:name] = elem.sub("name: ","")
        elsif elem.start_with? "is_a: "
          # Match the full tag so that e.g. "is_anonymous: ..." is not taken as a parent
          is_a << elem.sub("is_a: ","").split("!").first
        elsif elem.start_with? "namespace: "
          term[:namespace] = elem.sub("namespace: ","")
        end
      end
      inserts << [term[:id],term[:name],term[:namespace],is_a.join(" ")]
      if inserts.size == 1000
        db.insert_many(:go,sql,inserts)
        inserts = []
      end
    end
  end
  # Store whatever is left after the last full batch
  db.insert_many(:go,sql,inserts) if inserts.size > 0
end
45
+
46
# Method to load the Gene-GO associations from a JSON file into the Ontology db
# Params: JSON file name, YAML file for db connection (optional)
# Every JSON entry is read through its "gene_id", "go" and "library" keys.
def self.load_go_genes(file,yaml_file=nil)
  # The Db object itself is not used afterwards; the collection opens its own in to_db
  Bio::Ngs::Db.new :ontology, yaml_file
  entries = JSON.load File.read(file)
  collection = Bio::Ngs::OntologyCollection.new
  entries.each do |gene|
    collection << Bio::Ngs::Ontology.new(gene["gene_id"],gene["go"],gene["library"])
  end
  collection.to_db(yaml_file)
end
57
+
58
+
59
+ attr_accessor :gene_id, :go, :library
60
# Constructor for Bio::Ngs::Ontology instances
# Params: gene identifier, list of GO ids (optional, a new empty list by default),
#         library name (optional)
def initialize(gene_id,go=[],library=nil)
  @gene_id, @go, @library = gene_id, go, library
end
66
+
67
# Method to store a single Bio::Ngs::Ontology object into the Ontology db
# Params: YAML file for db connection (optional)
# Creates one Gene record and one gene_gos record for every Go row whose go_id is
# listed in this object.
# Raises RuntimeError when the Go table is empty.
def to_db(yaml_file=nil)
  # Open the db before the first model query (this used to happen only after
  # Go.count had already been called).
  # NOTE(review): presumably Bio::Ngs::Db.new is what sets up the connection used
  # by the Go and Gene models - confirm against Bio::Ngs::Db.
  Bio::Ngs::Db.new :ontology,yaml_file
  raise RuntimeError,"You must initialize the Ontolgy db with biongs ontology:db:init" if Go.count == 0
  g = Gene.create(:gene_id => @gene_id, :library => @library)
  Go.where({:go_id => @go}).all.each do |go|
    g.gene_gos.create(:go_id => go.id)
  end
end
76
+
77
+
78
+ end
79
+
80
# Class to handle collection of Bio::Ngs::Ontology objects.
# It provides a method to store all the gene-GO associations into the Ontology db
class OntologyCollection < Array

  # Method to store every gene of the collection and its GO associations into the Ontology db
  # Params: YAML file for db connection (optional)
  # Genes get the ids 1..n following their position in the collection; GO ids that
  # are not present in the go table are skipped.
  # Raises ArgumentError when an element is not a Bio::Ngs::Ontology; nothing is
  # inserted in that case because the inserts run only after the whole collection
  # has been checked.
  def to_db(yaml_file=nil)
    db = Bio::Ngs::Db.new :ontology, yaml_file
    genes = []
    ontologies = []
    # Map every GO accession to the primary key of its row in the go table
    go = {}
    Go.find_by_sql("SELECT id, go_id FROM go").each {|g| go[g.go_id] = g.id}
    each_with_index do |gene,index|
      # The comma after the class matters: without it Ruby calls a method named
      # ArgumentError and fails with NoMethodError instead of raising this error
      unless gene.is_a?(Bio::Ngs::Ontology)
        raise ArgumentError, "OntologyCollection can store only Bio::Ngs::Ontology objects!"
      end
      genes << [index+1,gene.gene_id,gene.library]
      gene.go.each {|o| ontologies << [index+1,go[o]] if go[o]}
    end
    db.insert_many(:genes,"INSERT INTO genes(id,gene_id,library) VALUES(?,?,?)",genes)
    db.insert_many(:gene_gos,"INSERT INTO gene_gos(gene_id,go_id) VALUES(?,?)",ontologies)
  end

end
100
+
101
+
102
+ end
103
+ end