bio-velvet 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +12 -0
- data/Gemfile +17 -0
- data/LICENSE.txt +20 -0
- data/README.md +62 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/bio-velvet.rb +13 -0
- data/lib/bio-velvet/graph.rb +517 -0
- data/lib/bio-velvet/runner.rb +91 -0
- data/spec/bio-velvet_arc_array_spec.rb +123 -0
- data/spec/bio-velvet_graph_spec.rb +290 -0
- data/spec/bio-velvet_runner_spec.rb +67 -0
- data/spec/data/node_sequence/LastGraph +33 -0
- data/spec/data/node_sequence/contigs.fa +75 -0
- data/spec/data/runner_input.fa +18 -0
- data/spec/data/short_node_LastGraph +40 -0
- data/spec/data/short_node_sequence_test_graph +20 -0
- data/spec/data/velvet_test_reads_assembly/Graph +3465 -0
- data/spec/data/velvet_test_reads_assembly/HOWTO_RECREATE +2 -0
- data/spec/data/velvet_test_reads_assembly/LastGraph +3462 -0
- data/spec/data/velvet_test_reads_assembly_read_tracking/Graph2 +45602 -0
- data/spec/data/velvet_test_reads_assembly_read_tracking/HOWTO_RECREATE +2 -0
- data/spec/data/velvet_test_trail_sequence_assembly/reads1.fa +18 -0
- data/spec/spec_helper.rb +14 -0
- metadata +211 -0
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'files'
require 'shellwords'
require 'systemu'
|
3
|
+
|
4
|
+
module Bio
|
5
|
+
module Velvet
|
6
|
+
class Runner
  include Bio::Velvet::Logging

  # Run velveth and then velvetg, with the given kmer size. Returned
  # is a Bio::Velvet::Result object describing the assembly directory and
  # carrying the stdout/stderr captured from both programs.
  #
  # The velveth_options_string and velvetg_options_string are strings to
  # pass as arguments to velveth and velvetg, respectively. They are handed
  # to the shell verbatim so that several arguments can be given at once.
  #
  # The final options argument is used to specify bio-velvet wrapper options. Currently:
  # :output_assembly_path: a directory where the assembly takes place (by default, a temporary directory)
  #
  # Raises VelvetRunnerException if either program exits with a non-zero status.
  def velvet(kmer_length, velveth_options_string, velvetg_options_string='', options={})
    res = velveth kmer_length, velveth_options_string, options
    velvetg res, velvetg_options_string
  end

  # Run velveth only, returning a new Bio::Velvet::Result whose
  # result_directory, velveth_stdout and velveth_stderr are filled in.
  #
  # The assembly directory is options[:output_assembly_path] when given,
  # otherwise a directory obtained from Files.create.root.
  #
  # Raises VelvetRunnerException if velveth exits with a non-zero status.
  def velveth(kmer_length, velveth_arguments, options={})
    result = Result.new
    if options[:output_assembly_path]
      log.debug "Using pre-defined assembly directory: #{options[:output_assembly_path]}"
      result.result_directory = options[:output_assembly_path]
    else
      result.result_directory = Files.create.root
    end

    cmd = "velveth #{escaped_directory(result)} #{kmer_length} #{velveth_arguments}"
    result.velveth_stdout, result.velveth_stderr = run_program('velveth', cmd)

    return result
  end

  # Run velvetg, with a Bio::Velvet::Result object generated with velveth.
  # The same object is returned, with velvetg_stdout and velvetg_stderr
  # filled in.
  #
  # Raises VelvetRunnerException if velvetg exits with a non-zero status.
  def velvetg(velveth_result_object, velvetg_arguments)
    cmd = "velvetg #{escaped_directory(velveth_result_object)} #{velvetg_arguments}"
    velveth_result_object.velvetg_stdout, velveth_result_object.velvetg_stderr =
      run_program('velvetg', cmd)

    return velveth_result_object
  end

  private

  # The result directory of the given Result, quoted so that the shell sees
  # it as a single word even when it contains spaces or metacharacters.
  def escaped_directory(result)
    Shellwords.escape(result.result_directory.to_s)
  end

  # Log and run a single shell command, returning [stdout, stderr].
  # Raises VelvetRunnerException naming the program when the exit status is
  # not zero.
  def run_program(program_name, cmd)
    log.info "Running #{program_name}: #{cmd}" if log.info?
    status, stdout, stderr = systemu cmd
    if status.exitstatus != 0
      raise VelvetRunnerException, "Error running #{program_name}: #{stderr}\n#{stdout}"
    end
    [stdout, stderr]
  end
end
|
62
|
+
|
63
|
+
# Raised when velveth or velvetg exits with a non-zero status. Derives from
# StandardError (not Exception) so that a plain `rescue` clause handles it.
class VelvetRunnerException < StandardError; end
|
64
|
+
|
65
|
+
# Describes one velvet assembly: the directory it lives in plus the
# stdout/stderr text recorded for velveth and velvetg.
class Result
  attr_accessor :velveth_stdout, :velveth_stderr,
                :velvetg_stdout, :velvetg_stderr,
                :result_directory

  # Path to the LastGraph output from velvetg
  def last_graph_path
    File.join(result_directory, 'LastGraph')
  end

  # Path to the contigs.fa output from velvetg
  def contigs_path
    File.join(result_directory, 'contigs.fa')
  end

  # Path to the stats.txt output from velvetg
  def stats_path
    File.join(result_directory, 'stats.txt')
  end

  # Return a Bio::Velvet::Graph object built from the LastGraph file.
  # The file is parsed afresh on every call.
  def last_graph
    graph_file = last_graph_path
    Bio::Velvet::Graph.parse_from_file(graph_file)
  end
end
|
90
|
+
end
|
91
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
require 'bio'
|
3
|
+
|
4
|
+
include Bio::Velvet
|
5
|
+
|
6
|
+
describe "ArcArray" do
  # Build a fresh arc running from begin_node_id to end_node_id. Each call
  # returns a distinct object, so two arcs with the same endpoints can be
  # told apart in the examples below.
  def arc_between(begin_node_id, end_node_id)
    arc = Graph::Arc.new
    arc.begin_node_id = begin_node_id
    arc.end_node_id = end_node_id
    arc
  end

  it 'should push' do
    arc = arc_between(1, 2)

    ary = Graph::ArcArray.new
    ary.push arc
    ary.to_a.should == [arc]

    # A second arc between the same pair of nodes is kept as well.
    arc2 = arc_between(1, 2)
    ary.push arc2
    ary.to_a.should == [arc,arc2]
  end

  it 'should get arcs right' do
    arc = arc_between(1, 2)

    ary = Graph::ArcArray.new
    ary.push arc
    # Lookup works in either node order, and with a single node ID.
    ary.get_arcs_by_node_id(1,2).should == [arc]
    ary.get_arcs_by_node_id(2,1).should == [arc]
    ary.get_arcs_by_node_id(1).should == [arc]
    ary.get_arcs_by_node_id(2).should == [arc]
    ary.get_arcs_by_node_id(3).should == []

    arc2 = arc_between(1, 2)
    ary.push arc2
    ary.get_arcs_by_node_id(1,2).should == [arc, arc2]
    ary.get_arcs_by_node_id(2,1).should == [arc, arc2]
    ary.get_arcs_by_node_id(1).should == [arc, arc2]
    ary.get_arcs_by_node_id(2).should == [arc, arc2]
    ary.get_arcs_by_node_id(3).should == []

    # An arc from a node to itself
    ary = Graph::ArcArray.new
    arc = arc_between(1, 1)
    ary.push arc
    ary.get_arcs_by_node_id(1).should == [arc]
    ary.get_arcs_by_node_id(1,1).should == [arc]

    arc2 = arc_between(1, 2)
    ary.push arc2
    ary.get_arcs_by_node_id(1).should == [arc, arc2]
    ary.get_arcs_by_node_id(2).should == [arc2]
  end

  it 'should length' do
    arc = arc_between(1, 2)

    ary = Graph::ArcArray.new
    ary.length.should == 0
    ary.push arc
    ary.length.should == 1

    arc2 = arc_between(1, 2)
    ary.push arc2
    ary.length.should == 2
  end

  it 'should delete' do
    arc = arc_between(1, 2)

    ary = Graph::ArcArray.new
    ary.push arc
    ary.to_a.should == [arc]
    ary.delete arc
    ary.to_a.should == []

    ary.push arc

    # Deleting one of two arcs sharing the same endpoints removes only
    # the arc that was asked for.
    arc2 = arc_between(1, 2)
    ary.push arc2
    ary.length.should == 2
    ary.delete arc2
    ary.to_a.should == [arc]
    ary.push arc2
    ary.delete arc
    ary.to_a.should == [arc2]
  end
end
|
@@ -0,0 +1,290 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
require 'bio'
|
3
|
+
|
4
|
+
class String
  # Reverse complement of this string treated as a nucleic acid sequence,
  # returned as an upper-case String.
  def revcom
    nucleotides = Bio::Sequence::NA.new(self)
    nucleotides.reverse_complement.to_s.upcase
  end
end
|
9
|
+
|
10
|
+
describe "BioVelvet" do
|
11
|
+
it "should be able to parse a graph 1" do
  graph_path = File.join(TEST_DATA_DIR, 'velvet_test_reads_assembly','Graph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.should be_kind_of(Bio::Velvet::Graph)

  # Values from the graph header
  graph.number_of_nodes.should eq(967)
  graph.number_of_sequences.should eq(50000)
  graph.hash_length.should eq(31)

  # Nodes
  graph.nodes[1].should be_kind_of(Bio::Velvet::Graph::Node)
  graph.nodes.length.should eq(967)
  graph.nodes[1].node_id.should eq(1)
  third_node = graph.nodes[3]
  third_node.length.should == 3
  third_node.coverages.should eq([236,205,0,0])
  third_node.ends_of_kmers_of_node.should eq('TTG')
  third_node.ends_of_kmers_of_twin_node.should eq('ACA')

  # Arcs
  graph.arcs.length.should eq(563)
  arcs = graph.get_arcs_by_node_id(2,712)
  arcs.length.should == 1
  found = arcs[0]
  found.begin_node_id.should eq(2)
  found.end_node_id.should eq(712)
  found.begin_node_direction.should eq(true)
  found.end_node_direction.should eq(false)
  found.multiplicity.should eq(1)
end
|
36
|
+
|
37
|
+
it "should be able to parse a graph that has read tracking" do
  graph_path = File.join(TEST_DATA_DIR, 'velvet_test_reads_assembly_read_tracking','Graph2')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.should be_kind_of(Bio::Velvet::Graph)

  # Values from the graph header
  graph.number_of_nodes.should eq(967)
  graph.number_of_sequences.should eq(50000)
  graph.hash_length.should eq(31)

  # Nodes
  graph.nodes[1].should be_kind_of(Bio::Velvet::Graph::Node)
  graph.nodes.length.should eq(967)
  graph.nodes[1].node_id.should eq(1)
  third_node = graph.nodes[3]
  third_node.length.should == 3
  third_node.coverages.should eq([236,205,0,0])
  third_node.ends_of_kmers_of_node.should eq('TTG')
  third_node.ends_of_kmers_of_twin_node.should eq('ACA')

  # Arcs
  arcs = graph.get_arcs_by_node_id(2,712)
  arcs.length.should == 1
  found = arcs[0]
  found.begin_node_id.should eq(2)
  found.end_node_id.should eq(712)
  found.begin_node_direction.should eq(true)
  found.end_node_direction.should eq(false)
  found.multiplicity.should eq(1)

  # Read tracking entries for node 967 in the test file:
  # NR -967 1
  # 49982 0 0
  # === later
  # NR 967 1
  # 49981 0 0
  node = graph.nodes[967]
  node.short_reads.nil?.should eq(false)
  node.short_reads.length.should eq(2), node.inspect
  reverse_read = node.short_reads[0]
  reverse_read.read_id.should eq(49982)
  reverse_read.offset_from_start_of_node.should eq(0)
  reverse_read.start_coord.should eq(0)
  reverse_read.direction.should eq(false)
  forward_read = node.short_reads[1]
  forward_read.read_id.should eq(49981)
  forward_read.offset_from_start_of_node.should eq(0)
  forward_read.start_coord.should eq(0)
  forward_read.direction.should eq(true)

  # Read tracking entries for node 951 in the test file:
  # NR -951 2
  #47210 0 0
  #47223 41 0
  # ====later
  # NR 951 2
  # 47209 54 0
  # 47224 0 0
  node = graph.nodes[951]
  node.short_reads.length.should eq(4)
  node.number_of_short_reads.should eq(4)
  node.short_reads[1].offset_from_start_of_node.should eq(41)

  # Total number of tracked reads, as counted from the file with
  # grep -A 50000 ^NR velvet_test_reads_assembly_read_tracking/Graph2 |grep -v NR |wc -l
  # 40327
  reads_per_node = graph.nodes.collect do |n|
    n.short_reads.nil? ? 0 : n.short_reads.length
  end
  reads_per_node.reduce(:+).should eq(40327)
end
|
95
|
+
|
96
|
+
it 'should return sets of arcs by id' do
  graph_path = File.join(TEST_DATA_DIR, 'velvet_test_reads_assembly','LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  # Relevant arcs in the test file:
  # ARC 2 -578 1
  # ARC 2 -473 30
  # ARC -2 650 3
  # ARC -2 959 24
  # ARC 3 4 81
  # ARC -3 -786 61
  # ARC -3 -740 1
  # ARC -3 -568 1
  # ARC -3 754 6
  # ....
  #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('info'); log = Bio::Log::LoggerPlus.new('bio-velvet'); Bio::Log::CLI.configure('bio-velvet')

  # The same arc is found whichever order the node IDs are given in.
  [[2, 578], [578, 2]].each do |first_id, second_id|
    arcs = graph.get_arcs_by_node_id(first_id, second_id)
    arcs.length.should eq(1)
    arcs[0].begin_node_forward?.should eq(true)
    arcs[0].begin_node_id.should == 2
    arcs[0].end_node_id.should == 578
  end

  # No arc joins these two nodes
  unconnected = graph.get_arcs_by_node_id(2,178)
  unconnected.length.should == 0
end
|
125
|
+
|
126
|
+
it 'should return a set of arcs by node objects' do
  graph_path = File.join(TEST_DATA_DIR, 'velvet_test_reads_assembly','LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  # Relevant arcs in the test file:
  # ARC 2 -578 1
  # ARC 2 -473 30
  # ARC -2 650 3
  # ARC -2 959 24
  # ARC 3 4 81
  # ARC -3 -786 61
  # ARC -3 -740 1
  # ARC -3 -568 1
  # ARC -3 754 6
  # ....
  #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('info'); log = Bio::Log::LoggerPlus.new('bio-velvet'); Bio::Log::CLI.configure('bio-velvet')

  node_with_id = lambda do |wanted_id|
    graph.nodes.select{|n| n.node_id == wanted_id}[0]
  end
  node2 = node_with_id.call(2)
  node650 = node_with_id.call(650)
  node754 = node_with_id.call(754)

  # forward, then reverse: both orders find the same arc
  [[node2, node650], [node650, node2]].each do |first_node, second_node|
    arcs = graph.get_arcs_by_node(first_node, second_node)
    arcs.length.should eq(1)
    arcs[0].begin_node_forward?.should eq(false)
    arcs[0].begin_node_id.should == 2
    arcs[0].end_node_id.should == 650
  end

  # no connection
  unconnected = graph.get_arcs_by_node(node2, node754)
  unconnected.length.should == 0
end
|
162
|
+
|
163
|
+
it 'should have a functioning NodeArray class' do
  node_array = Bio::Velvet::Graph::NodeArray.new
  node_array.length.should eq(0)

  # Store a node under ID 1 and read it back
  stored = Bio::Velvet::Graph::Node.new
  node_array[1] = stored
  node_array.length.should eq(1)
  node_array[1].should eq(stored)
end
|
171
|
+
|
172
|
+
it "arcs should directions_opposing?" do
  arc = Bio::Velvet::Graph::Arc.new

  # [begin direction, end direction, expected directions_opposing?]
  cases = [
    [true,  true,  false],
    [true,  false, true],
    [false, false, false],
  ]
  cases.each do |begin_direction, end_direction, opposing|
    arc.begin_node_direction = begin_direction
    arc.end_node_direction = end_direction
    arc.directions_opposing?.should eq(opposing)
  end
end
|
186
|
+
|
187
|
+
it "nodes should correctly respond to #sequence" do
  parent = Bio::Velvet::Graph.new
  parent.hash_length = 31

  node = Bio::Velvet::Graph::Node.new
  node.parent_graph = parent
  node.ends_of_kmers_of_node = 'AATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAGATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATACGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATGGACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG'
  node.ends_of_kmers_of_twin_node = 'TTTTTAATTTGTACATTAAATAATACATTGCCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGATATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTTACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCTCCTTCTTTTAAACCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCCTTTCCTTTTAATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTACCATTTTTATCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAATCTACTTTTTTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTATAGTTTGATTCTAAATCGTGATCTTTGGTAGAGATAAGTG'

  # The expected sequence, written as 60-base rows and joined without
  # separators.
  expected = [
    'CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG',
    'TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG',
    'ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA',
    'CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG',
    'GACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTC',
    'CTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATG',
    'ATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAA',
    'GTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAA',
    'ACTATGCTGGTATTTCACTTCCAGGTACAGG',
  ].join
  node.sequence.should eq(expected)
end
|
204
|
+
|
205
|
+
it 'short nodes should respond to sequence properly 1' do
  #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('bio-velvet'); Bio::Log::CLI.configure('bio-velvet')
  graph_path = File.join(TEST_DATA_DIR,'short_node_LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.nodes.length.should == 4

  # Asking node 2 for its sequence is expected to raise
  expect {
    graph.nodes[2].sequence
  }.to raise_error(Bio::Velvet::NotImplementedException)
end
|
211
|
+
|
212
|
+
it 'should really correctly respond to #sequence on real data' do
  data_dir = File.join(TEST_DATA_DIR, 'node_sequence')
  graph = Bio::Velvet::Graph.parse_from_file File.join(data_dir, 'LastGraph')
  graph.should be_kind_of(Bio::Velvet::Graph)

  # Map node ID (as a string, taken from the contig name) => contig sequence
  contig_seq_hash = {}
  Bio::FlatFile.foreach(File.join(data_dir, 'contigs.fa')) do |contig|
    id_from_name = contig.definition.match(/^NODE_(\d+)_/)[1]
    contig_seq_hash[id_from_name] = contig.seq
  end

  # Every node's sequence must equal the contig of the same ID
  graph.nodes.each do |node|
    found = node.sequence
    expected = contig_seq_hash[node.node_id.to_s]
    found.should eq(expected), "Node #{node.node_id}. Found\n#{found}\nExpected\n#{expected}"
  end
end
|
227
|
+
|
228
|
+
it 'should delete nodes correctly path nodes' do
  graph_path = File.join(TEST_DATA_DIR, 'short_node_LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.should be_kind_of(Bio::Velvet::Graph)

  deleted_nodes, deleted_arcs = graph.delete_nodes_if { |node| node.node_id == 3 }

  # Node 3 goes, together with the arcs touching it
  deleted_nodes.collect{|n| n.node_id}.sort.should == [3]
  graph.nodes.collect{|n| n.node_id}.sort.should == [1,2,4]
  deleted_arcs.collect{|a| [a.begin_node_id, a.end_node_id]}.sort.should == [[1,3],[3,4]]
  graph.arcs.collect{|a| [a.begin_node_id, a.end_node_id]}.sort.should == [[1,2],[2,4]]

  # Lookup by ID still works for the survivors and gives nil for node 3
  graph.nodes[1].node_id.should == 1
  graph.nodes[3].should == nil
  graph.nodes[4].node_id.should == 4
end
|
244
|
+
|
245
|
+
it 'should delete nodes correctly cap nodes' do
  graph_path = File.join(TEST_DATA_DIR, 'short_node_LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.should be_kind_of(Bio::Velvet::Graph)

  deleted_nodes, deleted_arcs = graph.delete_nodes_if { |node| node.node_id == 4 }

  # Node 4 goes, together with the arcs touching it
  deleted_nodes.collect{|n| n.node_id}.sort.should == [4]
  graph.nodes.collect{|n| n.node_id}.sort.should == [1,2,3]
  deleted_arcs.collect{|a| [a.begin_node_id, a.end_node_id]}.sort.should == [[2,4],[3,4]]
  graph.arcs.collect{|a| [a.begin_node_id, a.end_node_id]}.sort.should == [[1,2],[1,3]]

  # Lookup by ID gives nil for node 4 and still works for node 1
  graph.nodes[4].should == nil
  graph.nodes[1].node_id.should == 1
end
|
260
|
+
|
261
|
+
it 'node#coverage' do
  graph_path = File.join(TEST_DATA_DIR, 'short_node_LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.should be_kind_of(Bio::Velvet::Graph)

  # [node ID, expected coverage]
  expected_coverages = [
    [1, 1140.0/228],
    [2, 58.0/29],
    [3, 114.0/38],
    [4, 1120.0/224],
  ]
  expected_coverages.each do |node_id, expected|
    graph.nodes[node_id].coverage.should == expected
  end
end
|
270
|
+
|
271
|
+
it 'should get neighbours_off_end' do
  graph_path = File.join(TEST_DATA_DIR, 'short_node_LastGraph')
  graph = Bio::Velvet::Graph.parse_from_file graph_path
  graph.should be_kind_of(Bio::Velvet::Graph)

  nodes = graph.nodes
  graph.neighbours_off_end(nodes[1]).should == [nodes[2], nodes[3]]
  graph.neighbours_off_end(nodes[2]).should == [nodes[4]]
  graph.neighbours_off_end(nodes[3]).should == [nodes[4]]
  #node 4 is revcom
  graph.neighbours_off_end(nodes[4]).should == [nodes[2], nodes[3]]
end
|
280
|
+
|
281
|
+
it 'should get neighbours_into_start' do
  graph = Bio::Velvet::Graph.parse_from_file File.join(TEST_DATA_DIR, 'short_node_LastGraph')
  graph.should be_kind_of(Bio::Velvet::Graph)

  # Each comparison needs `.should`; a bare `==` expression is evaluated
  # and discarded, so the example would pass whatever the method returns.
  graph.neighbours_into_start(graph.nodes[1]).should == []
  graph.neighbours_into_start(graph.nodes[2]).should == [graph.nodes[1]]
  graph.neighbours_into_start(graph.nodes[3]).should == [graph.nodes[1]]
  graph.neighbours_into_start(graph.nodes[4]).should == []
end
|
290
|
+
end
|