bio-velvet_underground 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +1 -0
- data/.rspec +1 -0
- data/Gemfile +1 -0
- data/README.md +53 -9
- data/VERSION +1 -1
- data/ext/mkrf_conf.rb +22 -4
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +18 -16
- data/ext/src/src/graph.c +50 -16
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- data/lib/bio-velvet_underground.rb +55 -11
- data/lib/bio-velvet_underground/binary_sequence_store.rb +86 -0
- data/lib/bio-velvet_underground/constants.rb +33 -0
- data/lib/bio-velvet_underground/graph.rb +262 -0
- data/lib/bio-velvet_underground/runner.rb +59 -0
- data/spec/binary_sequence_store_spec.rb +12 -0
- data/spec/data/2/CnyUnifiedSeq +0 -0
- data/spec/data/3/Assem/Graph2 +40 -0
- data/spec/data/3/Assem/LastGraph +40 -0
- data/spec/data/3/Assem/Log +42 -0
- data/spec/data/3/Assem/PreGraph +9 -0
- data/spec/data/3/Assem/Roadmaps +15 -0
- data/spec/data/3/Assem/Sequences +50 -0
- data/spec/data/3/Assem/contigs.fa +15 -0
- data/spec/data/3/Assem/stats.txt +5 -0
- data/spec/data/3/Sequences +50 -0
- data/spec/data/4/LastGraphKmer51Head +7 -0
- data/spec/graph_spec.rb +52 -0
- data/spec/runner_spec.rb +18 -0
- data/spec/spec_helper.rb +1 -16
- metadata +34 -4
- data/ext/bioruby.patch +0 -60
- data/lib/bio-velvet_underground/velvet_underground.rb +0 -72
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4af6bceb99a775604acc0b87e641f42834bfe86f
|
4
|
+
data.tar.gz: bc8d0a17768ec2f94fd304c98a58505229b2a955
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f652e8333d0e2fa600bab4a9f3c9a0dc5bb4670b0a9caef3cbbf2999f3435068e22bd65b41c43671910a6eaf808af70a2afa5e4787aa1d90cda347c96ec3a510
|
7
|
+
data.tar.gz: d00da35ed8e0926392a60ce93251d293ccb032803fb3e0b0ec81a9477ff7d6f96a452a75caa36e36a97d765e7aff33821bc73a354c2b74c6c73fae9c87001f3d
|
data/.gitmodules
CHANGED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,34 +2,78 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://secure.travis-ci.org/wwood/bioruby-velvet_underground.png)](http://travis-ci.org/wwood/bioruby-velvet_underground)
|
4
4
|
|
5
|
-
This biogem is aimed at providing Ruby bindings to the velvet assembler's source code.
|
6
|
-
|
7
|
-
Note: this software is under active development!
|
5
|
+
This biogem is aimed at providing Ruby bindings to the velvet assembler's source code. See also [bio-velvet](https://github.com/wwood/bioruby-velvet) for Ruby code that does not bind the velvet C.
|
8
6
|
|
9
7
|
## Installation
|
10
8
|
|
11
9
|
```sh
|
12
10
|
gem install bio-velvet_underground
|
13
11
|
```
|
12
|
+
This can take a few minutes as several versions of velvet with different kmer sizes are compiled.
|
14
13
|
|
15
14
|
## Usage
|
16
15
|
|
17
|
-
The
|
16
|
+
The code is intended to cater for a few specific purposes.
|
18
17
|
|
18
|
+
### Running velvet
|
19
|
+
Running velvet returns a `Result` object, which is effectively a pointer to a velvet result directory
|
19
20
|
```ruby
|
20
21
|
require 'bio-velvet_underground'
|
21
22
|
|
23
|
+
#kmer 29, '-short my.fasta' the argument to velveth, no special arguments given to velvetg.
|
24
|
+
result = Bio::Velvet::Runner.new.velvet(29,"-short my.fasta",'')
|
25
|
+
result.result_directory #=> path to temporary directory, containing velvet generated files e.g. contigs.fa
|
26
|
+
|
27
|
+
# A pre-defined velvet result directory:
|
28
|
+
result = Bio::Velvet::Runner.new.velvet(29,"-short my.fasta",'',:output_assembly_path => '/path/to/result')
|
29
|
+
result.result_directory #=> '/path/to/result'
|
30
|
+
```
|
31
|
+
With the magic of Ruby-FFI, the library with the smallest kmer size >= 29 is chosen (in this case 31).
|
32
|
+
Several libraries are pre-compiled at gem install-time, and then bound at runtime. `velveth` and `velvetg`
|
33
|
+
steps can be run separately if required.
|
34
|
+
|
35
|
+
### Working with the binary sequence file
|
36
|
+
The binary sequence file is created when velveth is run with the `-create_binary` flag.
|
37
|
+
|
38
|
+
```ruby
|
22
39
|
seqs = Bio::Velvet::Underground::BinarySequenceStore.new '/path/to/velvet/directory/CnyUnifiedSeq'
|
23
|
-
seqs[1] #=> 'CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGA'
|
24
40
|
seqs.length #=> 77 (there are 77 sequences in the CnyUnifiedSeq)
|
41
|
+
seqs[1] #=> 'CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGA'
|
42
|
+
seqs[0] #=> nil (indices map directly to the indices in other velvet files)
|
43
|
+
```
|
25
44
|
|
45
|
+
### Working with LastGraph file
|
46
|
+
```ruby
|
47
|
+
path = 'spec/data/3/Assem/LastGraph'
|
48
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path #=> Bio::Velvet::Underground::Graph object
|
49
|
+
|
50
|
+
graph.hash_length #=> 31 (kmer length)
|
51
|
+
graph.node_count #=> 4
|
52
|
+
|
53
|
+
graph.nodes[1] #=> Bio::Velvet::Underground::Graph::Node object
|
54
|
+
graph.nodes[2].ends_of_kmers_of_node #=> 'GTTTAAAAGAAGGAGATTACTTTATAAAA'
|
55
|
+
graph.nodes[2].coverages #=> [58,0] (coverages from different categories)
|
56
|
+
|
57
|
+
graph.nodes[1].short_reads #=> Array of Bio::Velvet::Underground::Graph::NodedRead objects
|
58
|
+
graph.nodes[1].short_reads[0].direction #=> true (i.e. forward w.r.t the node)
|
59
|
+
graph.nodes[1].short_reads[2].read_id #=> 4
|
26
60
|
```
|
61
|
+
There is more to these objects - see the documentation.
|
62
|
+
|
63
|
+
|
64
|
+
Patches to these and other parts of velvet welcome.
|
65
|
+
|
66
|
+
## Development practice
|
67
|
+
|
68
|
+
The velvet C code 'underground' here is for the most part vanilla velvet code as you might expect.
|
69
|
+
However some changes were necessary to allow binding from this biogem. For instance the library
|
70
|
+
does not write to `$stdout` as this interferes with Ruby's writes to `$stdout`.
|
27
71
|
|
28
|
-
|
72
|
+
There are also some extra options for controlling velvet's behaviour, geared towards taking
|
73
|
+
some of the guesswork out of the assembly process at the expense of a less resolved `LastGraph`.
|
74
|
+
These are currently non-standard modifications - get in touch with @wwood if you are interested.
|
75
|
+
Not invoking these options should leave 'normal' velvet behaviour intact.
|
29
76
|
|
30
|
-
The API doc is online. For more code examples see the test files in
|
31
|
-
the source tree.
|
32
|
-
|
33
77
|
## Project home page
|
34
78
|
|
35
79
|
Information on the source tree, documentation, examples, issues and
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/ext/mkrf_conf.rb
CHANGED
@@ -22,15 +22,33 @@ require 'rake/clean'
|
|
22
22
|
path = File.expand_path(File.dirname(__FILE__))
|
23
23
|
path_external = File.join(File.dirname(__FILE__), "../lib/bio-velvet_underground/external")
|
24
24
|
|
25
|
+
# Require constants - code shared between before and after installation
|
26
|
+
require File.join(File.dirname(__FILE__), "../lib/bio-velvet_underground/constants")
|
27
|
+
|
25
28
|
task :compile do
|
26
29
|
cd(File.join(File.dirname(__FILE__),'src')) do
|
27
|
-
sh "patch -p1 < ../bioruby.patch"
|
28
30
|
case Config::CONFIG['host_os']
|
29
31
|
when /linux/
|
30
|
-
|
31
|
-
|
32
|
+
|
33
|
+
# Create library with default install params
|
34
|
+
$stdout.puts "Making velvet shared library with default parameters"
|
35
|
+
sh "make shared"
|
36
|
+
shared_location = 'obj/shared'
|
37
|
+
cp(File.join(shared_location,"libvelvet.so.1.0"), path_external)
|
38
|
+
$stdout.puts "Finished installing default library version"
|
39
|
+
# Create libraries with larger non-default kmer sizes
|
40
|
+
Bio::Velvet::Underground.max_kmers.each do |max_kmer|
|
41
|
+
next if max_kmer == Bio::Velvet::Underground::DEFAULT_MAXKMERLENGTH
|
42
|
+
|
43
|
+
$stdout.puts "Making velvet shared library with kmer "+max_kmer.to_s
|
44
|
+
library_name = File.basename Bio::Velvet::Underground.library_location_of(max_kmer)
|
45
|
+
sh "make clean; make MAXKMERLENGTH="+max_kmer.to_s+" shared"
|
46
|
+
cp(File.join(shared_location,"libvelvet.so.1.0"),
|
47
|
+
File.join(path_external,File.basename(library_name)))
|
48
|
+
end
|
49
|
+
|
32
50
|
when /darwin/
|
33
|
-
|
51
|
+
raise NotImplementedError, "possibly will work, but bio-velvet_underground is not tested on OSX"
|
34
52
|
when /mswin|mingw/ then raise NotImplementedError, "bio-velvet_underground library is not available for Windows platform"
|
35
53
|
end #case
|
36
54
|
end #cd
|
data/ext/src/Makefile
CHANGED
@@ -20,14 +20,14 @@ endif
|
|
20
20
|
|
21
21
|
ifdef VBIGASSEMBLY
|
22
22
|
override DEF := $(DEF) -D BIGASSEMBLY -D VBIGASSEMBLY
|
23
|
-
endif
|
23
|
+
endif
|
24
24
|
|
25
25
|
|
26
26
|
ifdef LONGSEQUENCES
|
27
27
|
override DEF := $(DEF) -D LONGSEQUENCES
|
28
28
|
endif
|
29
29
|
|
30
|
-
# OpenMP
|
30
|
+
# OpenMP
|
31
31
|
ifdef OPENMP
|
32
32
|
override CFLAGS := $(CFLAGS) -fopenmp
|
33
33
|
endif
|
@@ -39,23 +39,24 @@ endif
|
|
39
39
|
|
40
40
|
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
|
41
41
|
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
42
|
+
OBJSHARED = $(subst obj,obj/shared,$(OBJ))
|
42
43
|
|
43
|
-
default :
|
44
|
+
default : zlib obj velveth velvetg
|
44
45
|
|
45
46
|
clean : clean-zlib
|
46
|
-
-rm obj/*.o obj/dbg/*.o ./velvet*
|
47
|
+
-rm obj/*.o obj/dbg/*.o obj/shared/*.o obj/shared/velvet.so.0.0.1 ./velvet*
|
47
48
|
-rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
|
48
49
|
-rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
|
49
50
|
|
50
|
-
cleanobj:
|
51
|
-
-rm obj/*.o obj/dbg/*.o
|
51
|
+
cleanobj:
|
52
|
+
-rm obj/*.o obj/dbg/*.o obj/shared/*.o
|
52
53
|
|
53
54
|
ifdef BUNDLEDZLIB
|
54
55
|
Z_LIB_DIR=third-party/zlib-1.2.3
|
55
56
|
Z_LIB_FILES=$(Z_LIB_DIR)/*.o
|
56
57
|
override DEF := $(DEF) -D BUNDLEDZLIB
|
57
58
|
|
58
|
-
zlib:
|
59
|
+
zlib:
|
59
60
|
cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
|
60
61
|
|
61
62
|
clean-zlib :
|
@@ -67,14 +68,14 @@ zlib :
|
|
67
68
|
clean-zlib :
|
68
69
|
endif
|
69
70
|
|
70
|
-
velveth : obj
|
71
|
+
velveth : obj
|
71
72
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velveth obj/tightString.o obj/run.o obj/recycleBin.o obj/splay.o obj/splayTable.o obj/readSet.o obj/binarySequences.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
72
73
|
|
73
74
|
|
74
75
|
velvetg : obj
|
75
76
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velvetg obj/tightString.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/scaffold.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
76
77
|
|
77
|
-
debug : override DEF := $(DEF) -D DEBUG
|
78
|
+
debug : override DEF := $(DEF) -D DEBUG
|
78
79
|
debug : cleanobj obj/dbg
|
79
80
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velveth obj/dbg/tightString.o obj/dbg/run.o obj/dbg/recycleBin.o obj/dbg/splay.o obj/dbg/splayTable.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
80
81
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velvetg obj/dbg/tightString.o obj/dbg/graph.o obj/dbg/run2.o obj/dbg/fibHeap.o obj/dbg/fib.o obj/dbg/concatenatedGraph.o obj/dbg/passageMarker.o obj/dbg/graphStats.o obj/dbg/correctedGraph.o obj/dbg/dfib.o obj/dbg/dfibHeap.o obj/dbg/recycleBin.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/shortReadPairs.o obj/dbg/scaffold.o obj/dbg/locallyCorrectedGraph.o obj/dbg/graphReConstruction.o obj/dbg/roadMap.o obj/dbg/preGraph.o obj/dbg/preGraphConstruction.o obj/dbg/concatenatedPreGraph.o obj/dbg/readCoherentGraph.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
@@ -92,25 +93,25 @@ colordebug : cleanobj obj/dbg_de
|
|
92
93
|
objdir:
|
93
94
|
mkdir -p obj
|
94
95
|
|
95
|
-
obj: zlib
|
96
|
+
obj: zlib objdir $(OBJ)
|
96
97
|
|
97
98
|
obj_de: override DEF := $(DEF) -D COLOR
|
98
99
|
obj_de: zlib cleanobj objdir $(OBJ)
|
99
100
|
|
100
|
-
obj/dbgdir:
|
101
|
+
obj/dbgdir:
|
101
102
|
mkdir -p obj/dbg
|
102
103
|
|
103
|
-
obj/dbg: override DEF := $(DEF) -D DEBUG
|
104
|
+
obj/dbg: override DEF := $(DEF) -D DEBUG
|
104
105
|
obj/dbg: zlib cleanobj obj/dbgdir $(OBJDBG)
|
105
106
|
|
106
107
|
obj/dbg_de: override DEF := $(DEF) -D COLOR -D DEBUG
|
107
108
|
obj/dbg_de: zlib cleanobj obj/dbgdir $(OBJDBG)
|
108
109
|
|
109
110
|
obj/%.o: src/%.c
|
110
|
-
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
111
|
+
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
111
112
|
|
112
113
|
obj/dbg/%.o: src/%.c
|
113
|
-
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
114
|
+
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
114
115
|
|
115
116
|
doc: Manual.pdf
|
116
117
|
|
@@ -123,3 +124,15 @@ endif
|
|
123
124
|
|
124
125
|
test: velvetg velveth
|
125
126
|
cd tests && ./run-tests.sh
|
127
|
+
|
128
|
+
sharedobjdir:
|
129
|
+
mkdir -p obj/shared
|
130
|
+
|
131
|
+
obj/shared: sharedobjdir $(OBJSHARED)
|
132
|
+
|
133
|
+
obj/shared/%.o: src/%.c
|
134
|
+
$(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
135
|
+
|
136
|
+
shared: zlib obj/shared
|
137
|
+
cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o run.o run2.o
|
138
|
+
|
@@ -47,7 +47,7 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
47
47
|
hasSinglePreArc_pg(getOtherEnd_pg
|
48
48
|
(preArc, preNodeBID),
|
49
49
|
preGraph)
|
50
|
-
&& !isLoop_pg(preArc)
|
50
|
+
&& !isLoop_pg(preArc)
|
51
51
|
&& getDestination_pg(preArc, preNodeBID) != preNodeAID) {
|
52
52
|
|
53
53
|
totalLength += getPreNodeLength_pg(preNodeBID, preGraph);
|
@@ -99,11 +99,11 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
99
99
|
appendDescriptors_pg(&ptr, &writeOffset ,currentPreNodeID, preGraph, false);
|
100
100
|
}
|
101
101
|
|
102
|
-
if (writeOffset != 0)
|
102
|
+
if (writeOffset != 0)
|
103
103
|
while (writeOffset++ != 4)
|
104
104
|
(*ptr) >>= 2;
|
105
105
|
|
106
|
-
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
106
|
+
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
107
107
|
|
108
108
|
// Correct preArcs
|
109
109
|
for (preArc = getPreArc_pg(preNodeBID, preGraph); preArc != NULL_IDX;
|
@@ -209,7 +209,7 @@ static boolean isEligibleTip(IDnum index, PreGraph * preGraph, Coordinate
|
|
209
209
|
if (currentIndex == 0)
|
210
210
|
return true;
|
211
211
|
|
212
|
-
// Joined tips
|
212
|
+
// Joined tips
|
213
213
|
if (simplePreArcCount_pg(-currentIndex, preGraph) < 2)
|
214
214
|
return false;
|
215
215
|
|
@@ -764,7 +764,7 @@ static void remapNodeMarkersOntoNeighbour(Node * source,
|
|
764
764
|
}
|
765
765
|
}
|
766
766
|
|
767
|
-
// Short read markers
|
767
|
+
// Short read markers
|
768
768
|
if (readStartsAreActivated(graph)) {
|
769
769
|
// Update Coordinates
|
770
770
|
sourceArray = getNodeReads(source, graph);
|
@@ -1250,7 +1250,7 @@ static void foldSymmetricalNode(Node * node)
|
|
1250
1250
|
twinNode = node;
|
1251
1251
|
node = tmp;
|
1252
1252
|
}
|
1253
|
-
// Destroy link to old markers
|
1253
|
+
// Destroy link to old markers
|
1254
1254
|
setMarker(node, NULL_IDX);
|
1255
1255
|
|
1256
1256
|
// Reinsert markers properly
|
@@ -1951,7 +1951,7 @@ static void transferNodeData(Node * source, Node * target)
|
|
1951
1951
|
if (getNode(fastPath) == twinSource)
|
1952
1952
|
fastPath = getNextInSequence(fastPath);
|
1953
1953
|
|
1954
|
-
// Next node
|
1954
|
+
// Next node
|
1955
1955
|
if (source == activeNode) {
|
1956
1956
|
activeNode = target;
|
1957
1957
|
todo =
|
@@ -2013,7 +2013,7 @@ static void concatenateNodesAndVaccinate(Node * nodeA, Node * nodeB,
|
|
2013
2013
|
// Read starts
|
2014
2014
|
concatenateReadStarts(nodeA, nodeB, graph);
|
2015
2015
|
|
2016
|
-
// Descriptor management
|
2016
|
+
// Descriptor management
|
2017
2017
|
appendDescriptors(nodeA, nodeB);
|
2018
2018
|
|
2019
2019
|
// Update uniqueness:
|
@@ -2094,9 +2094,11 @@ static void cleanUpRedundancy()
|
|
2094
2094
|
if (isTerminal(slowMarker))
|
2095
2095
|
slowLength = finalLength;
|
2096
2096
|
else {
|
2097
|
-
|
2098
|
-
|
2099
|
-
|
2097
|
+
if(getPassageMarkerFinish(slowMarker) != 0)
|
2098
|
+
slowLength = slowToFastMapping[getPassageMarkerFinish(slowMarker) - 1];
|
2099
|
+
else
|
2100
|
+
slowLength = slowToFastMapping[0];
|
2101
|
+
|
2100
2102
|
if (slowLength < slowConstraint)
|
2101
2103
|
slowLength = slowConstraint;
|
2102
2104
|
}
|
@@ -2165,7 +2167,7 @@ static void cleanUpRedundancy()
|
|
2165
2167
|
|
2166
2168
|
//velvetLog("Concatenation\n");
|
2167
2169
|
|
2168
|
-
// Freeing up memory
|
2170
|
+
// Freeing up memory
|
2169
2171
|
if (slowMarker != NULL_IDX)
|
2170
2172
|
concatenatePathNodes(slowPath);
|
2171
2173
|
else
|
@@ -2458,7 +2460,7 @@ void clipTipsHard(Graph * graph, boolean conserveLong)
|
|
2458
2460
|
|
2459
2461
|
if (current == NULL)
|
2460
2462
|
continue;
|
2461
|
-
|
2463
|
+
|
2462
2464
|
if (conserveLong && getMarker(current))
|
2463
2465
|
continue;
|
2464
2466
|
|
@@ -2558,7 +2560,7 @@ void correctGraph(Graph * argGraph, ShortLength * argSequenceLengths, Category *
|
|
2558
2560
|
eligibleStartingPoints = mallocOrExit(2 * nodes + 1, IDnum);
|
2559
2561
|
progressStatus = callocOrExit(2 * nodes + 1, boolean);
|
2560
2562
|
todoLists = callocOrExit(2 * nodes + 1, Ticket *);
|
2561
|
-
//Done with memory
|
2563
|
+
//Done with memory
|
2562
2564
|
|
2563
2565
|
resetNodeStatus(graph);
|
2564
2566
|
determineEligibleStartingPoints();
|
@@ -2605,9 +2607,9 @@ void setMaxReadLength(int value)
|
|
2605
2607
|
if (value < 0) {
|
2606
2608
|
velvetLog("Negative branch length %i!\n", value);
|
2607
2609
|
velvetLog("Exiting...\n");
|
2608
|
-
#ifdef DEBUG
|
2610
|
+
#ifdef DEBUG
|
2609
2611
|
abort();
|
2610
|
-
#endif
|
2612
|
+
#endif
|
2611
2613
|
exit(1);
|
2612
2614
|
}
|
2613
2615
|
MAXREADLENGTH = value;
|
@@ -2619,9 +2621,9 @@ void setMaxGaps(int value)
|
|
2619
2621
|
if (value < 0) {
|
2620
2622
|
velvetLog("Negative max gap count %i!\n", value);
|
2621
2623
|
velvetLog("Exiting...\n");
|
2622
|
-
#ifdef DEBUG
|
2624
|
+
#ifdef DEBUG
|
2623
2625
|
abort();
|
2624
|
-
#endif
|
2626
|
+
#endif
|
2625
2627
|
exit(1);
|
2626
2628
|
}
|
2627
2629
|
MAXGAPS = value;
|
@@ -2633,9 +2635,9 @@ void setMaxDivergence(double value)
|
|
2633
2635
|
velvetLog("Divergence rate %lf out of bounds [0,1]!\n",
|
2634
2636
|
value);
|
2635
2637
|
velvetLog("Exiting...\n");
|
2636
|
-
#ifdef DEBUG
|
2638
|
+
#ifdef DEBUG
|
2637
2639
|
abort();
|
2638
|
-
#endif
|
2640
|
+
#endif
|
2639
2641
|
exit(1);
|
2640
2642
|
}
|
2641
2643
|
MAXDIVERGENCE = value;
|
data/ext/src/src/graph.c
CHANGED
@@ -906,7 +906,7 @@ void appendDescriptors(Node * destination, Node * source)
|
|
906
906
|
twinDestination->length = newLength;
|
907
907
|
}
|
908
908
|
|
909
|
-
static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
|
909
|
+
static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
|
910
910
|
{
|
911
911
|
Coordinate index;
|
912
912
|
Nucleotide nucleotide;
|
@@ -917,7 +917,7 @@ static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Des
|
|
917
917
|
}
|
918
918
|
}
|
919
919
|
|
920
|
-
static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
|
920
|
+
static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
|
921
921
|
{
|
922
922
|
Coordinate shift = totalLength - destinationLength - sourceLength;
|
923
923
|
Coordinate index;
|
@@ -2052,7 +2052,7 @@ Graph *importGraph(char *filename)
|
|
2052
2052
|
short short_var;
|
2053
2053
|
char c;
|
2054
2054
|
|
2055
|
-
if (file == NULL)
|
2055
|
+
if (file == NULL)
|
2056
2056
|
exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
|
2057
2057
|
|
2058
2058
|
velvetLog("Reading graph file %s\n", filename);
|
@@ -2204,9 +2204,9 @@ Graph *importGraph(char *filename)
|
|
2204
2204
|
velvetLog
|
2205
2205
|
("ERROR: reading in graph - only %d items read for line '%s'",
|
2206
2206
|
sCount, line);
|
2207
|
-
#ifdef DEBUG
|
2207
|
+
#ifdef DEBUG
|
2208
2208
|
abort();
|
2209
|
-
#endif
|
2209
|
+
#endif
|
2210
2210
|
exit(1);
|
2211
2211
|
}
|
2212
2212
|
newMarker =
|
@@ -2404,7 +2404,7 @@ Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
|
|
2404
2404
|
#endif
|
2405
2405
|
}
|
2406
2406
|
}
|
2407
|
-
|
2407
|
+
|
2408
2408
|
index++;
|
2409
2409
|
}
|
2410
2410
|
|
@@ -2506,7 +2506,7 @@ Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_str
|
|
2506
2506
|
twin = node->twinNode;
|
2507
2507
|
twin->length = node->length;
|
2508
2508
|
twin->descriptor =
|
2509
|
-
callocOrExit(arrayLength, Descriptor);
|
2509
|
+
callocOrExit(arrayLength, Descriptor);
|
2510
2510
|
|
2511
2511
|
index = 0;
|
2512
2512
|
while ((c = getc(file)) != '\n') {
|
@@ -3145,9 +3145,13 @@ ShortReadMarker *extractFrontOfNodeReads(Node * node,
|
|
3145
3145
|
}
|
3146
3146
|
|
3147
3147
|
free(sourceArray);
|
3148
|
-
graph->nodeReads[sourceID] = newArray;
|
3149
3148
|
graph->nodeReadCounts[sourceID] = newLength;
|
3150
3149
|
|
3150
|
+
if(newLength > 0)
|
3151
|
+
graph->nodeReads[sourceID] = newArray;
|
3152
|
+
else
|
3153
|
+
graph->nodeReads[sourceID] = NULL;
|
3154
|
+
|
3151
3155
|
*length = mergeLength;
|
3152
3156
|
return mergeArray;
|
3153
3157
|
}
|
@@ -3275,9 +3279,13 @@ ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
|
|
3275
3279
|
}
|
3276
3280
|
|
3277
3281
|
free(sourceArray);
|
3278
|
-
graph->nodeReads[sourceID] = newArray;
|
3279
3282
|
graph->nodeReadCounts[sourceID] = newLength;
|
3280
3283
|
|
3284
|
+
if(newLength > 0)
|
3285
|
+
graph->nodeReads[sourceID] = newArray;
|
3286
|
+
else
|
3287
|
+
graph->nodeReads[sourceID] = NULL;
|
3288
|
+
|
3281
3289
|
*length = mergeLength;
|
3282
3290
|
return mergeArray;
|
3283
3291
|
}
|
@@ -3321,8 +3329,13 @@ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
|
|
3321
3329
|
sourceIndex++;
|
3322
3330
|
}
|
3323
3331
|
|
3324
|
-
graph->nodeReads[targetID] = mergeArray;
|
3325
3332
|
graph->nodeReadCounts[targetID] = sourceLength;
|
3333
|
+
|
3334
|
+
if(sourceLength > 0)
|
3335
|
+
graph->nodeReads[targetID] = mergeArray;
|
3336
|
+
else
|
3337
|
+
graph->nodeReads[targetID] = NULL;
|
3338
|
+
|
3326
3339
|
return;
|
3327
3340
|
}
|
3328
3341
|
|
@@ -3402,8 +3415,12 @@ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
|
|
3402
3415
|
}
|
3403
3416
|
|
3404
3417
|
free(targetArray);
|
3405
|
-
graph->nodeReads[targetID] = mergeArray;
|
3406
3418
|
graph->nodeReadCounts[targetID] = mergeLength;
|
3419
|
+
|
3420
|
+
if(mergeLength > 0)
|
3421
|
+
graph->nodeReads[targetID] = mergeArray;
|
3422
|
+
else
|
3423
|
+
graph->nodeReads[targetID] = NULL;
|
3407
3424
|
}
|
3408
3425
|
|
3409
3426
|
static inline Coordinate min(Coordinate A, Coordinate B)
|
@@ -3437,8 +3454,13 @@ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
|
|
3437
3454
|
|
3438
3455
|
if (targetLength == 0) {
|
3439
3456
|
free(targetArray);
|
3440
|
-
graph->nodeReads[targetID] = sourceArray;
|
3441
3457
|
graph->nodeReadCounts[targetID] = sourceLength;
|
3458
|
+
|
3459
|
+
if(sourceLength > 0)
|
3460
|
+
graph->nodeReads[targetID] = sourceArray;
|
3461
|
+
else
|
3462
|
+
graph->nodeReads[targetID] = NULL;
|
3463
|
+
|
3442
3464
|
return;
|
3443
3465
|
}
|
3444
3466
|
|
@@ -3548,9 +3570,13 @@ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
|
|
3548
3570
|
}
|
3549
3571
|
|
3550
3572
|
free(targetArray);
|
3551
|
-
graph->nodeReads[targetID] = mergeArray;
|
3552
3573
|
graph->nodeReadCounts[targetID] = mergeLength;
|
3553
3574
|
|
3575
|
+
if(mergeLength > 0)
|
3576
|
+
graph->nodeReads[targetID] = mergeArray;
|
3577
|
+
else
|
3578
|
+
graph->nodeReads[targetID] = NULL;
|
3579
|
+
|
3554
3580
|
free(sourceArray);
|
3555
3581
|
}
|
3556
3582
|
|
@@ -3693,12 +3719,20 @@ void foldSymmetricalNodeReads(Node * node, Graph * graph)
|
|
3693
3719
|
}
|
3694
3720
|
|
3695
3721
|
free(targetArray);
|
3696
|
-
graph->nodeReads[targetID] = mergeArray;
|
3697
3722
|
graph->nodeReadCounts[targetID] = mergeLength;
|
3698
3723
|
|
3724
|
+
if(mergeLength > 0)
|
3725
|
+
graph->nodeReads[targetID] = mergeArray;
|
3726
|
+
else
|
3727
|
+
graph->nodeReads[targetID] = NULL;
|
3728
|
+
|
3699
3729
|
free(sourceArray);
|
3700
|
-
graph->nodeReads[sourceID] = mergeArray2;
|
3701
3730
|
graph->nodeReadCounts[sourceID] = mergeLength;
|
3731
|
+
|
3732
|
+
if(mergeLength > 0)
|
3733
|
+
graph->nodeReads[sourceID] = mergeArray2;
|
3734
|
+
else
|
3735
|
+
graph->nodeReads[sourceID] = NULL;
|
3702
3736
|
}
|
3703
3737
|
|
3704
3738
|
void shareReadStarts(Node * target, Node * source, Graph * graph)
|
@@ -3973,7 +4007,7 @@ void reallocateNodeDescriptor(Node * node, Coordinate length) {
|
|
3973
4007
|
nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
|
3974
4008
|
writeNucleotideInDescriptor(nucleotide, array, index + shift);
|
3975
4009
|
}
|
3976
|
-
|
4010
|
+
|
3977
4011
|
free(twin->descriptor);
|
3978
4012
|
twin->descriptor = array;
|
3979
4013
|
}
|