bio-velvet_underground 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +1 -0
- data/.rspec +1 -0
- data/Gemfile +1 -0
- data/README.md +53 -9
- data/VERSION +1 -1
- data/ext/mkrf_conf.rb +22 -4
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +18 -16
- data/ext/src/src/graph.c +50 -16
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- data/lib/bio-velvet_underground.rb +55 -11
- data/lib/bio-velvet_underground/binary_sequence_store.rb +86 -0
- data/lib/bio-velvet_underground/constants.rb +33 -0
- data/lib/bio-velvet_underground/graph.rb +262 -0
- data/lib/bio-velvet_underground/runner.rb +59 -0
- data/spec/binary_sequence_store_spec.rb +12 -0
- data/spec/data/2/CnyUnifiedSeq +0 -0
- data/spec/data/3/Assem/Graph2 +40 -0
- data/spec/data/3/Assem/LastGraph +40 -0
- data/spec/data/3/Assem/Log +42 -0
- data/spec/data/3/Assem/PreGraph +9 -0
- data/spec/data/3/Assem/Roadmaps +15 -0
- data/spec/data/3/Assem/Sequences +50 -0
- data/spec/data/3/Assem/contigs.fa +15 -0
- data/spec/data/3/Assem/stats.txt +5 -0
- data/spec/data/3/Sequences +50 -0
- data/spec/data/4/LastGraphKmer51Head +7 -0
- data/spec/graph_spec.rb +52 -0
- data/spec/runner_spec.rb +18 -0
- data/spec/spec_helper.rb +1 -16
- metadata +34 -4
- data/ext/bioruby.patch +0 -60
- data/lib/bio-velvet_underground/velvet_underground.rb +0 -72
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4af6bceb99a775604acc0b87e641f42834bfe86f
|
4
|
+
data.tar.gz: bc8d0a17768ec2f94fd304c98a58505229b2a955
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f652e8333d0e2fa600bab4a9f3c9a0dc5bb4670b0a9caef3cbbf2999f3435068e22bd65b41c43671910a6eaf808af70a2afa5e4787aa1d90cda347c96ec3a510
|
7
|
+
data.tar.gz: d00da35ed8e0926392a60ce93251d293ccb032803fb3e0b0ec81a9477ff7d6f96a452a75caa36e36a97d765e7aff33821bc73a354c2b74c6c73fae9c87001f3d
|
data/.gitmodules
CHANGED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,34 +2,78 @@
|
|
2
2
|
|
3
3
|
[](http://travis-ci.org/wwood/bioruby-velvet_underground)
|
4
4
|
|
5
|
-
This biogem is aimed at providing Ruby bindings to the velvet assembler's source code.
|
6
|
-
|
7
|
-
Note: this software is under active development!
|
5
|
+
This biogem is aimed at providing Ruby bindings to the velvet assembler's source code. See also [bio-velvet](https://github.com/wwood/bioruby-velvet) for Ruby code that does not bind the velvet C.
|
8
6
|
|
9
7
|
## Installation
|
10
8
|
|
11
9
|
```sh
|
12
10
|
gem install bio-velvet_underground
|
13
11
|
```
|
12
|
+
This can take a few minutes as several versions of velvet with different kmer sizes are compiled.
|
14
13
|
|
15
14
|
## Usage
|
16
15
|
|
17
|
-
The
|
16
|
+
The code is intended to cater for a few specific purposes.
|
18
17
|
|
18
|
+
### Running velvet
|
19
|
+
Running velvet returns a `Result` object, which is effectively a pointer to a velvet result directory
|
19
20
|
```ruby
|
20
21
|
require 'bio-velvet_underground'
|
21
22
|
|
23
|
+
#kmer 29, '-short my.fasta' the argument to velveth, no special arguments given to velvetg.
|
24
|
+
result = Bio::Velvet::Runner.new.velvet(29,"-short my.fasta",'')
|
25
|
+
result.result_directory #=> path to temporary directory, containing velvet generated files e.g. contigs.fa
|
26
|
+
|
27
|
+
# A pre-defined velvet result directory:
|
28
|
+
result = Bio::Velvet::Runner.new.velvet(29,"-short my.fasta",'',:output_assembly_path => '/path/to/result')
|
29
|
+
result.result_directory #=> '/path/to/result'
|
30
|
+
```
|
31
|
+
With the magic of Ruby-FFI, the library with the smallest kmer size >= 29 is chosen (in this case 31).
|
32
|
+
Several libraries are pre-compiled at gem install-time, and then bound at runtime. `velveth` and `velvetg`
|
33
|
+
steps can be run separately if required.
|
34
|
+
|
35
|
+
### Working with the binary sequence file
|
36
|
+
The binary sequence file is created when velveth is run with the `-create_binary` flag.
|
37
|
+
|
38
|
+
```ruby
|
22
39
|
seqs = Bio::Velvet::Underground::BinarySequenceStore.new '/path/to/velvet/directory/CnyUnifiedSeq'
|
23
|
-
seqs[1] #=> 'CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGA'
|
24
40
|
seqs.length #=> 77 (there are 77 sequences in the CnyUnifiedSeq)
|
41
|
+
seqs[1] #=> 'CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGA'
|
42
|
+
seqs[0] #=> nil (indices map directly to the indices in other velvet files)
|
43
|
+
```
|
25
44
|
|
45
|
+
### Working with LastGraph file
|
46
|
+
```ruby
|
47
|
+
path = 'spec/data/3/Assem/LastGraph'
|
48
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path #=> Bio::Velvet::Underground::Graph object
|
49
|
+
|
50
|
+
graph.hash_length #=> 31 (kmer length)
|
51
|
+
graph.node_count #=> 4
|
52
|
+
|
53
|
+
graph.nodes[1] #=> Bio::Velvet::Underground::Graph::Node object
|
54
|
+
graph.nodes[2].ends_of_kmers_of_node #=> 'GTTTAAAAGAAGGAGATTACTTTATAAAA'
|
55
|
+
graph.nodes[2].coverages #=> [58,0] (coverages from different categories)
|
56
|
+
|
57
|
+
graph.nodes[1].short_reads #=> Array of Bio::Velvet::Underground::Graph::NodedRead objects
|
58
|
+
graph.nodes[1].short_reads[0].direction #=> true (i.e. forward w.r.t the node)
|
59
|
+
graph.nodes[1].short_reads[2].read_id #=> 4
|
26
60
|
```
|
61
|
+
There is more to these objects - see the documentation.
|
62
|
+
|
63
|
+
|
64
|
+
Patches to these and other parts of velvet are welcome.
|
65
|
+
|
66
|
+
## Development practice
|
67
|
+
|
68
|
+
The velvet C code 'underground' here is for the most part vanilla velvet code as you might expect.
|
69
|
+
However some changes were necessary to allow binding from this biogem. For instance the library
|
70
|
+
does not write to `$stdout` as this interferes with Ruby's writes to `$stdout`.
|
27
71
|
|
28
|
-
|
72
|
+
There are also some extra options for controlling velvet's behaviour, geared towards taking
|
73
|
+
some of the guesswork out of the assembly process at the expense of a less resolved `LastGraph`.
|
74
|
+
These are currently non-standard modifications - get in touch with @wwood if you are interested.
|
75
|
+
Not invoking these options should leave 'normal' velvet behaviour intact.
|
29
76
|
|
30
|
-
The API doc is online. For more code examples see the test files in
|
31
|
-
the source tree.
|
32
|
-
|
33
77
|
## Project home page
|
34
78
|
|
35
79
|
Information on the source tree, documentation, examples, issues and
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/ext/mkrf_conf.rb
CHANGED
@@ -22,15 +22,33 @@ require 'rake/clean'
|
|
22
22
|
path = File.expand_path(File.dirname(__FILE__))
|
23
23
|
path_external = File.join(File.dirname(__FILE__), "../lib/bio-velvet_underground/external")
|
24
24
|
|
25
|
+
# Require constants - code shared between before and after installation
|
26
|
+
require File.join(File.dirname(__FILE__), "../lib/bio-velvet_underground/constants")
|
27
|
+
|
25
28
|
task :compile do
|
26
29
|
cd(File.join(File.dirname(__FILE__),'src')) do
|
27
|
-
sh "patch -p1 < ../bioruby.patch"
|
28
30
|
case Config::CONFIG['host_os']
|
29
31
|
when /linux/
|
30
|
-
|
31
|
-
|
32
|
+
|
33
|
+
# Create library with default install params
|
34
|
+
$stdout.puts "Making velvet shared library with default parameters"
|
35
|
+
sh "make shared"
|
36
|
+
shared_location = 'obj/shared'
|
37
|
+
cp(File.join(shared_location,"libvelvet.so.1.0"), path_external)
|
38
|
+
$stdout.puts "Finished installing default library version"
|
39
|
+
# Create libraries with larger non-default kmer sizes
|
40
|
+
Bio::Velvet::Underground.max_kmers.each do |max_kmer|
|
41
|
+
next if max_kmer == Bio::Velvet::Underground::DEFAULT_MAXKMERLENGTH
|
42
|
+
|
43
|
+
$stdout.puts "Making velvet shared library with kmer "+max_kmer.to_s
|
44
|
+
library_name = File.basename Bio::Velvet::Underground.library_location_of(max_kmer)
|
45
|
+
sh "make clean; make MAXKMERLENGTH="+max_kmer.to_s+" shared"
|
46
|
+
cp(File.join(shared_location,"libvelvet.so.1.0"),
|
47
|
+
File.join(path_external,File.basename(library_name)))
|
48
|
+
end
|
49
|
+
|
32
50
|
when /darwin/
|
33
|
-
|
51
|
+
raise NotImplementedError, "possibly will work, but bio-velvet_underground is not tested on OSX"
|
34
52
|
when /mswin|mingw/ then raise NotImplementedError, "bio-velvet_underground library is not available for Windows platform"
|
35
53
|
end #case
|
36
54
|
end #cd
|
data/ext/src/Makefile
CHANGED
@@ -20,14 +20,14 @@ endif
|
|
20
20
|
|
21
21
|
ifdef VBIGASSEMBLY
|
22
22
|
override DEF := $(DEF) -D BIGASSEMBLY -D VBIGASSEMBLY
|
23
|
-
endif
|
23
|
+
endif
|
24
24
|
|
25
25
|
|
26
26
|
ifdef LONGSEQUENCES
|
27
27
|
override DEF := $(DEF) -D LONGSEQUENCES
|
28
28
|
endif
|
29
29
|
|
30
|
-
# OpenMP
|
30
|
+
# OpenMP
|
31
31
|
ifdef OPENMP
|
32
32
|
override CFLAGS := $(CFLAGS) -fopenmp
|
33
33
|
endif
|
@@ -39,23 +39,24 @@ endif
|
|
39
39
|
|
40
40
|
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
|
41
41
|
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
42
|
+
OBJSHARED = $(subst obj,obj/shared,$(OBJ))
|
42
43
|
|
43
|
-
default :
|
44
|
+
default : zlib obj velveth velvetg
|
44
45
|
|
45
46
|
clean : clean-zlib
|
46
|
-
-rm obj/*.o obj/dbg/*.o ./velvet*
|
47
|
+
-rm obj/*.o obj/dbg/*.o obj/shared/*.o obj/shared/velvet.so.0.0.1 ./velvet*
|
47
48
|
-rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
|
48
49
|
-rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
|
49
50
|
|
50
|
-
cleanobj:
|
51
|
-
-rm obj/*.o obj/dbg/*.o
|
51
|
+
cleanobj:
|
52
|
+
-rm obj/*.o obj/dbg/*.o obj/shared/*.o
|
52
53
|
|
53
54
|
ifdef BUNDLEDZLIB
|
54
55
|
Z_LIB_DIR=third-party/zlib-1.2.3
|
55
56
|
Z_LIB_FILES=$(Z_LIB_DIR)/*.o
|
56
57
|
override DEF := $(DEF) -D BUNDLEDZLIB
|
57
58
|
|
58
|
-
zlib:
|
59
|
+
zlib:
|
59
60
|
cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
|
60
61
|
|
61
62
|
clean-zlib :
|
@@ -67,14 +68,14 @@ zlib :
|
|
67
68
|
clean-zlib :
|
68
69
|
endif
|
69
70
|
|
70
|
-
velveth : obj
|
71
|
+
velveth : obj
|
71
72
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velveth obj/tightString.o obj/run.o obj/recycleBin.o obj/splay.o obj/splayTable.o obj/readSet.o obj/binarySequences.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
72
73
|
|
73
74
|
|
74
75
|
velvetg : obj
|
75
76
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velvetg obj/tightString.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/scaffold.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
76
77
|
|
77
|
-
debug : override DEF := $(DEF) -D DEBUG
|
78
|
+
debug : override DEF := $(DEF) -D DEBUG
|
78
79
|
debug : cleanobj obj/dbg
|
79
80
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velveth obj/dbg/tightString.o obj/dbg/run.o obj/dbg/recycleBin.o obj/dbg/splay.o obj/dbg/splayTable.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
80
81
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velvetg obj/dbg/tightString.o obj/dbg/graph.o obj/dbg/run2.o obj/dbg/fibHeap.o obj/dbg/fib.o obj/dbg/concatenatedGraph.o obj/dbg/passageMarker.o obj/dbg/graphStats.o obj/dbg/correctedGraph.o obj/dbg/dfib.o obj/dbg/dfibHeap.o obj/dbg/recycleBin.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/shortReadPairs.o obj/dbg/scaffold.o obj/dbg/locallyCorrectedGraph.o obj/dbg/graphReConstruction.o obj/dbg/roadMap.o obj/dbg/preGraph.o obj/dbg/preGraphConstruction.o obj/dbg/concatenatedPreGraph.o obj/dbg/readCoherentGraph.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
@@ -92,25 +93,25 @@ colordebug : cleanobj obj/dbg_de
|
|
92
93
|
objdir:
|
93
94
|
mkdir -p obj
|
94
95
|
|
95
|
-
obj: zlib
|
96
|
+
obj: zlib objdir $(OBJ)
|
96
97
|
|
97
98
|
obj_de: override DEF := $(DEF) -D COLOR
|
98
99
|
obj_de: zlib cleanobj objdir $(OBJ)
|
99
100
|
|
100
|
-
obj/dbgdir:
|
101
|
+
obj/dbgdir:
|
101
102
|
mkdir -p obj/dbg
|
102
103
|
|
103
|
-
obj/dbg: override DEF := $(DEF) -D DEBUG
|
104
|
+
obj/dbg: override DEF := $(DEF) -D DEBUG
|
104
105
|
obj/dbg: zlib cleanobj obj/dbgdir $(OBJDBG)
|
105
106
|
|
106
107
|
obj/dbg_de: override DEF := $(DEF) -D COLOR -D DEBUG
|
107
108
|
obj/dbg_de: zlib cleanobj obj/dbgdir $(OBJDBG)
|
108
109
|
|
109
110
|
obj/%.o: src/%.c
|
110
|
-
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
111
|
+
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
111
112
|
|
112
113
|
obj/dbg/%.o: src/%.c
|
113
|
-
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
114
|
+
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
114
115
|
|
115
116
|
doc: Manual.pdf
|
116
117
|
|
@@ -123,3 +124,15 @@ endif
|
|
123
124
|
|
124
125
|
test: velvetg velveth
|
125
126
|
cd tests && ./run-tests.sh
|
127
|
+
|
128
|
+
sharedobjdir:
|
129
|
+
mkdir -p obj/shared
|
130
|
+
|
131
|
+
obj/shared: sharedobjdir $(OBJSHARED)
|
132
|
+
|
133
|
+
obj/shared/%.o: src/%.c
|
134
|
+
$(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
135
|
+
|
136
|
+
shared: zlib obj/shared
|
137
|
+
cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o run.o run2.o
|
138
|
+
|
@@ -47,7 +47,7 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
47
47
|
hasSinglePreArc_pg(getOtherEnd_pg
|
48
48
|
(preArc, preNodeBID),
|
49
49
|
preGraph)
|
50
|
-
&& !isLoop_pg(preArc)
|
50
|
+
&& !isLoop_pg(preArc)
|
51
51
|
&& getDestination_pg(preArc, preNodeBID) != preNodeAID) {
|
52
52
|
|
53
53
|
totalLength += getPreNodeLength_pg(preNodeBID, preGraph);
|
@@ -99,11 +99,11 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
99
99
|
appendDescriptors_pg(&ptr, &writeOffset ,currentPreNodeID, preGraph, false);
|
100
100
|
}
|
101
101
|
|
102
|
-
if (writeOffset != 0)
|
102
|
+
if (writeOffset != 0)
|
103
103
|
while (writeOffset++ != 4)
|
104
104
|
(*ptr) >>= 2;
|
105
105
|
|
106
|
-
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
106
|
+
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
107
107
|
|
108
108
|
// Correct preArcs
|
109
109
|
for (preArc = getPreArc_pg(preNodeBID, preGraph); preArc != NULL_IDX;
|
@@ -209,7 +209,7 @@ static boolean isEligibleTip(IDnum index, PreGraph * preGraph, Coordinate
|
|
209
209
|
if (currentIndex == 0)
|
210
210
|
return true;
|
211
211
|
|
212
|
-
// Joined tips
|
212
|
+
// Joined tips
|
213
213
|
if (simplePreArcCount_pg(-currentIndex, preGraph) < 2)
|
214
214
|
return false;
|
215
215
|
|
@@ -764,7 +764,7 @@ static void remapNodeMarkersOntoNeighbour(Node * source,
|
|
764
764
|
}
|
765
765
|
}
|
766
766
|
|
767
|
-
// Short read markers
|
767
|
+
// Short read markers
|
768
768
|
if (readStartsAreActivated(graph)) {
|
769
769
|
// Update Coordinates
|
770
770
|
sourceArray = getNodeReads(source, graph);
|
@@ -1250,7 +1250,7 @@ static void foldSymmetricalNode(Node * node)
|
|
1250
1250
|
twinNode = node;
|
1251
1251
|
node = tmp;
|
1252
1252
|
}
|
1253
|
-
// Destroy link to old markers
|
1253
|
+
// Destroy link to old markers
|
1254
1254
|
setMarker(node, NULL_IDX);
|
1255
1255
|
|
1256
1256
|
// Reinsert markers properly
|
@@ -1951,7 +1951,7 @@ static void transferNodeData(Node * source, Node * target)
|
|
1951
1951
|
if (getNode(fastPath) == twinSource)
|
1952
1952
|
fastPath = getNextInSequence(fastPath);
|
1953
1953
|
|
1954
|
-
// Next node
|
1954
|
+
// Next node
|
1955
1955
|
if (source == activeNode) {
|
1956
1956
|
activeNode = target;
|
1957
1957
|
todo =
|
@@ -2013,7 +2013,7 @@ static void concatenateNodesAndVaccinate(Node * nodeA, Node * nodeB,
|
|
2013
2013
|
// Read starts
|
2014
2014
|
concatenateReadStarts(nodeA, nodeB, graph);
|
2015
2015
|
|
2016
|
-
// Descriptor management
|
2016
|
+
// Descriptor management
|
2017
2017
|
appendDescriptors(nodeA, nodeB);
|
2018
2018
|
|
2019
2019
|
// Update uniqueness:
|
@@ -2094,9 +2094,11 @@ static void cleanUpRedundancy()
|
|
2094
2094
|
if (isTerminal(slowMarker))
|
2095
2095
|
slowLength = finalLength;
|
2096
2096
|
else {
|
2097
|
-
|
2098
|
-
|
2099
|
-
|
2097
|
+
if(getPassageMarkerFinish(slowMarker) != 0)
|
2098
|
+
slowLength = slowToFastMapping[getPassageMarkerFinish(slowMarker) - 1];
|
2099
|
+
else
|
2100
|
+
slowLength = slowToFastMapping[0];
|
2101
|
+
|
2100
2102
|
if (slowLength < slowConstraint)
|
2101
2103
|
slowLength = slowConstraint;
|
2102
2104
|
}
|
@@ -2165,7 +2167,7 @@ static void cleanUpRedundancy()
|
|
2165
2167
|
|
2166
2168
|
//velvetLog("Concatenation\n");
|
2167
2169
|
|
2168
|
-
// Freeing up memory
|
2170
|
+
// Freeing up memory
|
2169
2171
|
if (slowMarker != NULL_IDX)
|
2170
2172
|
concatenatePathNodes(slowPath);
|
2171
2173
|
else
|
@@ -2458,7 +2460,7 @@ void clipTipsHard(Graph * graph, boolean conserveLong)
|
|
2458
2460
|
|
2459
2461
|
if (current == NULL)
|
2460
2462
|
continue;
|
2461
|
-
|
2463
|
+
|
2462
2464
|
if (conserveLong && getMarker(current))
|
2463
2465
|
continue;
|
2464
2466
|
|
@@ -2558,7 +2560,7 @@ void correctGraph(Graph * argGraph, ShortLength * argSequenceLengths, Category *
|
|
2558
2560
|
eligibleStartingPoints = mallocOrExit(2 * nodes + 1, IDnum);
|
2559
2561
|
progressStatus = callocOrExit(2 * nodes + 1, boolean);
|
2560
2562
|
todoLists = callocOrExit(2 * nodes + 1, Ticket *);
|
2561
|
-
//Done with memory
|
2563
|
+
//Done with memory
|
2562
2564
|
|
2563
2565
|
resetNodeStatus(graph);
|
2564
2566
|
determineEligibleStartingPoints();
|
@@ -2605,9 +2607,9 @@ void setMaxReadLength(int value)
|
|
2605
2607
|
if (value < 0) {
|
2606
2608
|
velvetLog("Negative branch length %i!\n", value);
|
2607
2609
|
velvetLog("Exiting...\n");
|
2608
|
-
#ifdef DEBUG
|
2610
|
+
#ifdef DEBUG
|
2609
2611
|
abort();
|
2610
|
-
#endif
|
2612
|
+
#endif
|
2611
2613
|
exit(1);
|
2612
2614
|
}
|
2613
2615
|
MAXREADLENGTH = value;
|
@@ -2619,9 +2621,9 @@ void setMaxGaps(int value)
|
|
2619
2621
|
if (value < 0) {
|
2620
2622
|
velvetLog("Negative max gap count %i!\n", value);
|
2621
2623
|
velvetLog("Exiting...\n");
|
2622
|
-
#ifdef DEBUG
|
2624
|
+
#ifdef DEBUG
|
2623
2625
|
abort();
|
2624
|
-
#endif
|
2626
|
+
#endif
|
2625
2627
|
exit(1);
|
2626
2628
|
}
|
2627
2629
|
MAXGAPS = value;
|
@@ -2633,9 +2635,9 @@ void setMaxDivergence(double value)
|
|
2633
2635
|
velvetLog("Divergence rate %lf out of bounds [0,1]!\n",
|
2634
2636
|
value);
|
2635
2637
|
velvetLog("Exiting...\n");
|
2636
|
-
#ifdef DEBUG
|
2638
|
+
#ifdef DEBUG
|
2637
2639
|
abort();
|
2638
|
-
#endif
|
2640
|
+
#endif
|
2639
2641
|
exit(1);
|
2640
2642
|
}
|
2641
2643
|
MAXDIVERGENCE = value;
|
data/ext/src/src/graph.c
CHANGED
@@ -906,7 +906,7 @@ void appendDescriptors(Node * destination, Node * source)
|
|
906
906
|
twinDestination->length = newLength;
|
907
907
|
}
|
908
908
|
|
909
|
-
static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
|
909
|
+
static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
|
910
910
|
{
|
911
911
|
Coordinate index;
|
912
912
|
Nucleotide nucleotide;
|
@@ -917,7 +917,7 @@ static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Des
|
|
917
917
|
}
|
918
918
|
}
|
919
919
|
|
920
|
-
static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
|
920
|
+
static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
|
921
921
|
{
|
922
922
|
Coordinate shift = totalLength - destinationLength - sourceLength;
|
923
923
|
Coordinate index;
|
@@ -2052,7 +2052,7 @@ Graph *importGraph(char *filename)
|
|
2052
2052
|
short short_var;
|
2053
2053
|
char c;
|
2054
2054
|
|
2055
|
-
if (file == NULL)
|
2055
|
+
if (file == NULL)
|
2056
2056
|
exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
|
2057
2057
|
|
2058
2058
|
velvetLog("Reading graph file %s\n", filename);
|
@@ -2204,9 +2204,9 @@ Graph *importGraph(char *filename)
|
|
2204
2204
|
velvetLog
|
2205
2205
|
("ERROR: reading in graph - only %d items read for line '%s'",
|
2206
2206
|
sCount, line);
|
2207
|
-
#ifdef DEBUG
|
2207
|
+
#ifdef DEBUG
|
2208
2208
|
abort();
|
2209
|
-
#endif
|
2209
|
+
#endif
|
2210
2210
|
exit(1);
|
2211
2211
|
}
|
2212
2212
|
newMarker =
|
@@ -2404,7 +2404,7 @@ Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
|
|
2404
2404
|
#endif
|
2405
2405
|
}
|
2406
2406
|
}
|
2407
|
-
|
2407
|
+
|
2408
2408
|
index++;
|
2409
2409
|
}
|
2410
2410
|
|
@@ -2506,7 +2506,7 @@ Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_str
|
|
2506
2506
|
twin = node->twinNode;
|
2507
2507
|
twin->length = node->length;
|
2508
2508
|
twin->descriptor =
|
2509
|
-
callocOrExit(arrayLength, Descriptor);
|
2509
|
+
callocOrExit(arrayLength, Descriptor);
|
2510
2510
|
|
2511
2511
|
index = 0;
|
2512
2512
|
while ((c = getc(file)) != '\n') {
|
@@ -3145,9 +3145,13 @@ ShortReadMarker *extractFrontOfNodeReads(Node * node,
|
|
3145
3145
|
}
|
3146
3146
|
|
3147
3147
|
free(sourceArray);
|
3148
|
-
graph->nodeReads[sourceID] = newArray;
|
3149
3148
|
graph->nodeReadCounts[sourceID] = newLength;
|
3150
3149
|
|
3150
|
+
if(newLength > 0)
|
3151
|
+
graph->nodeReads[sourceID] = newArray;
|
3152
|
+
else
|
3153
|
+
graph->nodeReads[sourceID] = NULL;
|
3154
|
+
|
3151
3155
|
*length = mergeLength;
|
3152
3156
|
return mergeArray;
|
3153
3157
|
}
|
@@ -3275,9 +3279,13 @@ ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
|
|
3275
3279
|
}
|
3276
3280
|
|
3277
3281
|
free(sourceArray);
|
3278
|
-
graph->nodeReads[sourceID] = newArray;
|
3279
3282
|
graph->nodeReadCounts[sourceID] = newLength;
|
3280
3283
|
|
3284
|
+
if(newLength > 0)
|
3285
|
+
graph->nodeReads[sourceID] = newArray;
|
3286
|
+
else
|
3287
|
+
graph->nodeReads[sourceID] = NULL;
|
3288
|
+
|
3281
3289
|
*length = mergeLength;
|
3282
3290
|
return mergeArray;
|
3283
3291
|
}
|
@@ -3321,8 +3329,13 @@ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
|
|
3321
3329
|
sourceIndex++;
|
3322
3330
|
}
|
3323
3331
|
|
3324
|
-
graph->nodeReads[targetID] = mergeArray;
|
3325
3332
|
graph->nodeReadCounts[targetID] = sourceLength;
|
3333
|
+
|
3334
|
+
if(sourceLength > 0)
|
3335
|
+
graph->nodeReads[targetID] = mergeArray;
|
3336
|
+
else
|
3337
|
+
graph->nodeReads[targetID] = NULL;
|
3338
|
+
|
3326
3339
|
return;
|
3327
3340
|
}
|
3328
3341
|
|
@@ -3402,8 +3415,12 @@ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
|
|
3402
3415
|
}
|
3403
3416
|
|
3404
3417
|
free(targetArray);
|
3405
|
-
graph->nodeReads[targetID] = mergeArray;
|
3406
3418
|
graph->nodeReadCounts[targetID] = mergeLength;
|
3419
|
+
|
3420
|
+
if(mergeLength > 0)
|
3421
|
+
graph->nodeReads[targetID] = mergeArray;
|
3422
|
+
else
|
3423
|
+
graph->nodeReads[targetID] = NULL;
|
3407
3424
|
}
|
3408
3425
|
|
3409
3426
|
static inline Coordinate min(Coordinate A, Coordinate B)
|
@@ -3437,8 +3454,13 @@ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
|
|
3437
3454
|
|
3438
3455
|
if (targetLength == 0) {
|
3439
3456
|
free(targetArray);
|
3440
|
-
graph->nodeReads[targetID] = sourceArray;
|
3441
3457
|
graph->nodeReadCounts[targetID] = sourceLength;
|
3458
|
+
|
3459
|
+
if(sourceLength > 0)
|
3460
|
+
graph->nodeReads[targetID] = sourceArray;
|
3461
|
+
else
|
3462
|
+
graph->nodeReads[targetID] = NULL;
|
3463
|
+
|
3442
3464
|
return;
|
3443
3465
|
}
|
3444
3466
|
|
@@ -3548,9 +3570,13 @@ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
|
|
3548
3570
|
}
|
3549
3571
|
|
3550
3572
|
free(targetArray);
|
3551
|
-
graph->nodeReads[targetID] = mergeArray;
|
3552
3573
|
graph->nodeReadCounts[targetID] = mergeLength;
|
3553
3574
|
|
3575
|
+
if(mergeLength > 0)
|
3576
|
+
graph->nodeReads[targetID] = mergeArray;
|
3577
|
+
else
|
3578
|
+
graph->nodeReads[targetID] = NULL;
|
3579
|
+
|
3554
3580
|
free(sourceArray);
|
3555
3581
|
}
|
3556
3582
|
|
@@ -3693,12 +3719,20 @@ void foldSymmetricalNodeReads(Node * node, Graph * graph)
|
|
3693
3719
|
}
|
3694
3720
|
|
3695
3721
|
free(targetArray);
|
3696
|
-
graph->nodeReads[targetID] = mergeArray;
|
3697
3722
|
graph->nodeReadCounts[targetID] = mergeLength;
|
3698
3723
|
|
3724
|
+
if(mergeLength > 0)
|
3725
|
+
graph->nodeReads[targetID] = mergeArray;
|
3726
|
+
else
|
3727
|
+
graph->nodeReads[targetID] = NULL;
|
3728
|
+
|
3699
3729
|
free(sourceArray);
|
3700
|
-
graph->nodeReads[sourceID] = mergeArray2;
|
3701
3730
|
graph->nodeReadCounts[sourceID] = mergeLength;
|
3731
|
+
|
3732
|
+
if(mergeLength > 0)
|
3733
|
+
graph->nodeReads[sourceID] = mergeArray2;
|
3734
|
+
else
|
3735
|
+
graph->nodeReads[sourceID] = NULL;
|
3702
3736
|
}
|
3703
3737
|
|
3704
3738
|
void shareReadStarts(Node * target, Node * source, Graph * graph)
|
@@ -3973,7 +4007,7 @@ void reallocateNodeDescriptor(Node * node, Coordinate length) {
|
|
3973
4007
|
nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
|
3974
4008
|
writeNucleotideInDescriptor(nucleotide, array, index + shift);
|
3975
4009
|
}
|
3976
|
-
|
4010
|
+
|
3977
4011
|
free(twin->descriptor);
|
3978
4012
|
twin->descriptor = array;
|
3979
4013
|
}
|