bio-velvet_underground 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -5
- data/VERSION +1 -1
- data/ext/src/Makefile +16 -29
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +13 -13
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +37 -51
- data/ext/src/src/scaffold.h +1 -1
- data/ext/src/src/utility.c +9 -10
- data/lib/bio-velvet_underground/binary_sequence_store.rb +18 -1
- data/lib/bio-velvet_underground/graph.rb +5 -4
- data/spec/binary_sequence_store_spec.rb +9 -0
- data/spec/data/5_singles_and_pairs/CnyUnifiedSeq +0 -0
- data/spec/data/5_singles_and_pairs/CnyUnifiedSeq.names +100000 -0
- data/spec/data/5_singles_and_pairs/HOWTO_RECREATE.txt +1 -0
- data/spec/data/5_singles_and_pairs/Log +20 -0
- data/spec/data/5_singles_and_pairs/Roadmaps +293985 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 696cf2a0e11ae0c6848fe4d717e1add4e1452370
|
4
|
+
data.tar.gz: 3b88bb295352c038eddeb118835e88596b66641b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 464490f17d6ab3a98c47c4a4fe7f7dcd08359ba02284986a45119106d8a29da620c1417c1af0d1615199fc912adb0331fdf66242968f5b63f8e2cc03cdf4e1f2
|
7
|
+
data.tar.gz: a0331bd42caecc5adbb2fde466e2c8ea6de71db07c621f844a8ccc9456e78ef13ea1ce41073052bd574e32655676c057d3854a44c9875bedad1314c322746345
|
data/README.md
CHANGED
@@ -20,12 +20,10 @@ Running velvet returns a `Result` object, which is effectively a pointer to a ve
|
|
20
20
|
```ruby
|
21
21
|
require 'bio-velvet_underground'
|
22
22
|
|
23
|
-
#kmer 29, '-short my.fasta' the
|
24
|
-
|
25
|
-
result.result_directory #=> path to temporary directory, containing velvet generated files e.g. contigs.fna
|
26
|
-
|
23
|
+
# Run assembly with kmer 29, '-short my.fasta' the arguments to velveth (not including kmer and directory),
|
24
|
+
# no special arguments given to velvetg.
|
27
25
|
# A pre-defined velvet result directory:
|
28
|
-
result = Bio::Velvet::Runner.new.velvet(29,
|
26
|
+
result = Bio::Velvet::Runner.new.velvet(29, %w(-short my.fasta),[],:output_assembly_path => '/path/to/result')
|
29
27
|
result.result_directory #=> '/path/to/result'
|
30
28
|
```
|
31
29
|
With the magic of Ruby-FFI, the library with the smallest kmer size >= 29 is chosen (in this case 31).
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/ext/src/Makefile
CHANGED
@@ -3,8 +3,8 @@ CFLAGS = -Wall
|
|
3
3
|
DEBUG = -g
|
4
4
|
LIBS = -lm
|
5
5
|
OPT = -O3
|
6
|
-
MAXKMERLENGTH
|
7
|
-
CATEGORIES
|
6
|
+
MAXKMERLENGTH?=31
|
7
|
+
CATEGORIES?=2
|
8
8
|
DEF = -D MAXKMERLENGTH=$(MAXKMERLENGTH) -D CATEGORIES=$(CATEGORIES)
|
9
9
|
PDFLATEX_VERSION := $(shell pdflatex --version 2> /dev/null)
|
10
10
|
|
@@ -20,14 +20,14 @@ endif
|
|
20
20
|
|
21
21
|
ifdef VBIGASSEMBLY
|
22
22
|
override DEF := $(DEF) -D BIGASSEMBLY -D VBIGASSEMBLY
|
23
|
-
endif
|
23
|
+
endif
|
24
24
|
|
25
25
|
|
26
26
|
ifdef LONGSEQUENCES
|
27
27
|
override DEF := $(DEF) -D LONGSEQUENCES
|
28
28
|
endif
|
29
29
|
|
30
|
-
# OpenMP
|
30
|
+
# OpenMP
|
31
31
|
ifdef OPENMP
|
32
32
|
override CFLAGS := $(CFLAGS) -fopenmp
|
33
33
|
endif
|
@@ -39,24 +39,23 @@ endif
|
|
39
39
|
|
40
40
|
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
|
41
41
|
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
42
|
-
OBJSHARED = $(subst obj,obj/shared,$(OBJ))
|
43
42
|
|
44
|
-
default : zlib obj velveth velvetg
|
43
|
+
default : cleanobj zlib obj velveth velvetg doc
|
45
44
|
|
46
45
|
clean : clean-zlib
|
47
|
-
-rm obj/*.o obj/dbg/*.o
|
46
|
+
-rm obj/*.o obj/dbg/*.o ./velvet*
|
48
47
|
-rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
|
49
48
|
-rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
|
50
49
|
|
51
|
-
cleanobj:
|
52
|
-
-rm obj/*.o obj/dbg/*.o
|
50
|
+
cleanobj:
|
51
|
+
-rm obj/*.o obj/dbg/*.o
|
53
52
|
|
54
53
|
ifdef BUNDLEDZLIB
|
55
54
|
Z_LIB_DIR=third-party/zlib-1.2.3
|
56
55
|
Z_LIB_FILES=$(Z_LIB_DIR)/*.o
|
57
56
|
override DEF := $(DEF) -D BUNDLEDZLIB
|
58
57
|
|
59
|
-
zlib:
|
58
|
+
zlib:
|
60
59
|
cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
|
61
60
|
|
62
61
|
clean-zlib :
|
@@ -68,14 +67,14 @@ zlib :
|
|
68
67
|
clean-zlib :
|
69
68
|
endif
|
70
69
|
|
71
|
-
velveth : obj
|
70
|
+
velveth : obj
|
72
71
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velveth obj/tightString.o obj/run.o obj/recycleBin.o obj/splay.o obj/splayTable.o obj/readSet.o obj/binarySequences.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
73
72
|
|
74
73
|
|
75
74
|
velvetg : obj
|
76
75
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velvetg obj/tightString.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/scaffold.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
77
76
|
|
78
|
-
debug : override DEF := $(DEF) -D DEBUG
|
77
|
+
debug : override DEF := $(DEF) -D DEBUG
|
79
78
|
debug : cleanobj obj/dbg
|
80
79
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velveth obj/dbg/tightString.o obj/dbg/run.o obj/dbg/recycleBin.o obj/dbg/splay.o obj/dbg/splayTable.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
81
80
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velvetg obj/dbg/tightString.o obj/dbg/graph.o obj/dbg/run2.o obj/dbg/fibHeap.o obj/dbg/fib.o obj/dbg/concatenatedGraph.o obj/dbg/passageMarker.o obj/dbg/graphStats.o obj/dbg/correctedGraph.o obj/dbg/dfib.o obj/dbg/dfibHeap.o obj/dbg/recycleBin.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/shortReadPairs.o obj/dbg/scaffold.o obj/dbg/locallyCorrectedGraph.o obj/dbg/graphReConstruction.o obj/dbg/roadMap.o obj/dbg/preGraph.o obj/dbg/preGraphConstruction.o obj/dbg/concatenatedPreGraph.o obj/dbg/readCoherentGraph.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
@@ -93,25 +92,25 @@ colordebug : cleanobj obj/dbg_de
|
|
93
92
|
objdir:
|
94
93
|
mkdir -p obj
|
95
94
|
|
96
|
-
obj: zlib objdir $(OBJ)
|
95
|
+
obj: zlib cleanobj objdir $(OBJ)
|
97
96
|
|
98
97
|
obj_de: override DEF := $(DEF) -D COLOR
|
99
98
|
obj_de: zlib cleanobj objdir $(OBJ)
|
100
99
|
|
101
|
-
obj/dbgdir:
|
100
|
+
obj/dbgdir:
|
102
101
|
mkdir -p obj/dbg
|
103
102
|
|
104
|
-
obj/dbg: override DEF := $(DEF) -D DEBUG
|
103
|
+
obj/dbg: override DEF := $(DEF) -D DEBUG
|
105
104
|
obj/dbg: zlib cleanobj obj/dbgdir $(OBJDBG)
|
106
105
|
|
107
106
|
obj/dbg_de: override DEF := $(DEF) -D COLOR -D DEBUG
|
108
107
|
obj/dbg_de: zlib cleanobj obj/dbgdir $(OBJDBG)
|
109
108
|
|
110
109
|
obj/%.o: src/%.c
|
111
|
-
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
110
|
+
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
112
111
|
|
113
112
|
obj/dbg/%.o: src/%.c
|
114
|
-
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
113
|
+
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
115
114
|
|
116
115
|
doc: Manual.pdf
|
117
116
|
|
@@ -124,15 +123,3 @@ endif
|
|
124
123
|
|
125
124
|
test: velvetg velveth
|
126
125
|
cd tests && ./run-tests.sh
|
127
|
-
|
128
|
-
sharedobjdir:
|
129
|
-
mkdir -p obj/shared
|
130
|
-
|
131
|
-
obj/shared: sharedobjdir $(OBJSHARED)
|
132
|
-
|
133
|
-
obj/shared/%.o: src/%.c
|
134
|
-
$(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
135
|
-
|
136
|
-
shared: zlib obj/shared
|
137
|
-
cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o run.o run2.o
|
138
|
-
|
data/ext/src/src/concatenatedPreGraph.c
CHANGED
@@ -47,7 +47,7 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
47
47
|
hasSinglePreArc_pg(getOtherEnd_pg
|
48
48
|
(preArc, preNodeBID),
|
49
49
|
preGraph)
|
50
|
-
&& !isLoop_pg(preArc)
|
50
|
+
&& !isLoop_pg(preArc)
|
51
51
|
&& getDestination_pg(preArc, preNodeBID) != preNodeAID) {
|
52
52
|
|
53
53
|
totalLength += getPreNodeLength_pg(preNodeBID, preGraph);
|
@@ -99,11 +99,11 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
99
99
|
appendDescriptors_pg(&ptr, &writeOffset ,currentPreNodeID, preGraph, false);
|
100
100
|
}
|
101
101
|
|
102
|
-
if (writeOffset != 0)
|
102
|
+
if (writeOffset != 0)
|
103
103
|
while (writeOffset++ != 4)
|
104
104
|
(*ptr) >>= 2;
|
105
105
|
|
106
|
-
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
106
|
+
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
107
107
|
|
108
108
|
// Correct preArcs
|
109
109
|
for (preArc = getPreArc_pg(preNodeBID, preGraph); preArc != NULL_IDX;
|
@@ -209,7 +209,7 @@ static boolean isEligibleTip(IDnum index, PreGraph * preGraph, Coordinate
|
|
209
209
|
if (currentIndex == 0)
|
210
210
|
return true;
|
211
211
|
|
212
|
-
// Joined tips
|
212
|
+
// Joined tips
|
213
213
|
if (simplePreArcCount_pg(-currentIndex, preGraph) < 2)
|
214
214
|
return false;
|
215
215
|
|
data/ext/src/src/correctedGraph.c
CHANGED
@@ -764,7 +764,7 @@ static void remapNodeMarkersOntoNeighbour(Node * source,
|
|
764
764
|
}
|
765
765
|
}
|
766
766
|
|
767
|
-
// Short read markers
|
767
|
+
// Short read markers
|
768
768
|
if (readStartsAreActivated(graph)) {
|
769
769
|
// Update Coordinates
|
770
770
|
sourceArray = getNodeReads(source, graph);
|
@@ -1250,7 +1250,7 @@ static void foldSymmetricalNode(Node * node)
|
|
1250
1250
|
twinNode = node;
|
1251
1251
|
node = tmp;
|
1252
1252
|
}
|
1253
|
-
// Destroy link to old markers
|
1253
|
+
// Destroy link to old markers
|
1254
1254
|
setMarker(node, NULL_IDX);
|
1255
1255
|
|
1256
1256
|
// Reinsert markers properly
|
@@ -1951,7 +1951,7 @@ static void transferNodeData(Node * source, Node * target)
|
|
1951
1951
|
if (getNode(fastPath) == twinSource)
|
1952
1952
|
fastPath = getNextInSequence(fastPath);
|
1953
1953
|
|
1954
|
-
// Next node
|
1954
|
+
// Next node
|
1955
1955
|
if (source == activeNode) {
|
1956
1956
|
activeNode = target;
|
1957
1957
|
todo =
|
@@ -2013,7 +2013,7 @@ static void concatenateNodesAndVaccinate(Node * nodeA, Node * nodeB,
|
|
2013
2013
|
// Read starts
|
2014
2014
|
concatenateReadStarts(nodeA, nodeB, graph);
|
2015
2015
|
|
2016
|
-
// Descriptor management
|
2016
|
+
// Descriptor management
|
2017
2017
|
appendDescriptors(nodeA, nodeB);
|
2018
2018
|
|
2019
2019
|
// Update uniqueness:
|
@@ -2167,7 +2167,7 @@ static void cleanUpRedundancy()
|
|
2167
2167
|
|
2168
2168
|
//velvetLog("Concatenation\n");
|
2169
2169
|
|
2170
|
-
// Freeing up memory
|
2170
|
+
// Freeing up memory
|
2171
2171
|
if (slowMarker != NULL_IDX)
|
2172
2172
|
concatenatePathNodes(slowPath);
|
2173
2173
|
else
|
@@ -2460,7 +2460,7 @@ void clipTipsHard(Graph * graph, boolean conserveLong)
|
|
2460
2460
|
|
2461
2461
|
if (current == NULL)
|
2462
2462
|
continue;
|
2463
|
-
|
2463
|
+
|
2464
2464
|
if (conserveLong && getMarker(current))
|
2465
2465
|
continue;
|
2466
2466
|
|
@@ -2560,7 +2560,7 @@ void correctGraph(Graph * argGraph, ShortLength * argSequenceLengths, Category *
|
|
2560
2560
|
eligibleStartingPoints = mallocOrExit(2 * nodes + 1, IDnum);
|
2561
2561
|
progressStatus = callocOrExit(2 * nodes + 1, boolean);
|
2562
2562
|
todoLists = callocOrExit(2 * nodes + 1, Ticket *);
|
2563
|
-
//Done with memory
|
2563
|
+
//Done with memory
|
2564
2564
|
|
2565
2565
|
resetNodeStatus(graph);
|
2566
2566
|
determineEligibleStartingPoints();
|
@@ -2607,9 +2607,9 @@ void setMaxReadLength(int value)
|
|
2607
2607
|
if (value < 0) {
|
2608
2608
|
velvetLog("Negative branch length %i!\n", value);
|
2609
2609
|
velvetLog("Exiting...\n");
|
2610
|
-
#ifdef DEBUG
|
2610
|
+
#ifdef DEBUG
|
2611
2611
|
abort();
|
2612
|
-
#endif
|
2612
|
+
#endif
|
2613
2613
|
exit(1);
|
2614
2614
|
}
|
2615
2615
|
MAXREADLENGTH = value;
|
@@ -2621,9 +2621,9 @@ void setMaxGaps(int value)
|
|
2621
2621
|
if (value < 0) {
|
2622
2622
|
velvetLog("Negative max gap count %i!\n", value);
|
2623
2623
|
velvetLog("Exiting...\n");
|
2624
|
-
#ifdef DEBUG
|
2624
|
+
#ifdef DEBUG
|
2625
2625
|
abort();
|
2626
|
-
#endif
|
2626
|
+
#endif
|
2627
2627
|
exit(1);
|
2628
2628
|
}
|
2629
2629
|
MAXGAPS = value;
|
@@ -2635,9 +2635,9 @@ void setMaxDivergence(double value)
|
|
2635
2635
|
velvetLog("Divergence rate %lf out of bounds [0,1]!\n",
|
2636
2636
|
value);
|
2637
2637
|
velvetLog("Exiting...\n");
|
2638
|
-
#ifdef DEBUG
|
2638
|
+
#ifdef DEBUG
|
2639
2639
|
abort();
|
2640
|
-
#endif
|
2640
|
+
#endif
|
2641
2641
|
exit(1);
|
2642
2642
|
}
|
2643
2643
|
MAXDIVERGENCE = value;
|
data/ext/src/src/graphStats.c
CHANGED
@@ -522,7 +522,7 @@ void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads)
|
|
522
522
|
}
|
523
523
|
|
524
524
|
velvetFprintf(outfile, "\t%li", (long) markerCount(node));
|
525
|
-
printShortCounts(outfile, node, graph, reads);
|
525
|
+
printShortCounts(outfile, node, graph, reads);
|
526
526
|
|
527
527
|
velvetFprintf(outfile, "\n");
|
528
528
|
}
|
@@ -579,7 +579,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
579
579
|
if (destination == NULL)
|
580
580
|
return;
|
581
581
|
|
582
|
-
// Eliminate those that point to uniquely strain sequences
|
582
|
+
// Eliminate those that point to uniquely strain sequences
|
583
583
|
if (nodeGenomicMultiplicity(destination, firstStrain) != 1) {
|
584
584
|
// velvetLog("Multiple genome reads\n");
|
585
585
|
return;
|
@@ -600,7 +600,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
600
600
|
if (destination == destination2)
|
601
601
|
return;
|
602
602
|
|
603
|
-
// Eliminate those that point to uniquely strain sequences
|
603
|
+
// Eliminate those that point to uniquely strain sequences
|
604
604
|
if (isOnlyGenome(destination2, firstStrain))
|
605
605
|
return;
|
606
606
|
|
@@ -776,7 +776,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
776
776
|
if (regions) {
|
777
777
|
lastRegion->next = newMask(stops[indexStop]);
|
778
778
|
lastRegion = lastRegion->next;
|
779
|
-
} else {
|
779
|
+
} else {
|
780
780
|
regions = newMask(stops[indexStop]);
|
781
781
|
lastRegion = regions;
|
782
782
|
}
|
@@ -803,7 +803,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
803
803
|
if (regions) {
|
804
804
|
lastRegion->next = newMask(stops[indexStop]);
|
805
805
|
lastRegion = lastRegion->next;
|
806
|
-
} else {
|
806
|
+
} else {
|
807
807
|
regions = newMask(stops[indexStop]);
|
808
808
|
lastRegion = regions;
|
809
809
|
}
|
@@ -837,7 +837,7 @@ static int compareCoords(const void * A, const void * B) {
|
|
837
837
|
|
838
838
|
static void sortCoords(Coordinate * array, IDnum length) {
|
839
839
|
qsort(array, (size_t) length, sizeof(Coordinate), compareCoords);
|
840
|
-
}
|
840
|
+
}
|
841
841
|
|
842
842
|
static void getShortReadCoords(Coordinate * starts, Coordinate * stops, Node * node, Graph * graph, ShortLength * readLengths) {
|
843
843
|
ShortReadMarker * markers = getNodeReads(node, graph);
|
@@ -918,7 +918,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
918
918
|
gap = getGap(node, graph);
|
919
919
|
for (position = 0; position < WORDLENGTH; position++) {
|
920
920
|
if (position % 60 == 0 && position > 0)
|
921
|
-
velvetFprintf(outfile, "\n");
|
921
|
+
velvetFprintf(outfile, "\n");
|
922
922
|
nucleotide = getNucleotideChar(position, tString);
|
923
923
|
velvetFprintf(outfile, "%c", nucleotide);
|
924
924
|
}
|
@@ -938,7 +938,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
938
938
|
mask->finish) {
|
939
939
|
next = mask->next;
|
940
940
|
deallocateMask(mask);
|
941
|
-
mask = next;
|
941
|
+
mask = next;
|
942
942
|
}
|
943
943
|
|
944
944
|
if (gap
|
@@ -1047,7 +1047,7 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1047
1047
|
Node * nodeB = *((Node **) B);
|
1048
1048
|
double covA;
|
1049
1049
|
double covB;
|
1050
|
-
|
1050
|
+
|
1051
1051
|
if (getNodeLength(nodeA) == 0)
|
1052
1052
|
nodeA = NULL;
|
1053
1053
|
|
@@ -1063,8 +1063,8 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1063
1063
|
return -1;
|
1064
1064
|
|
1065
1065
|
// Deal with real coverage numbers:
|
1066
|
-
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
-
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1066
|
+
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
+
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1068
1068
|
|
1069
1069
|
if (covA > covB)
|
1070
1070
|
return 1;
|
@@ -1075,14 +1075,14 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1075
1075
|
|
1076
1076
|
double estimated_cov(Graph * graph, char * directory)
|
1077
1077
|
{
|
1078
|
-
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1078
|
+
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1079
1079
|
IDnum index;
|
1080
1080
|
Coordinate halfTotalLength = 0;
|
1081
1081
|
Coordinate sumLength = 0;
|
1082
1082
|
Node *node;
|
1083
1083
|
char *logFilename =
|
1084
1084
|
mallocOrExit(strlen(directory) + 100, char);
|
1085
|
-
char *statsLine =
|
1085
|
+
char *statsLine =
|
1086
1086
|
mallocOrExit(5000, char);
|
1087
1087
|
FILE *logFile;
|
1088
1088
|
|
@@ -1152,7 +1152,7 @@ static boolean terminalReferenceMarker(Node * node, ReadSet * reads) {
|
|
1152
1152
|
|
1153
1153
|
static boolean hasReferenceMarker(Node * node, ReadSet * reads) {
|
1154
1154
|
PassageMarkerI marker;
|
1155
|
-
|
1155
|
+
|
1156
1156
|
for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker))
|
1157
1157
|
if (reads->categories[getAbsolutePassMarkerSeqID(marker) - 1] == REFERENCE)
|
1158
1158
|
return true;
|
@@ -1229,14 +1229,14 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1229
1229
|
IDnum index;
|
1230
1230
|
Node *node;
|
1231
1231
|
boolean denounceReads = readStartsAreActivated(graph);
|
1232
|
-
boolean *res = NULL;
|
1232
|
+
boolean *res = NULL;
|
1233
1233
|
FILE * outfile = NULL;
|
1234
1234
|
|
1235
1235
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1236
|
-
|
1236
|
+
|
1237
1237
|
if (denounceReads)
|
1238
1238
|
res = callocOrExit(sequenceCount(graph), boolean);
|
1239
|
-
|
1239
|
+
|
1240
1240
|
if (export) {
|
1241
1241
|
outfile = fopen(filename, "w");
|
1242
1242
|
|
@@ -1255,7 +1255,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1255
1255
|
if (getNodeLength(node) == 0)
|
1256
1256
|
continue;
|
1257
1257
|
|
1258
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1258
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1259
1259
|
&& !hasReferenceMarker(node, reads))
|
1260
1260
|
removeNodeAndDenounceDubiousReads(graph,
|
1261
1261
|
node,
|
@@ -1273,7 +1273,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1273
1273
|
if (getNodeLength(node) == 0)
|
1274
1274
|
continue;
|
1275
1275
|
|
1276
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1276
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1277
1277
|
&& !terminalReferenceMarker(node, reads))
|
1278
1278
|
removeNodeAndDenounceDubiousReads(graph,
|
1279
1279
|
node,
|
@@ -1296,7 +1296,7 @@ static Coordinate getLongCoverage(Node * node) {
|
|
1296
1296
|
|
1297
1297
|
for (marker = getMarker(node); marker; marker = getNextInNode(marker))
|
1298
1298
|
total += getPassageMarkerLength(marker);
|
1299
|
-
|
1299
|
+
|
1300
1300
|
return total;
|
1301
1301
|
}
|
1302
1302
|
|
@@ -1313,7 +1313,7 @@ void removeLowCoverageReferenceNodes(Graph * graph, double minCov, double minLon
|
|
1313
1313
|
if (getNodeLength(node) == 0)
|
1314
1314
|
continue;
|
1315
1315
|
|
1316
|
-
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1316
|
+
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1317
1317
|
|| getLongCoverage(node) / getNodeLength(node) < minLongCov)
|
1318
1318
|
&& hasReferenceMarker(node, reads)) {
|
1319
1319
|
destroyNodePassageMarkers(graph, node);
|
@@ -1342,7 +1342,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1342
1342
|
return;
|
1343
1343
|
|
1344
1344
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1345
|
-
|
1345
|
+
|
1346
1346
|
if (export) {
|
1347
1347
|
outfile = fopen(filename, "a");
|
1348
1348
|
|
@@ -1360,7 +1360,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1360
1360
|
if (getNodeLength(node) == 0)
|
1361
1361
|
continue;
|
1362
1362
|
|
1363
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1363
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1364
1364
|
&& !hasReferenceMarker(node, reads))
|
1365
1365
|
removeNodeAndDenounceDubiousReads(graph,
|
1366
1366
|
node,
|
@@ -1378,7 +1378,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1378
1378
|
if (getNodeLength(node) == 0)
|
1379
1379
|
continue;
|
1380
1380
|
|
1381
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1381
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1382
1382
|
&& !terminalReferenceMarker(node, reads))
|
1383
1383
|
removeNodeAndDenounceDubiousReads(graph,
|
1384
1384
|
node,
|
@@ -1404,7 +1404,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1404
1404
|
return;
|
1405
1405
|
|
1406
1406
|
velvetLog("Applying an upper coverage cutoff of %f...\n", maxCov);
|
1407
|
-
|
1407
|
+
|
1408
1408
|
if (export) {
|
1409
1409
|
outfile = fopen(filename, "w");
|
1410
1410
|
|
@@ -1423,7 +1423,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1423
1423
|
&& getTotalCoverage(node) / getNodeLength(node) > maxCov) {
|
1424
1424
|
destroyNodePassageMarkers(graph, node);
|
1425
1425
|
|
1426
|
-
if (export && getNodeLength(node) > minLength)
|
1426
|
+
if (export && getNodeLength(node) > minLength)
|
1427
1427
|
exportLongNodeSequence(outfile, node, graph, NULL, NULL, -1);
|
1428
1428
|
|
1429
1429
|
destroyNode(node, graph);
|
@@ -1784,7 +1784,7 @@ Coordinate totalAssemblyLength(Graph * graph)
|
|
1784
1784
|
return total;
|
1785
1785
|
}
|
1786
1786
|
|
1787
|
-
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1787
|
+
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1788
1788
|
{
|
1789
1789
|
IDnum res = 0;
|
1790
1790
|
boolean * used = callocOrExit(sequenceCount(graph) + 1, boolean);
|
@@ -1798,16 +1798,16 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1798
1798
|
node = getNodeInGraph(graph, nodeID);
|
1799
1799
|
if (node == NULL || getNodeLength(node) < minContigLength)
|
1800
1800
|
continue;
|
1801
|
-
|
1801
|
+
|
1802
1802
|
// Long reads
|
1803
1803
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1804
1804
|
readID = getPassageMarkerSequenceID(marker);
|
1805
1805
|
if (readID < 0)
|
1806
1806
|
readID = -readID;
|
1807
|
-
used[readID] = true;
|
1808
|
-
}
|
1807
|
+
used[readID] = true;
|
1808
|
+
}
|
1809
1809
|
|
1810
|
-
// Short reads
|
1810
|
+
// Short reads
|
1811
1811
|
if (!readStartsAreActivated(graph))
|
1812
1812
|
continue;
|
1813
1813
|
|
@@ -1816,23 +1816,23 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1816
1816
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1817
1817
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1818
1818
|
readID = getShortReadMarkerID(shortReadMarker);
|
1819
|
-
used[readID] = true;
|
1819
|
+
used[readID] = true;
|
1820
1820
|
}
|
1821
|
-
|
1821
|
+
|
1822
1822
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1823
1823
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1824
1824
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1825
1825
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1826
1826
|
readID = getShortReadMarkerID(shortReadMarker);
|
1827
|
-
used[readID] = true;
|
1827
|
+
used[readID] = true;
|
1828
1828
|
}
|
1829
1829
|
}
|
1830
1830
|
|
1831
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1831
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1832
1832
|
if (used[readID])
|
1833
1833
|
res++;
|
1834
1834
|
|
1835
|
-
free(used);
|
1835
|
+
free(used);
|
1836
1836
|
|
1837
1837
|
return res;
|
1838
1838
|
}
|
@@ -1841,7 +1841,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1841
1841
|
{
|
1842
1842
|
char *logFilename =
|
1843
1843
|
mallocOrExit(strlen(directory) + 100, char);
|
1844
|
-
char *statsLine =
|
1844
|
+
char *statsLine =
|
1845
1845
|
mallocOrExit(5000, char);
|
1846
1846
|
FILE *logFile;
|
1847
1847
|
|
@@ -1860,7 +1860,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1860
1860
|
(long) sequenceCount(graph));
|
1861
1861
|
|
1862
1862
|
velvetFprintf(logFile, "%s", statsLine);
|
1863
|
-
|
1863
|
+
velvetFprintf(stdout, "%s", statsLine);
|
1864
1864
|
|
1865
1865
|
fclose(logFile);
|
1866
1866
|
free(logFilename);
|
@@ -1888,16 +1888,16 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1888
1888
|
node = getNodeInGraph(graph, nodeID);
|
1889
1889
|
if (node == NULL || getNodeLength(node) < minContigKmerLength)
|
1890
1890
|
continue;
|
1891
|
-
|
1891
|
+
|
1892
1892
|
// Long reads
|
1893
1893
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1894
1894
|
readID = getPassageMarkerSequenceID(marker);
|
1895
1895
|
if (readID < 0)
|
1896
1896
|
readID = -readID;
|
1897
|
-
used[readID] = true;
|
1898
|
-
}
|
1897
|
+
used[readID] = true;
|
1898
|
+
}
|
1899
1899
|
|
1900
|
-
// Short reads
|
1900
|
+
// Short reads
|
1901
1901
|
if (!readStartsAreActivated(graph))
|
1902
1902
|
continue;
|
1903
1903
|
|
@@ -1906,31 +1906,31 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1906
1906
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1907
1907
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1908
1908
|
readID = getShortReadMarkerID(shortReadMarker);
|
1909
|
-
used[readID] = true;
|
1909
|
+
used[readID] = true;
|
1910
1910
|
}
|
1911
|
-
|
1911
|
+
|
1912
1912
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1913
1913
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1914
1914
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1915
1915
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1916
1916
|
readID = getShortReadMarkerID(shortReadMarker);
|
1917
|
-
used[readID] = true;
|
1917
|
+
used[readID] = true;
|
1918
1918
|
}
|
1919
1919
|
}
|
1920
1920
|
|
1921
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1921
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1922
1922
|
if (!used[readID])
|
1923
1923
|
exportTightString(outfile, getTightStringInArray(reads->tSequences, readID - 1), readID);
|
1924
1924
|
|
1925
1925
|
free(outFilename);
|
1926
|
-
free(used);
|
1926
|
+
free(used);
|
1927
1927
|
fclose(outfile);
|
1928
1928
|
}
|
1929
1929
|
|
1930
1930
|
static IDnum getReferenceCount(ReadSet * reads) {
|
1931
1931
|
IDnum index;
|
1932
1932
|
|
1933
|
-
for (index = 0; index < reads->readCount; index++)
|
1933
|
+
for (index = 0; index < reads->readCount; index++)
|
1934
1934
|
if (reads->categories[index] != REFERENCE)
|
1935
1935
|
break;
|
1936
1936
|
|
@@ -1979,7 +1979,7 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1979
1979
|
start = longlongvar;
|
1980
1980
|
sscanf(strtok(NULL, ":-\r\n"), "%lli", &longlongvar);
|
1981
1981
|
finish = longlongvar;
|
1982
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1982
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1983
1983
|
if (start <= finish) {
|
1984
1984
|
strcpy(refCoords[refIndex].name, name);
|
1985
1985
|
refCoords[refIndex].start = start;
|
@@ -1998,17 +1998,17 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1998
1998
|
}
|
1999
1999
|
|
2000
2000
|
strcpy(name, line + 1);
|
2001
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2001
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2002
2002
|
strcpy(refCoords[refIndex].name, name);
|
2003
2003
|
refCoords[refIndex].start = 1;
|
2004
2004
|
refCoords[refIndex].finish = -1;
|
2005
2005
|
refCoords[refIndex].positive_strand = true;
|
2006
2006
|
}
|
2007
2007
|
if (++refIndex == referenceCount)
|
2008
|
-
break;
|
2008
|
+
break;
|
2009
2009
|
}
|
2010
2010
|
}
|
2011
|
-
|
2011
|
+
|
2012
2012
|
fclose(file);
|
2013
2013
|
}
|
2014
2014
|
return refCoords;
|
@@ -2020,23 +2020,23 @@ typedef struct refMap_st {
|
|
2020
2020
|
IDnum refID;
|
2021
2021
|
IDnum refStart;
|
2022
2022
|
IDnum refFinish;
|
2023
|
-
} ATTRIBUTE_PACKED ReferenceMapping;
|
2023
|
+
} ATTRIBUTE_PACKED ReferenceMapping;
|
2024
2024
|
|
2025
2025
|
static int compareReferenceMappings(const void * A, const void * B) {
|
2026
2026
|
ReferenceMapping * refMapA = (ReferenceMapping *) A;
|
2027
2027
|
ReferenceMapping * refMapB = (ReferenceMapping *) B;
|
2028
|
-
|
2028
|
+
|
2029
2029
|
if (refMapA->start < refMapB->start)
|
2030
2030
|
return -1;
|
2031
2031
|
else if (refMapA->start == refMapB->start)
|
2032
2032
|
return 0;
|
2033
|
-
else
|
2033
|
+
else
|
2034
2034
|
return 1;
|
2035
2035
|
}
|
2036
2036
|
|
2037
2037
|
static void initializeReferenceMapping(ReferenceMapping * refMap, PassageMarkerI marker, Node * node) {
|
2038
2038
|
refMap->start = getStartOffset(marker);
|
2039
|
-
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2039
|
+
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2040
2040
|
refMap->refID = getPassageMarkerSequenceID(marker);
|
2041
2041
|
refMap->refStart = getPassageMarkerStart(marker);
|
2042
2042
|
refMap->refFinish = getPassageMarkerFinish(marker);
|
@@ -2046,7 +2046,7 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2046
2046
|
ReferenceCoord * refCoord;
|
2047
2047
|
Coordinate start, finish;
|
2048
2048
|
|
2049
|
-
if (mapping->refID > 0)
|
2049
|
+
if (mapping->refID > 0)
|
2050
2050
|
refCoord = &refCoords[mapping->refID - 1];
|
2051
2051
|
else
|
2052
2052
|
refCoord = &refCoords[-mapping->refID - 1];
|
@@ -2064,13 +2064,13 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2064
2064
|
start = refCoord->start + mapping->refStart + wordLength - 1;
|
2065
2065
|
finish = refCoord->start + mapping->refFinish + 1;
|
2066
2066
|
} else {
|
2067
|
-
start = refCoord->finish - mapping->refStart;
|
2068
|
-
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2067
|
+
start = refCoord->finish - mapping->refStart;
|
2068
|
+
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2069
2069
|
}
|
2070
2070
|
}
|
2071
|
-
|
2071
|
+
|
2072
2072
|
velvetFprintf(file, "%lli\t%lli\t%s\t%lli\t%lli\n",
|
2073
|
-
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2073
|
+
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2074
2074
|
refCoord->name, (long long) start, (long long) finish);
|
2075
2075
|
}
|
2076
2076
|
|
@@ -2089,7 +2089,7 @@ static void exportLongNodeMapping(FILE * outfile, Node * node, ReadSet * reads,
|
|
2089
2089
|
velvetFprintf(outfile, ">contig_%li\n", (long) getNodeID(node));
|
2090
2090
|
|
2091
2091
|
// Create table
|
2092
|
-
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2092
|
+
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2093
2093
|
|
2094
2094
|
// Initialize table
|
2095
2095
|
referenceCount = 0;
|
@@ -2115,9 +2115,9 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2115
2115
|
IDnum nodeIndex, refIndex;
|
2116
2116
|
Node *node;
|
2117
2117
|
ReferenceCoord * refCoords;
|
2118
|
-
IDnum referenceCount = getReferenceCount(reads);
|
2118
|
+
IDnum referenceCount = getReferenceCount(reads);
|
2119
2119
|
|
2120
|
-
if (referenceCount == 0)
|
2120
|
+
if (referenceCount == 0)
|
2121
2121
|
return;
|
2122
2122
|
|
2123
2123
|
refCoords = collectReferenceCoords(seqReadInfo, referenceCount);
|
@@ -2135,7 +2135,7 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2135
2135
|
|
2136
2136
|
if (node == NULL || getNodeLength(node) < minLength)
|
2137
2137
|
continue;
|
2138
|
-
|
2138
|
+
|
2139
2139
|
exportLongNodeMapping(outfile, node, reads, refCoords, getWordLength(graph));
|
2140
2140
|
}
|
2141
2141
|
|