bio-velvet_underground 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +13 -13
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65689e857131512e65603fb6fa5f6d66cca0b9af
|
4
|
+
data.tar.gz: f3898e399d3ff7c44e6ebcb37920be2a119603e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86af06bc883460bd7c05d3f3a3925cd9e9aaf7b2914d552936a84ad243c322f4777b0797c58c868a3f7045507a969a2d2b1f1b2ad56f593d4ab3a011afab2b6a
|
7
|
+
data.tar.gz: 4e42bf15c4bfde9bdd437203b2f559caef66fd3c47c86cabd5f6255f1666c4933e6253b7ae09f21622748f25dc4b495cee77e90b035b7edcc24f2cc2c16ae2f6
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/ext/src/Makefile
CHANGED
@@ -20,14 +20,14 @@ endif
|
|
20
20
|
|
21
21
|
ifdef VBIGASSEMBLY
|
22
22
|
override DEF := $(DEF) -D BIGASSEMBLY -D VBIGASSEMBLY
|
23
|
-
endif
|
23
|
+
endif
|
24
24
|
|
25
25
|
|
26
26
|
ifdef LONGSEQUENCES
|
27
27
|
override DEF := $(DEF) -D LONGSEQUENCES
|
28
28
|
endif
|
29
29
|
|
30
|
-
# OpenMP
|
30
|
+
# OpenMP
|
31
31
|
ifdef OPENMP
|
32
32
|
override CFLAGS := $(CFLAGS) -fopenmp
|
33
33
|
endif
|
@@ -39,23 +39,24 @@ endif
|
|
39
39
|
|
40
40
|
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
|
41
41
|
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
42
|
+
OBJSHARED = $(subst obj,obj/shared,$(OBJ))
|
42
43
|
|
43
|
-
default :
|
44
|
+
default : zlib obj velveth velvetg
|
44
45
|
|
45
46
|
clean : clean-zlib
|
46
|
-
-rm obj/*.o obj/dbg/*.o ./velvet*
|
47
|
+
-rm obj/*.o obj/dbg/*.o obj/shared/*.o obj/shared/velvet.so.0.0.1 ./velvet*
|
47
48
|
-rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
|
48
49
|
-rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
|
49
50
|
|
50
|
-
cleanobj:
|
51
|
-
-rm obj/*.o obj/dbg/*.o
|
51
|
+
cleanobj:
|
52
|
+
-rm obj/*.o obj/dbg/*.o obj/shared/*.o
|
52
53
|
|
53
54
|
ifdef BUNDLEDZLIB
|
54
55
|
Z_LIB_DIR=third-party/zlib-1.2.3
|
55
56
|
Z_LIB_FILES=$(Z_LIB_DIR)/*.o
|
56
57
|
override DEF := $(DEF) -D BUNDLEDZLIB
|
57
58
|
|
58
|
-
zlib:
|
59
|
+
zlib:
|
59
60
|
cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
|
60
61
|
|
61
62
|
clean-zlib :
|
@@ -67,14 +68,14 @@ zlib :
|
|
67
68
|
clean-zlib :
|
68
69
|
endif
|
69
70
|
|
70
|
-
velveth : obj
|
71
|
+
velveth : obj
|
71
72
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velveth obj/tightString.o obj/run.o obj/recycleBin.o obj/splay.o obj/splayTable.o obj/readSet.o obj/binarySequences.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
72
73
|
|
73
74
|
|
74
75
|
velvetg : obj
|
75
76
|
$(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velvetg obj/tightString.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/scaffold.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
76
77
|
|
77
|
-
debug : override DEF := $(DEF) -D DEBUG
|
78
|
+
debug : override DEF := $(DEF) -D DEBUG
|
78
79
|
debug : cleanobj obj/dbg
|
79
80
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velveth obj/dbg/tightString.o obj/dbg/run.o obj/dbg/recycleBin.o obj/dbg/splay.o obj/dbg/splayTable.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
80
81
|
$(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velvetg obj/dbg/tightString.o obj/dbg/graph.o obj/dbg/run2.o obj/dbg/fibHeap.o obj/dbg/fib.o obj/dbg/concatenatedGraph.o obj/dbg/passageMarker.o obj/dbg/graphStats.o obj/dbg/correctedGraph.o obj/dbg/dfib.o obj/dbg/dfibHeap.o obj/dbg/recycleBin.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/shortReadPairs.o obj/dbg/scaffold.o obj/dbg/locallyCorrectedGraph.o obj/dbg/graphReConstruction.o obj/dbg/roadMap.o obj/dbg/preGraph.o obj/dbg/preGraphConstruction.o obj/dbg/concatenatedPreGraph.o obj/dbg/readCoherentGraph.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
|
@@ -92,25 +93,25 @@ colordebug : cleanobj obj/dbg_de
|
|
92
93
|
objdir:
|
93
94
|
mkdir -p obj
|
94
95
|
|
95
|
-
obj: zlib
|
96
|
+
obj: zlib objdir $(OBJ)
|
96
97
|
|
97
98
|
obj_de: override DEF := $(DEF) -D COLOR
|
98
99
|
obj_de: zlib cleanobj objdir $(OBJ)
|
99
100
|
|
100
|
-
obj/dbgdir:
|
101
|
+
obj/dbgdir:
|
101
102
|
mkdir -p obj/dbg
|
102
103
|
|
103
|
-
obj/dbg: override DEF := $(DEF) -D DEBUG
|
104
|
+
obj/dbg: override DEF := $(DEF) -D DEBUG
|
104
105
|
obj/dbg: zlib cleanobj obj/dbgdir $(OBJDBG)
|
105
106
|
|
106
107
|
obj/dbg_de: override DEF := $(DEF) -D COLOR -D DEBUG
|
107
108
|
obj/dbg_de: zlib cleanobj obj/dbgdir $(OBJDBG)
|
108
109
|
|
109
110
|
obj/%.o: src/%.c
|
110
|
-
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
111
|
+
$(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
|
111
112
|
|
112
113
|
obj/dbg/%.o: src/%.c
|
113
|
-
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
114
|
+
$(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
114
115
|
|
115
116
|
doc: Manual.pdf
|
116
117
|
|
@@ -123,3 +124,15 @@ endif
|
|
123
124
|
|
124
125
|
test: velvetg velveth
|
125
126
|
cd tests && ./run-tests.sh
|
127
|
+
|
128
|
+
sharedobjdir:
|
129
|
+
mkdir -p obj/shared
|
130
|
+
|
131
|
+
obj/shared: sharedobjdir $(OBJSHARED)
|
132
|
+
|
133
|
+
obj/shared/%.o: src/%.c
|
134
|
+
$(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
135
|
+
|
136
|
+
shared: zlib obj/shared
|
137
|
+
cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o run.o run2.o
|
138
|
+
|
data/ext/src/src/concatenatedPreGraph.c
CHANGED
@@ -47,7 +47,7 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
47
47
|
hasSinglePreArc_pg(getOtherEnd_pg
|
48
48
|
(preArc, preNodeBID),
|
49
49
|
preGraph)
|
50
|
-
&& !isLoop_pg(preArc)
|
50
|
+
&& !isLoop_pg(preArc)
|
51
51
|
&& getDestination_pg(preArc, preNodeBID) != preNodeAID) {
|
52
52
|
|
53
53
|
totalLength += getPreNodeLength_pg(preNodeBID, preGraph);
|
@@ -99,11 +99,11 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
|
|
99
99
|
appendDescriptors_pg(&ptr, &writeOffset ,currentPreNodeID, preGraph, false);
|
100
100
|
}
|
101
101
|
|
102
|
-
if (writeOffset != 0)
|
102
|
+
if (writeOffset != 0)
|
103
103
|
while (writeOffset++ != 4)
|
104
104
|
(*ptr) >>= 2;
|
105
105
|
|
106
|
-
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
106
|
+
setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
|
107
107
|
|
108
108
|
// Correct preArcs
|
109
109
|
for (preArc = getPreArc_pg(preNodeBID, preGraph); preArc != NULL_IDX;
|
@@ -209,7 +209,7 @@ static boolean isEligibleTip(IDnum index, PreGraph * preGraph, Coordinate
|
|
209
209
|
if (currentIndex == 0)
|
210
210
|
return true;
|
211
211
|
|
212
|
-
// Joined tips
|
212
|
+
// Joined tips
|
213
213
|
if (simplePreArcCount_pg(-currentIndex, preGraph) < 2)
|
214
214
|
return false;
|
215
215
|
|
data/ext/src/src/correctedGraph.c
CHANGED
@@ -764,7 +764,7 @@ static void remapNodeMarkersOntoNeighbour(Node * source,
|
|
764
764
|
}
|
765
765
|
}
|
766
766
|
|
767
|
-
// Short read markers
|
767
|
+
// Short read markers
|
768
768
|
if (readStartsAreActivated(graph)) {
|
769
769
|
// Update Coordinates
|
770
770
|
sourceArray = getNodeReads(source, graph);
|
@@ -1250,7 +1250,7 @@ static void foldSymmetricalNode(Node * node)
|
|
1250
1250
|
twinNode = node;
|
1251
1251
|
node = tmp;
|
1252
1252
|
}
|
1253
|
-
// Destroy link to old markers
|
1253
|
+
// Destroy link to old markers
|
1254
1254
|
setMarker(node, NULL_IDX);
|
1255
1255
|
|
1256
1256
|
// Reinsert markers properly
|
@@ -1951,7 +1951,7 @@ static void transferNodeData(Node * source, Node * target)
|
|
1951
1951
|
if (getNode(fastPath) == twinSource)
|
1952
1952
|
fastPath = getNextInSequence(fastPath);
|
1953
1953
|
|
1954
|
-
// Next node
|
1954
|
+
// Next node
|
1955
1955
|
if (source == activeNode) {
|
1956
1956
|
activeNode = target;
|
1957
1957
|
todo =
|
@@ -2013,7 +2013,7 @@ static void concatenateNodesAndVaccinate(Node * nodeA, Node * nodeB,
|
|
2013
2013
|
// Read starts
|
2014
2014
|
concatenateReadStarts(nodeA, nodeB, graph);
|
2015
2015
|
|
2016
|
-
// Descriptor management
|
2016
|
+
// Descriptor management
|
2017
2017
|
appendDescriptors(nodeA, nodeB);
|
2018
2018
|
|
2019
2019
|
// Update uniqueness:
|
@@ -2167,7 +2167,7 @@ static void cleanUpRedundancy()
|
|
2167
2167
|
|
2168
2168
|
//velvetLog("Concatenation\n");
|
2169
2169
|
|
2170
|
-
// Freeing up memory
|
2170
|
+
// Freeing up memory
|
2171
2171
|
if (slowMarker != NULL_IDX)
|
2172
2172
|
concatenatePathNodes(slowPath);
|
2173
2173
|
else
|
@@ -2460,7 +2460,7 @@ void clipTipsHard(Graph * graph, boolean conserveLong)
|
|
2460
2460
|
|
2461
2461
|
if (current == NULL)
|
2462
2462
|
continue;
|
2463
|
-
|
2463
|
+
|
2464
2464
|
if (conserveLong && getMarker(current))
|
2465
2465
|
continue;
|
2466
2466
|
|
@@ -2560,7 +2560,7 @@ void correctGraph(Graph * argGraph, ShortLength * argSequenceLengths, Category *
|
|
2560
2560
|
eligibleStartingPoints = mallocOrExit(2 * nodes + 1, IDnum);
|
2561
2561
|
progressStatus = callocOrExit(2 * nodes + 1, boolean);
|
2562
2562
|
todoLists = callocOrExit(2 * nodes + 1, Ticket *);
|
2563
|
-
//Done with memory
|
2563
|
+
//Done with memory
|
2564
2564
|
|
2565
2565
|
resetNodeStatus(graph);
|
2566
2566
|
determineEligibleStartingPoints();
|
@@ -2607,9 +2607,9 @@ void setMaxReadLength(int value)
|
|
2607
2607
|
if (value < 0) {
|
2608
2608
|
velvetLog("Negative branch length %i!\n", value);
|
2609
2609
|
velvetLog("Exiting...\n");
|
2610
|
-
#ifdef DEBUG
|
2610
|
+
#ifdef DEBUG
|
2611
2611
|
abort();
|
2612
|
-
#endif
|
2612
|
+
#endif
|
2613
2613
|
exit(1);
|
2614
2614
|
}
|
2615
2615
|
MAXREADLENGTH = value;
|
@@ -2621,9 +2621,9 @@ void setMaxGaps(int value)
|
|
2621
2621
|
if (value < 0) {
|
2622
2622
|
velvetLog("Negative max gap count %i!\n", value);
|
2623
2623
|
velvetLog("Exiting...\n");
|
2624
|
-
#ifdef DEBUG
|
2624
|
+
#ifdef DEBUG
|
2625
2625
|
abort();
|
2626
|
-
#endif
|
2626
|
+
#endif
|
2627
2627
|
exit(1);
|
2628
2628
|
}
|
2629
2629
|
MAXGAPS = value;
|
@@ -2635,9 +2635,9 @@ void setMaxDivergence(double value)
|
|
2635
2635
|
velvetLog("Divergence rate %lf out of bounds [0,1]!\n",
|
2636
2636
|
value);
|
2637
2637
|
velvetLog("Exiting...\n");
|
2638
|
-
#ifdef DEBUG
|
2638
|
+
#ifdef DEBUG
|
2639
2639
|
abort();
|
2640
|
-
#endif
|
2640
|
+
#endif
|
2641
2641
|
exit(1);
|
2642
2642
|
}
|
2643
2643
|
MAXDIVERGENCE = value;
|
data/ext/src/src/graphStats.c
CHANGED
@@ -522,7 +522,7 @@ void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads)
|
|
522
522
|
}
|
523
523
|
|
524
524
|
velvetFprintf(outfile, "\t%li", (long) markerCount(node));
|
525
|
-
printShortCounts(outfile, node, graph, reads);
|
525
|
+
printShortCounts(outfile, node, graph, reads);
|
526
526
|
|
527
527
|
velvetFprintf(outfile, "\n");
|
528
528
|
}
|
@@ -579,7 +579,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
579
579
|
if (destination == NULL)
|
580
580
|
return;
|
581
581
|
|
582
|
-
// Eliminate those that point to uniquely strain sequences
|
582
|
+
// Eliminate those that point to uniquely strain sequences
|
583
583
|
if (nodeGenomicMultiplicity(destination, firstStrain) != 1) {
|
584
584
|
// velvetLog("Multiple genome reads\n");
|
585
585
|
return;
|
@@ -600,7 +600,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
600
600
|
if (destination == destination2)
|
601
601
|
return;
|
602
602
|
|
603
|
-
// Eliminate those that point to uniquely strain sequences
|
603
|
+
// Eliminate those that point to uniquely strain sequences
|
604
604
|
if (isOnlyGenome(destination2, firstStrain))
|
605
605
|
return;
|
606
606
|
|
@@ -776,7 +776,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
776
776
|
if (regions) {
|
777
777
|
lastRegion->next = newMask(stops[indexStop]);
|
778
778
|
lastRegion = lastRegion->next;
|
779
|
-
} else {
|
779
|
+
} else {
|
780
780
|
regions = newMask(stops[indexStop]);
|
781
781
|
lastRegion = regions;
|
782
782
|
}
|
@@ -803,7 +803,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
803
803
|
if (regions) {
|
804
804
|
lastRegion->next = newMask(stops[indexStop]);
|
805
805
|
lastRegion = lastRegion->next;
|
806
|
-
} else {
|
806
|
+
} else {
|
807
807
|
regions = newMask(stops[indexStop]);
|
808
808
|
lastRegion = regions;
|
809
809
|
}
|
@@ -837,7 +837,7 @@ static int compareCoords(const void * A, const void * B) {
|
|
837
837
|
|
838
838
|
static void sortCoords(Coordinate * array, IDnum length) {
|
839
839
|
qsort(array, (size_t) length, sizeof(Coordinate), compareCoords);
|
840
|
-
}
|
840
|
+
}
|
841
841
|
|
842
842
|
static void getShortReadCoords(Coordinate * starts, Coordinate * stops, Node * node, Graph * graph, ShortLength * readLengths) {
|
843
843
|
ShortReadMarker * markers = getNodeReads(node, graph);
|
@@ -918,7 +918,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
918
918
|
gap = getGap(node, graph);
|
919
919
|
for (position = 0; position < WORDLENGTH; position++) {
|
920
920
|
if (position % 60 == 0 && position > 0)
|
921
|
-
velvetFprintf(outfile, "\n");
|
921
|
+
velvetFprintf(outfile, "\n");
|
922
922
|
nucleotide = getNucleotideChar(position, tString);
|
923
923
|
velvetFprintf(outfile, "%c", nucleotide);
|
924
924
|
}
|
@@ -938,7 +938,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
938
938
|
mask->finish) {
|
939
939
|
next = mask->next;
|
940
940
|
deallocateMask(mask);
|
941
|
-
mask = next;
|
941
|
+
mask = next;
|
942
942
|
}
|
943
943
|
|
944
944
|
if (gap
|
@@ -1047,7 +1047,7 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1047
1047
|
Node * nodeB = *((Node **) B);
|
1048
1048
|
double covA;
|
1049
1049
|
double covB;
|
1050
|
-
|
1050
|
+
|
1051
1051
|
if (getNodeLength(nodeA) == 0)
|
1052
1052
|
nodeA = NULL;
|
1053
1053
|
|
@@ -1063,8 +1063,8 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1063
1063
|
return -1;
|
1064
1064
|
|
1065
1065
|
// Deal with real coverage numbers:
|
1066
|
-
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
-
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1066
|
+
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
+
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1068
1068
|
|
1069
1069
|
if (covA > covB)
|
1070
1070
|
return 1;
|
@@ -1075,14 +1075,14 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1075
1075
|
|
1076
1076
|
double estimated_cov(Graph * graph, char * directory)
|
1077
1077
|
{
|
1078
|
-
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1078
|
+
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1079
1079
|
IDnum index;
|
1080
1080
|
Coordinate halfTotalLength = 0;
|
1081
1081
|
Coordinate sumLength = 0;
|
1082
1082
|
Node *node;
|
1083
1083
|
char *logFilename =
|
1084
1084
|
mallocOrExit(strlen(directory) + 100, char);
|
1085
|
-
char *statsLine =
|
1085
|
+
char *statsLine =
|
1086
1086
|
mallocOrExit(5000, char);
|
1087
1087
|
FILE *logFile;
|
1088
1088
|
|
@@ -1152,7 +1152,7 @@ static boolean terminalReferenceMarker(Node * node, ReadSet * reads) {
|
|
1152
1152
|
|
1153
1153
|
static boolean hasReferenceMarker(Node * node, ReadSet * reads) {
|
1154
1154
|
PassageMarkerI marker;
|
1155
|
-
|
1155
|
+
|
1156
1156
|
for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker))
|
1157
1157
|
if (reads->categories[getAbsolutePassMarkerSeqID(marker) - 1] == REFERENCE)
|
1158
1158
|
return true;
|
@@ -1229,14 +1229,14 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1229
1229
|
IDnum index;
|
1230
1230
|
Node *node;
|
1231
1231
|
boolean denounceReads = readStartsAreActivated(graph);
|
1232
|
-
boolean *res = NULL;
|
1232
|
+
boolean *res = NULL;
|
1233
1233
|
FILE * outfile = NULL;
|
1234
1234
|
|
1235
1235
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1236
|
-
|
1236
|
+
|
1237
1237
|
if (denounceReads)
|
1238
1238
|
res = callocOrExit(sequenceCount(graph), boolean);
|
1239
|
-
|
1239
|
+
|
1240
1240
|
if (export) {
|
1241
1241
|
outfile = fopen(filename, "w");
|
1242
1242
|
|
@@ -1255,7 +1255,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1255
1255
|
if (getNodeLength(node) == 0)
|
1256
1256
|
continue;
|
1257
1257
|
|
1258
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1258
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1259
1259
|
&& !hasReferenceMarker(node, reads))
|
1260
1260
|
removeNodeAndDenounceDubiousReads(graph,
|
1261
1261
|
node,
|
@@ -1273,7 +1273,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1273
1273
|
if (getNodeLength(node) == 0)
|
1274
1274
|
continue;
|
1275
1275
|
|
1276
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1276
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1277
1277
|
&& !terminalReferenceMarker(node, reads))
|
1278
1278
|
removeNodeAndDenounceDubiousReads(graph,
|
1279
1279
|
node,
|
@@ -1296,7 +1296,7 @@ static Coordinate getLongCoverage(Node * node) {
|
|
1296
1296
|
|
1297
1297
|
for (marker = getMarker(node); marker; marker = getNextInNode(marker))
|
1298
1298
|
total += getPassageMarkerLength(marker);
|
1299
|
-
|
1299
|
+
|
1300
1300
|
return total;
|
1301
1301
|
}
|
1302
1302
|
|
@@ -1313,7 +1313,7 @@ void removeLowCoverageReferenceNodes(Graph * graph, double minCov, double minLon
|
|
1313
1313
|
if (getNodeLength(node) == 0)
|
1314
1314
|
continue;
|
1315
1315
|
|
1316
|
-
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1316
|
+
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1317
1317
|
|| getLongCoverage(node) / getNodeLength(node) < minLongCov)
|
1318
1318
|
&& hasReferenceMarker(node, reads)) {
|
1319
1319
|
destroyNodePassageMarkers(graph, node);
|
@@ -1342,7 +1342,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1342
1342
|
return;
|
1343
1343
|
|
1344
1344
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1345
|
-
|
1345
|
+
|
1346
1346
|
if (export) {
|
1347
1347
|
outfile = fopen(filename, "a");
|
1348
1348
|
|
@@ -1360,7 +1360,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1360
1360
|
if (getNodeLength(node) == 0)
|
1361
1361
|
continue;
|
1362
1362
|
|
1363
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1363
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1364
1364
|
&& !hasReferenceMarker(node, reads))
|
1365
1365
|
removeNodeAndDenounceDubiousReads(graph,
|
1366
1366
|
node,
|
@@ -1378,7 +1378,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1378
1378
|
if (getNodeLength(node) == 0)
|
1379
1379
|
continue;
|
1380
1380
|
|
1381
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1381
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1382
1382
|
&& !terminalReferenceMarker(node, reads))
|
1383
1383
|
removeNodeAndDenounceDubiousReads(graph,
|
1384
1384
|
node,
|
@@ -1404,7 +1404,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1404
1404
|
return;
|
1405
1405
|
|
1406
1406
|
velvetLog("Applying an upper coverage cutoff of %f...\n", maxCov);
|
1407
|
-
|
1407
|
+
|
1408
1408
|
if (export) {
|
1409
1409
|
outfile = fopen(filename, "w");
|
1410
1410
|
|
@@ -1423,7 +1423,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1423
1423
|
&& getTotalCoverage(node) / getNodeLength(node) > maxCov) {
|
1424
1424
|
destroyNodePassageMarkers(graph, node);
|
1425
1425
|
|
1426
|
-
if (export && getNodeLength(node) > minLength)
|
1426
|
+
if (export && getNodeLength(node) > minLength)
|
1427
1427
|
exportLongNodeSequence(outfile, node, graph, NULL, NULL, -1);
|
1428
1428
|
|
1429
1429
|
destroyNode(node, graph);
|
@@ -1784,7 +1784,7 @@ Coordinate totalAssemblyLength(Graph * graph)
|
|
1784
1784
|
return total;
|
1785
1785
|
}
|
1786
1786
|
|
1787
|
-
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1787
|
+
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1788
1788
|
{
|
1789
1789
|
IDnum res = 0;
|
1790
1790
|
boolean * used = callocOrExit(sequenceCount(graph) + 1, boolean);
|
@@ -1798,16 +1798,16 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1798
1798
|
node = getNodeInGraph(graph, nodeID);
|
1799
1799
|
if (node == NULL || getNodeLength(node) < minContigLength)
|
1800
1800
|
continue;
|
1801
|
-
|
1801
|
+
|
1802
1802
|
// Long reads
|
1803
1803
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1804
1804
|
readID = getPassageMarkerSequenceID(marker);
|
1805
1805
|
if (readID < 0)
|
1806
1806
|
readID = -readID;
|
1807
|
-
used[readID] = true;
|
1808
|
-
}
|
1807
|
+
used[readID] = true;
|
1808
|
+
}
|
1809
1809
|
|
1810
|
-
// Short reads
|
1810
|
+
// Short reads
|
1811
1811
|
if (!readStartsAreActivated(graph))
|
1812
1812
|
continue;
|
1813
1813
|
|
@@ -1816,23 +1816,23 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1816
1816
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1817
1817
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1818
1818
|
readID = getShortReadMarkerID(shortReadMarker);
|
1819
|
-
used[readID] = true;
|
1819
|
+
used[readID] = true;
|
1820
1820
|
}
|
1821
|
-
|
1821
|
+
|
1822
1822
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1823
1823
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1824
1824
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1825
1825
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1826
1826
|
readID = getShortReadMarkerID(shortReadMarker);
|
1827
|
-
used[readID] = true;
|
1827
|
+
used[readID] = true;
|
1828
1828
|
}
|
1829
1829
|
}
|
1830
1830
|
|
1831
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1831
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1832
1832
|
if (used[readID])
|
1833
1833
|
res++;
|
1834
1834
|
|
1835
|
-
free(used);
|
1835
|
+
free(used);
|
1836
1836
|
|
1837
1837
|
return res;
|
1838
1838
|
}
|
@@ -1841,7 +1841,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1841
1841
|
{
|
1842
1842
|
char *logFilename =
|
1843
1843
|
mallocOrExit(strlen(directory) + 100, char);
|
1844
|
-
char *statsLine =
|
1844
|
+
char *statsLine =
|
1845
1845
|
mallocOrExit(5000, char);
|
1846
1846
|
FILE *logFile;
|
1847
1847
|
|
@@ -1860,7 +1860,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1860
1860
|
(long) sequenceCount(graph));
|
1861
1861
|
|
1862
1862
|
velvetFprintf(logFile, "%s", statsLine);
|
1863
|
-
velvetFprintf(stdout, "%s", statsLine);
|
1863
|
+
//velvetFprintf(stdout, "%s", statsLine); //don't print to stdout when part of a library
|
1864
1864
|
|
1865
1865
|
fclose(logFile);
|
1866
1866
|
free(logFilename);
|
@@ -1888,16 +1888,16 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1888
1888
|
node = getNodeInGraph(graph, nodeID);
|
1889
1889
|
if (node == NULL || getNodeLength(node) < minContigKmerLength)
|
1890
1890
|
continue;
|
1891
|
-
|
1891
|
+
|
1892
1892
|
// Long reads
|
1893
1893
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1894
1894
|
readID = getPassageMarkerSequenceID(marker);
|
1895
1895
|
if (readID < 0)
|
1896
1896
|
readID = -readID;
|
1897
|
-
used[readID] = true;
|
1898
|
-
}
|
1897
|
+
used[readID] = true;
|
1898
|
+
}
|
1899
1899
|
|
1900
|
-
// Short reads
|
1900
|
+
// Short reads
|
1901
1901
|
if (!readStartsAreActivated(graph))
|
1902
1902
|
continue;
|
1903
1903
|
|
@@ -1906,31 +1906,31 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1906
1906
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1907
1907
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1908
1908
|
readID = getShortReadMarkerID(shortReadMarker);
|
1909
|
-
used[readID] = true;
|
1909
|
+
used[readID] = true;
|
1910
1910
|
}
|
1911
|
-
|
1911
|
+
|
1912
1912
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1913
1913
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1914
1914
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1915
1915
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1916
1916
|
readID = getShortReadMarkerID(shortReadMarker);
|
1917
|
-
used[readID] = true;
|
1917
|
+
used[readID] = true;
|
1918
1918
|
}
|
1919
1919
|
}
|
1920
1920
|
|
1921
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1921
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1922
1922
|
if (!used[readID])
|
1923
1923
|
exportTightString(outfile, getTightStringInArray(reads->tSequences, readID - 1), readID);
|
1924
1924
|
|
1925
1925
|
free(outFilename);
|
1926
|
-
free(used);
|
1926
|
+
free(used);
|
1927
1927
|
fclose(outfile);
|
1928
1928
|
}
|
1929
1929
|
|
1930
1930
|
static IDnum getReferenceCount(ReadSet * reads) {
|
1931
1931
|
IDnum index;
|
1932
1932
|
|
1933
|
-
for (index = 0; index < reads->readCount; index++)
|
1933
|
+
for (index = 0; index < reads->readCount; index++)
|
1934
1934
|
if (reads->categories[index] != REFERENCE)
|
1935
1935
|
break;
|
1936
1936
|
|
@@ -1979,7 +1979,7 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1979
1979
|
start = longlongvar;
|
1980
1980
|
sscanf(strtok(NULL, ":-\r\n"), "%lli", &longlongvar);
|
1981
1981
|
finish = longlongvar;
|
1982
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1982
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1983
1983
|
if (start <= finish) {
|
1984
1984
|
strcpy(refCoords[refIndex].name, name);
|
1985
1985
|
refCoords[refIndex].start = start;
|
@@ -1998,17 +1998,17 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1998
1998
|
}
|
1999
1999
|
|
2000
2000
|
strcpy(name, line + 1);
|
2001
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2001
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2002
2002
|
strcpy(refCoords[refIndex].name, name);
|
2003
2003
|
refCoords[refIndex].start = 1;
|
2004
2004
|
refCoords[refIndex].finish = -1;
|
2005
2005
|
refCoords[refIndex].positive_strand = true;
|
2006
2006
|
}
|
2007
2007
|
if (++refIndex == referenceCount)
|
2008
|
-
break;
|
2008
|
+
break;
|
2009
2009
|
}
|
2010
2010
|
}
|
2011
|
-
|
2011
|
+
|
2012
2012
|
fclose(file);
|
2013
2013
|
}
|
2014
2014
|
return refCoords;
|
@@ -2020,23 +2020,23 @@ typedef struct refMap_st {
|
|
2020
2020
|
IDnum refID;
|
2021
2021
|
IDnum refStart;
|
2022
2022
|
IDnum refFinish;
|
2023
|
-
} ATTRIBUTE_PACKED ReferenceMapping;
|
2023
|
+
} ATTRIBUTE_PACKED ReferenceMapping;
|
2024
2024
|
|
2025
2025
|
static int compareReferenceMappings(const void * A, const void * B) {
|
2026
2026
|
ReferenceMapping * refMapA = (ReferenceMapping *) A;
|
2027
2027
|
ReferenceMapping * refMapB = (ReferenceMapping *) B;
|
2028
|
-
|
2028
|
+
|
2029
2029
|
if (refMapA->start < refMapB->start)
|
2030
2030
|
return -1;
|
2031
2031
|
else if (refMapA->start == refMapB->start)
|
2032
2032
|
return 0;
|
2033
|
-
else
|
2033
|
+
else
|
2034
2034
|
return 1;
|
2035
2035
|
}
|
2036
2036
|
|
2037
2037
|
static void initializeReferenceMapping(ReferenceMapping * refMap, PassageMarkerI marker, Node * node) {
|
2038
2038
|
refMap->start = getStartOffset(marker);
|
2039
|
-
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2039
|
+
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2040
2040
|
refMap->refID = getPassageMarkerSequenceID(marker);
|
2041
2041
|
refMap->refStart = getPassageMarkerStart(marker);
|
2042
2042
|
refMap->refFinish = getPassageMarkerFinish(marker);
|
@@ -2046,7 +2046,7 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2046
2046
|
ReferenceCoord * refCoord;
|
2047
2047
|
Coordinate start, finish;
|
2048
2048
|
|
2049
|
-
if (mapping->refID > 0)
|
2049
|
+
if (mapping->refID > 0)
|
2050
2050
|
refCoord = &refCoords[mapping->refID - 1];
|
2051
2051
|
else
|
2052
2052
|
refCoord = &refCoords[-mapping->refID - 1];
|
@@ -2064,13 +2064,13 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2064
2064
|
start = refCoord->start + mapping->refStart + wordLength - 1;
|
2065
2065
|
finish = refCoord->start + mapping->refFinish + 1;
|
2066
2066
|
} else {
|
2067
|
-
start = refCoord->finish - mapping->refStart;
|
2068
|
-
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2067
|
+
start = refCoord->finish - mapping->refStart;
|
2068
|
+
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2069
2069
|
}
|
2070
2070
|
}
|
2071
|
-
|
2071
|
+
|
2072
2072
|
velvetFprintf(file, "%lli\t%lli\t%s\t%lli\t%lli\n",
|
2073
|
-
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2073
|
+
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2074
2074
|
refCoord->name, (long long) start, (long long) finish);
|
2075
2075
|
}
|
2076
2076
|
|
@@ -2089,7 +2089,7 @@ static void exportLongNodeMapping(FILE * outfile, Node * node, ReadSet * reads,
|
|
2089
2089
|
velvetFprintf(outfile, ">contig_%li\n", (long) getNodeID(node));
|
2090
2090
|
|
2091
2091
|
// Create table
|
2092
|
-
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2092
|
+
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2093
2093
|
|
2094
2094
|
// Initialize table
|
2095
2095
|
referenceCount = 0;
|
@@ -2115,9 +2115,9 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2115
2115
|
IDnum nodeIndex, refIndex;
|
2116
2116
|
Node *node;
|
2117
2117
|
ReferenceCoord * refCoords;
|
2118
|
-
IDnum referenceCount = getReferenceCount(reads);
|
2118
|
+
IDnum referenceCount = getReferenceCount(reads);
|
2119
2119
|
|
2120
|
-
if (referenceCount == 0)
|
2120
|
+
if (referenceCount == 0)
|
2121
2121
|
return;
|
2122
2122
|
|
2123
2123
|
refCoords = collectReferenceCoords(seqReadInfo, referenceCount);
|
@@ -2135,7 +2135,7 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2135
2135
|
|
2136
2136
|
if (node == NULL || getNodeLength(node) < minLength)
|
2137
2137
|
continue;
|
2138
|
-
|
2138
|
+
|
2139
2139
|
exportLongNodeMapping(outfile, node, reads, refCoords, getWordLength(graph));
|
2140
2140
|
}
|
2141
2141
|
|
data/ext/src/src/run.c
CHANGED
@@ -39,7 +39,7 @@ static void printUsage()
|
|
39
39
|
printf("\thash_length\t: EITHER an odd integer (if even, it will be decremented) <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
40
40
|
printf("\t\t\t: OR: m,M,s where m and M are odd integers (if not, they will be decremented) with m < M <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
41
41
|
puts("\t\t\t\tand s is a step (even number). Velvet will then hash from k=m to k=M with a step of s");
|
42
|
-
puts("\tfilename\t: path to sequence file or - for standard input");
|
42
|
+
puts("\tfilename\t: path to sequence file or - for standard input");
|
43
43
|
puts("");
|
44
44
|
puts("File format options:");
|
45
45
|
puts("\t-fasta\t-fastq\t-raw\t-fasta.gz\t-fastq.gz\t-raw.gz\t-sam\t-bam\t-fmtAuto");
|
@@ -52,7 +52,7 @@ static void printUsage()
|
|
52
52
|
puts("Read type options:");
|
53
53
|
puts("\t-short\t-shortPaired");
|
54
54
|
#if CATEGORIES <= 5
|
55
|
-
Category cat;
|
55
|
+
Category cat;
|
56
56
|
for (cat = 2; cat <= CATEGORIES; cat++)
|
57
57
|
printf("\t-short%i\t-shortPaired%i\n", cat, cat);
|
58
58
|
#else
|
@@ -93,7 +93,7 @@ static void printUsage()
|
|
93
93
|
puts("\t\t[Both files are picked up by graph, so please leave them there]");
|
94
94
|
}
|
95
95
|
|
96
|
-
int
|
96
|
+
int velveth(int argc, char **argv)
|
97
97
|
{
|
98
98
|
ReadSet *allSequences = NULL;
|
99
99
|
SplayTable *splayTable;
|
@@ -161,18 +161,18 @@ int main(int argc, char **argv)
|
|
161
161
|
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
162
162
|
hashLength, MAXKMERLENGTH);
|
163
163
|
hashLength = MAXKMERLENGTH;
|
164
|
-
}
|
164
|
+
}
|
165
165
|
if (hashLength <= 0) {
|
166
166
|
velvetLog("Invalid hash length: %s\n", argv[2]);
|
167
167
|
printUsage();
|
168
168
|
return 0;
|
169
|
-
}
|
169
|
+
}
|
170
170
|
if (hashLength % 2 == 0) {
|
171
171
|
velvetLog
|
172
172
|
("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
|
173
173
|
hashLength, hashLength - 1);
|
174
174
|
hashLength--;
|
175
|
-
}
|
175
|
+
}
|
176
176
|
|
177
177
|
if (multiple_kmers) {
|
178
178
|
if (hashLengthMax > MAXKMERLENGTH + 1) {
|
@@ -180,12 +180,12 @@ int main(int argc, char **argv)
|
|
180
180
|
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
181
181
|
hashLengthMax, MAXKMERLENGTH + 1);
|
182
182
|
hashLengthMax = MAXKMERLENGTH + 1;
|
183
|
-
}
|
183
|
+
}
|
184
184
|
if (hashLengthMax <= hashLength) {
|
185
185
|
velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]);
|
186
186
|
printUsage();
|
187
187
|
return 0;
|
188
|
-
}
|
188
|
+
}
|
189
189
|
|
190
190
|
if (hashLengthStep <= 0) {
|
191
191
|
velvetLog("Non-positive hash length! Setting it to 2\n");
|
@@ -215,7 +215,7 @@ int main(int argc, char **argv)
|
|
215
215
|
sprintf(buf,"%s_%d",argv[1],h);
|
216
216
|
directory = mallocOrExit(strlen(buf) + 100, char);
|
217
217
|
strcpy(directory,buf);
|
218
|
-
} else
|
218
|
+
} else
|
219
219
|
directory = argv[1];
|
220
220
|
|
221
221
|
filename = mallocOrExit(strlen(directory) + 100, char);
|
data/ext/src/src/run2.c
CHANGED
@@ -68,6 +68,8 @@ static void printUsage()
|
|
68
68
|
puts("\t-paired_exp_fraction <double>\t: remove all the paired end connections which less than the specified fraction of the expected count (default: 0.1)");
|
69
69
|
puts("\t-shortMatePaired* <yes|no>\t: for mate-pair libraries, indicate that the library might be contaminated with paired-end reads (default no)");
|
70
70
|
puts("\t-conserveLong <yes|no>\t\t: preserve sequences with long reads in them (default no)");
|
71
|
+
puts("\t-clip_tips <yes|no>\t\t: do tip clipping on pre-graph (default yes)");
|
72
|
+
puts("\t-tour_bus <yes|no>\t\t: apply the tour bus algorithm (default yes)");
|
71
73
|
puts("");
|
72
74
|
puts("Output:");
|
73
75
|
puts("\tdirectory/contigs.fa\t\t: fasta file of contigs longer than twice hash length");
|
@@ -76,7 +78,7 @@ static void printUsage()
|
|
76
78
|
puts("\tdirectory/velvet_asm.afg\t: (if requested) AMOS compatible assembly file");
|
77
79
|
}
|
78
80
|
|
79
|
-
int
|
81
|
+
int velvetg(int argc, char **argv)
|
80
82
|
{
|
81
83
|
ReadSet *sequences = NULL;
|
82
84
|
RoadMapArray *rdmaps;
|
@@ -86,7 +88,9 @@ int main(int argc, char **argv)
|
|
86
88
|
*preGraphFilename, *seqFilename, *roadmapFilename,
|
87
89
|
*lowCovContigsFilename, *highCovContigsFilename;
|
88
90
|
double coverageCutoff = -1;
|
89
|
-
|
91
|
+
boolean doClipTips = true;
|
92
|
+
boolean doTourBus = true;
|
93
|
+
double longCoverageCutoff = -1;
|
90
94
|
double maxCoverageCutoff = -1;
|
91
95
|
double expectedCoverage = -1;
|
92
96
|
Coordinate minContigLength = -1;
|
@@ -164,7 +168,7 @@ int main(int argc, char **argv)
|
|
164
168
|
return 0;
|
165
169
|
}
|
166
170
|
|
167
|
-
// Memory allocation
|
171
|
+
// Memory allocation
|
168
172
|
directory = argv[1];
|
169
173
|
graphFilename = mallocOrExit(strlen(directory) + 100, char);
|
170
174
|
connectedGraphFilename = mallocOrExit(strlen(directory) + 100, char);
|
@@ -181,9 +185,9 @@ int main(int argc, char **argv)
|
|
181
185
|
if (arg_index >= argc) {
|
182
186
|
velvetLog("Unusual number of arguments!\n");
|
183
187
|
printUsage();
|
184
|
-
#ifdef DEBUG
|
188
|
+
#ifdef DEBUG
|
185
189
|
abort();
|
186
|
-
#endif
|
190
|
+
#endif
|
187
191
|
exit(1);
|
188
192
|
}
|
189
193
|
|
@@ -210,9 +214,9 @@ int main(int argc, char **argv)
|
|
210
214
|
if (insertLength[0] < 0) {
|
211
215
|
velvetLog("Invalid insert length: %lli\n",
|
212
216
|
(long long) insertLength[0]);
|
213
|
-
#ifdef DEBUG
|
217
|
+
#ifdef DEBUG
|
214
218
|
abort();
|
215
|
-
#endif
|
219
|
+
#endif
|
216
220
|
exit(1);
|
217
221
|
}
|
218
222
|
} else if (strcmp(arg, "-ins_length_sd") == 0) {
|
@@ -221,9 +225,9 @@ int main(int argc, char **argv)
|
|
221
225
|
if (std_dev[0] < 0) {
|
222
226
|
velvetLog("Invalid std deviation: %lli\n",
|
223
227
|
(long long) std_dev[0]);
|
224
|
-
#ifdef DEBUG
|
228
|
+
#ifdef DEBUG
|
225
229
|
abort();
|
226
|
-
#endif
|
230
|
+
#endif
|
227
231
|
exit(1);
|
228
232
|
}
|
229
233
|
} else if (strcmp(arg, "-ins_length_long") == 0) {
|
@@ -238,9 +242,9 @@ int main(int argc, char **argv)
|
|
238
242
|
cat = (Category) short_var;
|
239
243
|
if (cat < 1 || cat > CATEGORIES) {
|
240
244
|
velvetLog("Unknown option: %s\n", arg);
|
241
|
-
#ifdef DEBUG
|
245
|
+
#ifdef DEBUG
|
242
246
|
abort();
|
243
|
-
#endif
|
247
|
+
#endif
|
244
248
|
exit(1);
|
245
249
|
}
|
246
250
|
sscanf(argv[arg_index], "%lli", &longlong_var);
|
@@ -248,9 +252,9 @@ int main(int argc, char **argv)
|
|
248
252
|
if (insertLength[cat - 1] < 0) {
|
249
253
|
velvetLog("Invalid insert length: %lli\n",
|
250
254
|
(long long) insertLength[cat - 1]);
|
251
|
-
#ifdef DEBUG
|
255
|
+
#ifdef DEBUG
|
252
256
|
abort();
|
253
|
-
#endif
|
257
|
+
#endif
|
254
258
|
exit(1);
|
255
259
|
}
|
256
260
|
} else if (strncmp(arg, "-ins_length", 11) == 0) {
|
@@ -258,9 +262,9 @@ int main(int argc, char **argv)
|
|
258
262
|
cat = (Category) short_var;
|
259
263
|
if (cat < 1 || cat > CATEGORIES) {
|
260
264
|
velvetLog("Unknown option: %s\n", arg);
|
261
|
-
#ifdef DEBUG
|
265
|
+
#ifdef DEBUG
|
262
266
|
abort();
|
263
|
-
#endif
|
267
|
+
#endif
|
264
268
|
exit(1);
|
265
269
|
}
|
266
270
|
sscanf(argv[arg_index], "%lli", &longlong_var);
|
@@ -268,9 +272,9 @@ int main(int argc, char **argv)
|
|
268
272
|
if (std_dev[cat - 1] < 0) {
|
269
273
|
velvetLog("Invalid std deviation: %lli\n",
|
270
274
|
(long long) std_dev[cat - 1]);
|
271
|
-
#ifdef DEBUG
|
275
|
+
#ifdef DEBUG
|
272
276
|
abort();
|
273
|
-
#endif
|
277
|
+
#endif
|
274
278
|
exit(1);
|
275
279
|
}
|
276
280
|
} else if (strcmp(arg, "-read_trkg") == 0) {
|
@@ -353,6 +357,12 @@ int main(int argc, char **argv)
|
|
353
357
|
exit(1);
|
354
358
|
}
|
355
359
|
shadows[cat - 1] = (strcmp(argv[arg_index], "yes") == 0);
|
360
|
+
} else if (strcmp(arg,"-clip_tips") == 0){
|
361
|
+
if (strcmp(argv[arg_index], "no") == 0)
|
362
|
+
doClipTips = false;
|
363
|
+
} else if (strcmp(arg,"-tour_bus") == 0){
|
364
|
+
if (strcmp(argv[arg_index], "no") == 0)
|
365
|
+
doTourBus = false;
|
356
366
|
} else if (strcmp(arg, "--help") == 0) {
|
357
367
|
printUsage();
|
358
368
|
return 0;
|
@@ -452,7 +462,8 @@ int main(int argc, char **argv)
|
|
452
462
|
|
453
463
|
sequenceLengths =
|
454
464
|
getSequenceLengths(sequences, getWordLength(graph));
|
455
|
-
|
465
|
+
if (doTourBus)
|
466
|
+
correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
|
456
467
|
exportGraph(graphFilename, graph, sequences->tSequences);
|
457
468
|
} else if ((file = fopen(preGraphFilename, "r")) != NULL) {
|
458
469
|
fclose(file);
|
@@ -468,7 +479,8 @@ int main(int argc, char **argv)
|
|
468
479
|
roadmapFilename, readTracking, accelerationBits);
|
469
480
|
sequenceLengths =
|
470
481
|
getSequenceLengths(sequences, getWordLength(graph));
|
471
|
-
|
482
|
+
if (doTourBus)
|
483
|
+
correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
|
472
484
|
exportGraph(graphFilename, graph, sequences->tSequences);
|
473
485
|
} else if ((file = fopen(roadmapFilename, "r")) != NULL) {
|
474
486
|
fclose(file);
|
@@ -509,7 +521,7 @@ int main(int argc, char **argv)
|
|
509
521
|
}
|
510
522
|
preGraph = newPreGraph_pg(rdmaps, seqReadInfo);
|
511
523
|
concatenatePreGraph_pg(preGraph);
|
512
|
-
if (!conserveLong)
|
524
|
+
if (!conserveLong && doClipTips)
|
513
525
|
clipTips_pg(preGraph);
|
514
526
|
exportPreGraph_pg(preGraphFilename, preGraph);
|
515
527
|
destroyPreGraph_pg(preGraph);
|
@@ -523,13 +535,14 @@ int main(int argc, char **argv)
|
|
523
535
|
roadmapFilename, readTracking, accelerationBits);
|
524
536
|
sequenceLengths =
|
525
537
|
getSequenceLengths(sequences, getWordLength(graph));
|
526
|
-
|
538
|
+
if (doTourBus)
|
539
|
+
correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
|
527
540
|
exportGraph(graphFilename, graph, sequences->tSequences);
|
528
541
|
} else {
|
529
542
|
velvetLog("No Roadmap file to build upon! Please run velveth (see manual)\n");
|
530
|
-
#ifdef DEBUG
|
543
|
+
#ifdef DEBUG
|
531
544
|
abort();
|
532
|
-
#endif
|
545
|
+
#endif
|
533
546
|
exit(1);
|
534
547
|
}
|
535
548
|
|
@@ -553,11 +566,11 @@ int main(int argc, char **argv)
|
|
553
566
|
coverageCutoff = expectedCoverage / 2;
|
554
567
|
estimateCutoff = true;
|
555
568
|
}
|
556
|
-
} else {
|
569
|
+
} else {
|
557
570
|
estimateCoverage = false;
|
558
|
-
if (coverageCutoff < 0 && estimateCutoff)
|
571
|
+
if (coverageCutoff < 0 && estimateCutoff)
|
559
572
|
coverageCutoff = estimated_cov(graph, directory) / 2;
|
560
|
-
else
|
573
|
+
else
|
561
574
|
estimateCutoff = false;
|
562
575
|
}
|
563
576
|
|
@@ -580,7 +593,7 @@ int main(int argc, char **argv)
|
|
580
593
|
if (minContigLength < 2 * getWordLength(graph))
|
581
594
|
minContigKmerLength = getWordLength(graph);
|
582
595
|
else
|
583
|
-
minContigKmerLength = minContigLength - getWordLength(graph) + 1;
|
596
|
+
minContigKmerLength = minContigLength - getWordLength(graph) + 1;
|
584
597
|
|
585
598
|
dubious =
|
586
599
|
removeLowCoverageNodesAndDenounceDubiousReads(graph,
|
@@ -599,7 +612,8 @@ int main(int argc, char **argv)
|
|
599
612
|
lowCovContigsFilename);
|
600
613
|
|
601
614
|
removeHighCoverageNodes(graph, maxCoverageCutoff, exportFilteredNodes, minContigKmerLength, highCovContigsFilename);
|
602
|
-
|
615
|
+
if (doClipTips)
|
616
|
+
clipTipsHard(graph, conserveLong);
|
603
617
|
|
604
618
|
if (sequences->readCount > 0 && sequences->categories[0] == REFERENCE)
|
605
619
|
removeLowArcs(graph, coverageCutoff);
|
@@ -635,7 +649,7 @@ int main(int argc, char **argv)
|
|
635
649
|
strcpy(graphFilename, directory);
|
636
650
|
strcat(graphFilename, "/contigs.fa");
|
637
651
|
sequenceLengths = getSequenceLengths(sequences, getWordLength(graph));
|
638
|
-
exportLongNodeSequences(graphFilename, graph, minContigKmerLength, sequences, sequenceLengths, coverageMask);
|
652
|
+
exportLongNodeSequences(graphFilename, graph, minContigKmerLength, sequences, sequenceLengths, coverageMask);
|
639
653
|
|
640
654
|
if (exportAlignments) {
|
641
655
|
strcpy(graphFilename, directory);
|
@@ -663,9 +677,9 @@ int main(int argc, char **argv)
|
|
663
677
|
if (unusedReads)
|
664
678
|
exportUnusedReads(graph, sequences, minContigKmerLength, directory);
|
665
679
|
|
666
|
-
if (estimateCoverage)
|
680
|
+
if (estimateCoverage)
|
667
681
|
velvetLog("Estimated Coverage = %f\n", expectedCoverage);
|
668
|
-
if (estimateCutoff)
|
682
|
+
if (estimateCutoff)
|
669
683
|
velvetLog("Estimated Coverage cutoff = %f\n", coverageCutoff);
|
670
684
|
|
671
685
|
logFinalStats(graph, minContigKmerLength, directory);
|
@@ -673,25 +687,25 @@ int main(int argc, char **argv)
|
|
673
687
|
if (clean > 0) {
|
674
688
|
strcpy(graphFilename, directory);
|
675
689
|
strcat(graphFilename, "/Roadmaps");
|
676
|
-
remove(graphFilename);
|
690
|
+
remove(graphFilename);
|
677
691
|
|
678
692
|
strcpy(graphFilename, directory);
|
679
693
|
strcat(graphFilename, "/LastGraph");
|
680
|
-
remove(graphFilename);
|
681
|
-
}
|
694
|
+
remove(graphFilename);
|
695
|
+
}
|
682
696
|
|
683
697
|
if (clean > 1) {
|
684
698
|
strcpy(graphFilename, directory);
|
685
699
|
strcat(graphFilename, "/Sequences");
|
686
|
-
remove(graphFilename);
|
700
|
+
remove(graphFilename);
|
687
701
|
|
688
702
|
strcpy(graphFilename, directory);
|
689
703
|
strcat(graphFilename, "/Graph2");
|
690
|
-
remove(graphFilename);
|
704
|
+
remove(graphFilename);
|
691
705
|
|
692
706
|
strcpy(graphFilename, directory);
|
693
707
|
strcat(graphFilename, "/Graph");
|
694
|
-
remove(graphFilename);
|
708
|
+
remove(graphFilename);
|
695
709
|
}
|
696
710
|
|
697
711
|
free(sequenceLengths);
|
data/ext/src/src/utility.c
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
Copyright 2009 John Marshall (jm18@sanger.ac.uk)
|
2
|
+
Copyright 2009 John Marshall (jm18@sanger.ac.uk)
|
3
3
|
|
4
4
|
This file is part of Velvet.
|
5
5
|
|
@@ -89,14 +89,15 @@ void exitErrorf(int exitStatus, boolean showErrno, const char *format, ...)
|
|
89
89
|
fprintf(stderr, "\n");
|
90
90
|
va_end(args);
|
91
91
|
|
92
|
-
#ifdef DEBUG
|
92
|
+
#ifdef DEBUG
|
93
93
|
abort();
|
94
|
-
#endif
|
94
|
+
#endif
|
95
95
|
exit(exitStatus);
|
96
96
|
}
|
97
97
|
|
98
98
|
void velvetLog(const char *format, ...)
|
99
99
|
{
|
100
|
+
/* Don't print anything as it interferes with code bound through bioruby-velvet_underground
|
100
101
|
static boolean timeIsSet = false;
|
101
102
|
static struct timeval tvStart;
|
102
103
|
struct timeval tvNow;
|
@@ -117,14 +118,14 @@ void velvetLog(const char *format, ...)
|
|
117
118
|
vprintf(format, args);
|
118
119
|
va_end(args);
|
119
120
|
|
120
|
-
#ifdef DEBUG
|
121
|
+
#ifdef DEBUG
|
121
122
|
fflush(stdout);
|
122
|
-
#endif
|
123
|
+
#endif*/
|
123
124
|
}
|
124
125
|
|
125
|
-
void velvetFprintf(FILE * file, const char * format, ...)
|
126
|
+
void velvetFprintf(FILE * file, const char * format, ...)
|
126
127
|
{
|
127
|
-
|
128
|
+
va_list args;
|
128
129
|
|
129
130
|
va_start(args, format);
|
130
131
|
if (vfprintf(file, format, args) < 0) {
|
@@ -132,9 +133,9 @@ void velvetFprintf(FILE * file, const char * format, ...)
|
|
132
133
|
fprintf(stderr, "%s: ", programName);
|
133
134
|
fprintf(stderr, "Could not write into file\n");
|
134
135
|
va_end(args);
|
135
|
-
#ifdef DEBUG
|
136
|
+
#ifdef DEBUG
|
136
137
|
abort();
|
137
|
-
#endif
|
138
|
+
#endif
|
138
139
|
exit(EXIT_FAILURE);
|
139
140
|
}
|
140
141
|
va_end(args);
|