bio-velvet_underground 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +1 -0
- data/.rspec +1 -0
- data/Gemfile +1 -0
- data/README.md +53 -9
- data/VERSION +1 -1
- data/ext/mkrf_conf.rb +22 -4
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +18 -16
- data/ext/src/src/graph.c +50 -16
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- data/lib/bio-velvet_underground.rb +55 -11
- data/lib/bio-velvet_underground/binary_sequence_store.rb +86 -0
- data/lib/bio-velvet_underground/constants.rb +33 -0
- data/lib/bio-velvet_underground/graph.rb +262 -0
- data/lib/bio-velvet_underground/runner.rb +59 -0
- data/spec/binary_sequence_store_spec.rb +12 -0
- data/spec/data/2/CnyUnifiedSeq +0 -0
- data/spec/data/3/Assem/Graph2 +40 -0
- data/spec/data/3/Assem/LastGraph +40 -0
- data/spec/data/3/Assem/Log +42 -0
- data/spec/data/3/Assem/PreGraph +9 -0
- data/spec/data/3/Assem/Roadmaps +15 -0
- data/spec/data/3/Assem/Sequences +50 -0
- data/spec/data/3/Assem/contigs.fa +15 -0
- data/spec/data/3/Assem/stats.txt +5 -0
- data/spec/data/3/Sequences +50 -0
- data/spec/data/4/LastGraphKmer51Head +7 -0
- data/spec/graph_spec.rb +52 -0
- data/spec/runner_spec.rb +18 -0
- data/spec/spec_helper.rb +1 -16
- metadata +34 -4
- data/ext/bioruby.patch +0 -60
- data/lib/bio-velvet_underground/velvet_underground.rb +0 -72
data/ext/src/src/graphStats.c
CHANGED
@@ -522,7 +522,7 @@ void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads)
|
|
522
522
|
}
|
523
523
|
|
524
524
|
velvetFprintf(outfile, "\t%li", (long) markerCount(node));
|
525
|
-
printShortCounts(outfile, node, graph, reads);
|
525
|
+
printShortCounts(outfile, node, graph, reads);
|
526
526
|
|
527
527
|
velvetFprintf(outfile, "\n");
|
528
528
|
}
|
@@ -579,7 +579,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
579
579
|
if (destination == NULL)
|
580
580
|
return;
|
581
581
|
|
582
|
-
// Eliminate those that point to uniquely strain sequences
|
582
|
+
// Eliminate those that point to uniquely strain sequences
|
583
583
|
if (nodeGenomicMultiplicity(destination, firstStrain) != 1) {
|
584
584
|
// velvetLog("Multiple genome reads\n");
|
585
585
|
return;
|
@@ -600,7 +600,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
600
600
|
if (destination == destination2)
|
601
601
|
return;
|
602
602
|
|
603
|
-
// Eliminate those that point to uniquely strain sequences
|
603
|
+
// Eliminate those that point to uniquely strain sequences
|
604
604
|
if (isOnlyGenome(destination2, firstStrain))
|
605
605
|
return;
|
606
606
|
|
@@ -776,7 +776,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
776
776
|
if (regions) {
|
777
777
|
lastRegion->next = newMask(stops[indexStop]);
|
778
778
|
lastRegion = lastRegion->next;
|
779
|
-
} else {
|
779
|
+
} else {
|
780
780
|
regions = newMask(stops[indexStop]);
|
781
781
|
lastRegion = regions;
|
782
782
|
}
|
@@ -803,7 +803,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
803
803
|
if (regions) {
|
804
804
|
lastRegion->next = newMask(stops[indexStop]);
|
805
805
|
lastRegion = lastRegion->next;
|
806
|
-
} else {
|
806
|
+
} else {
|
807
807
|
regions = newMask(stops[indexStop]);
|
808
808
|
lastRegion = regions;
|
809
809
|
}
|
@@ -837,7 +837,7 @@ static int compareCoords(const void * A, const void * B) {
|
|
837
837
|
|
838
838
|
static void sortCoords(Coordinate * array, IDnum length) {
|
839
839
|
qsort(array, (size_t) length, sizeof(Coordinate), compareCoords);
|
840
|
-
}
|
840
|
+
}
|
841
841
|
|
842
842
|
static void getShortReadCoords(Coordinate * starts, Coordinate * stops, Node * node, Graph * graph, ShortLength * readLengths) {
|
843
843
|
ShortReadMarker * markers = getNodeReads(node, graph);
|
@@ -918,7 +918,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
918
918
|
gap = getGap(node, graph);
|
919
919
|
for (position = 0; position < WORDLENGTH; position++) {
|
920
920
|
if (position % 60 == 0 && position > 0)
|
921
|
-
velvetFprintf(outfile, "\n");
|
921
|
+
velvetFprintf(outfile, "\n");
|
922
922
|
nucleotide = getNucleotideChar(position, tString);
|
923
923
|
velvetFprintf(outfile, "%c", nucleotide);
|
924
924
|
}
|
@@ -938,7 +938,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
938
938
|
mask->finish) {
|
939
939
|
next = mask->next;
|
940
940
|
deallocateMask(mask);
|
941
|
-
mask = next;
|
941
|
+
mask = next;
|
942
942
|
}
|
943
943
|
|
944
944
|
if (gap
|
@@ -1047,7 +1047,7 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1047
1047
|
Node * nodeB = *((Node **) B);
|
1048
1048
|
double covA;
|
1049
1049
|
double covB;
|
1050
|
-
|
1050
|
+
|
1051
1051
|
if (getNodeLength(nodeA) == 0)
|
1052
1052
|
nodeA = NULL;
|
1053
1053
|
|
@@ -1063,8 +1063,8 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1063
1063
|
return -1;
|
1064
1064
|
|
1065
1065
|
// Deal with real coverage numbers:
|
1066
|
-
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
-
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1066
|
+
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
+
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1068
1068
|
|
1069
1069
|
if (covA > covB)
|
1070
1070
|
return 1;
|
@@ -1075,14 +1075,14 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1075
1075
|
|
1076
1076
|
double estimated_cov(Graph * graph, char * directory)
|
1077
1077
|
{
|
1078
|
-
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1078
|
+
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1079
1079
|
IDnum index;
|
1080
1080
|
Coordinate halfTotalLength = 0;
|
1081
1081
|
Coordinate sumLength = 0;
|
1082
1082
|
Node *node;
|
1083
1083
|
char *logFilename =
|
1084
1084
|
mallocOrExit(strlen(directory) + 100, char);
|
1085
|
-
char *statsLine =
|
1085
|
+
char *statsLine =
|
1086
1086
|
mallocOrExit(5000, char);
|
1087
1087
|
FILE *logFile;
|
1088
1088
|
|
@@ -1152,7 +1152,7 @@ static boolean terminalReferenceMarker(Node * node, ReadSet * reads) {
|
|
1152
1152
|
|
1153
1153
|
static boolean hasReferenceMarker(Node * node, ReadSet * reads) {
|
1154
1154
|
PassageMarkerI marker;
|
1155
|
-
|
1155
|
+
|
1156
1156
|
for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker))
|
1157
1157
|
if (reads->categories[getAbsolutePassMarkerSeqID(marker) - 1] == REFERENCE)
|
1158
1158
|
return true;
|
@@ -1229,14 +1229,14 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1229
1229
|
IDnum index;
|
1230
1230
|
Node *node;
|
1231
1231
|
boolean denounceReads = readStartsAreActivated(graph);
|
1232
|
-
boolean *res = NULL;
|
1232
|
+
boolean *res = NULL;
|
1233
1233
|
FILE * outfile = NULL;
|
1234
1234
|
|
1235
1235
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1236
|
-
|
1236
|
+
|
1237
1237
|
if (denounceReads)
|
1238
1238
|
res = callocOrExit(sequenceCount(graph), boolean);
|
1239
|
-
|
1239
|
+
|
1240
1240
|
if (export) {
|
1241
1241
|
outfile = fopen(filename, "w");
|
1242
1242
|
|
@@ -1255,7 +1255,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1255
1255
|
if (getNodeLength(node) == 0)
|
1256
1256
|
continue;
|
1257
1257
|
|
1258
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1258
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1259
1259
|
&& !hasReferenceMarker(node, reads))
|
1260
1260
|
removeNodeAndDenounceDubiousReads(graph,
|
1261
1261
|
node,
|
@@ -1273,7 +1273,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1273
1273
|
if (getNodeLength(node) == 0)
|
1274
1274
|
continue;
|
1275
1275
|
|
1276
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1276
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1277
1277
|
&& !terminalReferenceMarker(node, reads))
|
1278
1278
|
removeNodeAndDenounceDubiousReads(graph,
|
1279
1279
|
node,
|
@@ -1296,7 +1296,7 @@ static Coordinate getLongCoverage(Node * node) {
|
|
1296
1296
|
|
1297
1297
|
for (marker = getMarker(node); marker; marker = getNextInNode(marker))
|
1298
1298
|
total += getPassageMarkerLength(marker);
|
1299
|
-
|
1299
|
+
|
1300
1300
|
return total;
|
1301
1301
|
}
|
1302
1302
|
|
@@ -1313,7 +1313,7 @@ void removeLowCoverageReferenceNodes(Graph * graph, double minCov, double minLon
|
|
1313
1313
|
if (getNodeLength(node) == 0)
|
1314
1314
|
continue;
|
1315
1315
|
|
1316
|
-
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1316
|
+
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1317
1317
|
|| getLongCoverage(node) / getNodeLength(node) < minLongCov)
|
1318
1318
|
&& hasReferenceMarker(node, reads)) {
|
1319
1319
|
destroyNodePassageMarkers(graph, node);
|
@@ -1342,7 +1342,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1342
1342
|
return;
|
1343
1343
|
|
1344
1344
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1345
|
-
|
1345
|
+
|
1346
1346
|
if (export) {
|
1347
1347
|
outfile = fopen(filename, "a");
|
1348
1348
|
|
@@ -1360,7 +1360,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1360
1360
|
if (getNodeLength(node) == 0)
|
1361
1361
|
continue;
|
1362
1362
|
|
1363
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1363
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1364
1364
|
&& !hasReferenceMarker(node, reads))
|
1365
1365
|
removeNodeAndDenounceDubiousReads(graph,
|
1366
1366
|
node,
|
@@ -1378,7 +1378,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1378
1378
|
if (getNodeLength(node) == 0)
|
1379
1379
|
continue;
|
1380
1380
|
|
1381
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1381
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1382
1382
|
&& !terminalReferenceMarker(node, reads))
|
1383
1383
|
removeNodeAndDenounceDubiousReads(graph,
|
1384
1384
|
node,
|
@@ -1404,7 +1404,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1404
1404
|
return;
|
1405
1405
|
|
1406
1406
|
velvetLog("Applying an upper coverage cutoff of %f...\n", maxCov);
|
1407
|
-
|
1407
|
+
|
1408
1408
|
if (export) {
|
1409
1409
|
outfile = fopen(filename, "w");
|
1410
1410
|
|
@@ -1423,7 +1423,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1423
1423
|
&& getTotalCoverage(node) / getNodeLength(node) > maxCov) {
|
1424
1424
|
destroyNodePassageMarkers(graph, node);
|
1425
1425
|
|
1426
|
-
if (export && getNodeLength(node) > minLength)
|
1426
|
+
if (export && getNodeLength(node) > minLength)
|
1427
1427
|
exportLongNodeSequence(outfile, node, graph, NULL, NULL, -1);
|
1428
1428
|
|
1429
1429
|
destroyNode(node, graph);
|
@@ -1784,7 +1784,7 @@ Coordinate totalAssemblyLength(Graph * graph)
|
|
1784
1784
|
return total;
|
1785
1785
|
}
|
1786
1786
|
|
1787
|
-
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1787
|
+
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1788
1788
|
{
|
1789
1789
|
IDnum res = 0;
|
1790
1790
|
boolean * used = callocOrExit(sequenceCount(graph) + 1, boolean);
|
@@ -1798,16 +1798,16 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1798
1798
|
node = getNodeInGraph(graph, nodeID);
|
1799
1799
|
if (node == NULL || getNodeLength(node) < minContigLength)
|
1800
1800
|
continue;
|
1801
|
-
|
1801
|
+
|
1802
1802
|
// Long reads
|
1803
1803
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1804
1804
|
readID = getPassageMarkerSequenceID(marker);
|
1805
1805
|
if (readID < 0)
|
1806
1806
|
readID = -readID;
|
1807
|
-
used[readID] = true;
|
1808
|
-
}
|
1807
|
+
used[readID] = true;
|
1808
|
+
}
|
1809
1809
|
|
1810
|
-
// Short reads
|
1810
|
+
// Short reads
|
1811
1811
|
if (!readStartsAreActivated(graph))
|
1812
1812
|
continue;
|
1813
1813
|
|
@@ -1816,23 +1816,23 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1816
1816
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1817
1817
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1818
1818
|
readID = getShortReadMarkerID(shortReadMarker);
|
1819
|
-
used[readID] = true;
|
1819
|
+
used[readID] = true;
|
1820
1820
|
}
|
1821
|
-
|
1821
|
+
|
1822
1822
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1823
1823
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1824
1824
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1825
1825
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1826
1826
|
readID = getShortReadMarkerID(shortReadMarker);
|
1827
|
-
used[readID] = true;
|
1827
|
+
used[readID] = true;
|
1828
1828
|
}
|
1829
1829
|
}
|
1830
1830
|
|
1831
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1831
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1832
1832
|
if (used[readID])
|
1833
1833
|
res++;
|
1834
1834
|
|
1835
|
-
free(used);
|
1835
|
+
free(used);
|
1836
1836
|
|
1837
1837
|
return res;
|
1838
1838
|
}
|
@@ -1841,7 +1841,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1841
1841
|
{
|
1842
1842
|
char *logFilename =
|
1843
1843
|
mallocOrExit(strlen(directory) + 100, char);
|
1844
|
-
char *statsLine =
|
1844
|
+
char *statsLine =
|
1845
1845
|
mallocOrExit(5000, char);
|
1846
1846
|
FILE *logFile;
|
1847
1847
|
|
@@ -1860,7 +1860,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1860
1860
|
(long) sequenceCount(graph));
|
1861
1861
|
|
1862
1862
|
velvetFprintf(logFile, "%s", statsLine);
|
1863
|
-
velvetFprintf(stdout, "%s", statsLine);
|
1863
|
+
//velvetFprintf(stdout, "%s", statsLine); //don't print to stdout when part of a library
|
1864
1864
|
|
1865
1865
|
fclose(logFile);
|
1866
1866
|
free(logFilename);
|
@@ -1888,16 +1888,16 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1888
1888
|
node = getNodeInGraph(graph, nodeID);
|
1889
1889
|
if (node == NULL || getNodeLength(node) < minContigKmerLength)
|
1890
1890
|
continue;
|
1891
|
-
|
1891
|
+
|
1892
1892
|
// Long reads
|
1893
1893
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1894
1894
|
readID = getPassageMarkerSequenceID(marker);
|
1895
1895
|
if (readID < 0)
|
1896
1896
|
readID = -readID;
|
1897
|
-
used[readID] = true;
|
1898
|
-
}
|
1897
|
+
used[readID] = true;
|
1898
|
+
}
|
1899
1899
|
|
1900
|
-
// Short reads
|
1900
|
+
// Short reads
|
1901
1901
|
if (!readStartsAreActivated(graph))
|
1902
1902
|
continue;
|
1903
1903
|
|
@@ -1906,31 +1906,31 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1906
1906
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1907
1907
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1908
1908
|
readID = getShortReadMarkerID(shortReadMarker);
|
1909
|
-
used[readID] = true;
|
1909
|
+
used[readID] = true;
|
1910
1910
|
}
|
1911
|
-
|
1911
|
+
|
1912
1912
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1913
1913
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1914
1914
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1915
1915
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1916
1916
|
readID = getShortReadMarkerID(shortReadMarker);
|
1917
|
-
used[readID] = true;
|
1917
|
+
used[readID] = true;
|
1918
1918
|
}
|
1919
1919
|
}
|
1920
1920
|
|
1921
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1921
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1922
1922
|
if (!used[readID])
|
1923
1923
|
exportTightString(outfile, getTightStringInArray(reads->tSequences, readID - 1), readID);
|
1924
1924
|
|
1925
1925
|
free(outFilename);
|
1926
|
-
free(used);
|
1926
|
+
free(used);
|
1927
1927
|
fclose(outfile);
|
1928
1928
|
}
|
1929
1929
|
|
1930
1930
|
static IDnum getReferenceCount(ReadSet * reads) {
|
1931
1931
|
IDnum index;
|
1932
1932
|
|
1933
|
-
for (index = 0; index < reads->readCount; index++)
|
1933
|
+
for (index = 0; index < reads->readCount; index++)
|
1934
1934
|
if (reads->categories[index] != REFERENCE)
|
1935
1935
|
break;
|
1936
1936
|
|
@@ -1979,7 +1979,7 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1979
1979
|
start = longlongvar;
|
1980
1980
|
sscanf(strtok(NULL, ":-\r\n"), "%lli", &longlongvar);
|
1981
1981
|
finish = longlongvar;
|
1982
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1982
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1983
1983
|
if (start <= finish) {
|
1984
1984
|
strcpy(refCoords[refIndex].name, name);
|
1985
1985
|
refCoords[refIndex].start = start;
|
@@ -1998,17 +1998,17 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1998
1998
|
}
|
1999
1999
|
|
2000
2000
|
strcpy(name, line + 1);
|
2001
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2001
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2002
2002
|
strcpy(refCoords[refIndex].name, name);
|
2003
2003
|
refCoords[refIndex].start = 1;
|
2004
2004
|
refCoords[refIndex].finish = -1;
|
2005
2005
|
refCoords[refIndex].positive_strand = true;
|
2006
2006
|
}
|
2007
2007
|
if (++refIndex == referenceCount)
|
2008
|
-
break;
|
2008
|
+
break;
|
2009
2009
|
}
|
2010
2010
|
}
|
2011
|
-
|
2011
|
+
|
2012
2012
|
fclose(file);
|
2013
2013
|
}
|
2014
2014
|
return refCoords;
|
@@ -2020,23 +2020,23 @@ typedef struct refMap_st {
|
|
2020
2020
|
IDnum refID;
|
2021
2021
|
IDnum refStart;
|
2022
2022
|
IDnum refFinish;
|
2023
|
-
} ATTRIBUTE_PACKED ReferenceMapping;
|
2023
|
+
} ATTRIBUTE_PACKED ReferenceMapping;
|
2024
2024
|
|
2025
2025
|
static int compareReferenceMappings(const void * A, const void * B) {
|
2026
2026
|
ReferenceMapping * refMapA = (ReferenceMapping *) A;
|
2027
2027
|
ReferenceMapping * refMapB = (ReferenceMapping *) B;
|
2028
|
-
|
2028
|
+
|
2029
2029
|
if (refMapA->start < refMapB->start)
|
2030
2030
|
return -1;
|
2031
2031
|
else if (refMapA->start == refMapB->start)
|
2032
2032
|
return 0;
|
2033
|
-
else
|
2033
|
+
else
|
2034
2034
|
return 1;
|
2035
2035
|
}
|
2036
2036
|
|
2037
2037
|
static void initializeReferenceMapping(ReferenceMapping * refMap, PassageMarkerI marker, Node * node) {
|
2038
2038
|
refMap->start = getStartOffset(marker);
|
2039
|
-
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2039
|
+
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2040
2040
|
refMap->refID = getPassageMarkerSequenceID(marker);
|
2041
2041
|
refMap->refStart = getPassageMarkerStart(marker);
|
2042
2042
|
refMap->refFinish = getPassageMarkerFinish(marker);
|
@@ -2046,7 +2046,7 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2046
2046
|
ReferenceCoord * refCoord;
|
2047
2047
|
Coordinate start, finish;
|
2048
2048
|
|
2049
|
-
if (mapping->refID > 0)
|
2049
|
+
if (mapping->refID > 0)
|
2050
2050
|
refCoord = &refCoords[mapping->refID - 1];
|
2051
2051
|
else
|
2052
2052
|
refCoord = &refCoords[-mapping->refID - 1];
|
@@ -2064,13 +2064,13 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2064
2064
|
start = refCoord->start + mapping->refStart + wordLength - 1;
|
2065
2065
|
finish = refCoord->start + mapping->refFinish + 1;
|
2066
2066
|
} else {
|
2067
|
-
start = refCoord->finish - mapping->refStart;
|
2068
|
-
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2067
|
+
start = refCoord->finish - mapping->refStart;
|
2068
|
+
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2069
2069
|
}
|
2070
2070
|
}
|
2071
|
-
|
2071
|
+
|
2072
2072
|
velvetFprintf(file, "%lli\t%lli\t%s\t%lli\t%lli\n",
|
2073
|
-
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2073
|
+
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2074
2074
|
refCoord->name, (long long) start, (long long) finish);
|
2075
2075
|
}
|
2076
2076
|
|
@@ -2089,7 +2089,7 @@ static void exportLongNodeMapping(FILE * outfile, Node * node, ReadSet * reads,
|
|
2089
2089
|
velvetFprintf(outfile, ">contig_%li\n", (long) getNodeID(node));
|
2090
2090
|
|
2091
2091
|
// Create table
|
2092
|
-
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2092
|
+
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2093
2093
|
|
2094
2094
|
// Initialize table
|
2095
2095
|
referenceCount = 0;
|
@@ -2115,9 +2115,9 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2115
2115
|
IDnum nodeIndex, refIndex;
|
2116
2116
|
Node *node;
|
2117
2117
|
ReferenceCoord * refCoords;
|
2118
|
-
IDnum referenceCount = getReferenceCount(reads);
|
2118
|
+
IDnum referenceCount = getReferenceCount(reads);
|
2119
2119
|
|
2120
|
-
if (referenceCount == 0)
|
2120
|
+
if (referenceCount == 0)
|
2121
2121
|
return;
|
2122
2122
|
|
2123
2123
|
refCoords = collectReferenceCoords(seqReadInfo, referenceCount);
|
@@ -2135,7 +2135,7 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2135
2135
|
|
2136
2136
|
if (node == NULL || getNodeLength(node) < minLength)
|
2137
2137
|
continue;
|
2138
|
-
|
2138
|
+
|
2139
2139
|
exportLongNodeMapping(outfile, node, reads, refCoords, getWordLength(graph));
|
2140
2140
|
}
|
2141
2141
|
|
data/ext/src/src/run.c
CHANGED
@@ -39,7 +39,7 @@ static void printUsage()
|
|
39
39
|
printf("\thash_length\t: EITHER an odd integer (if even, it will be decremented) <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
40
40
|
printf("\t\t\t: OR: m,M,s where m and M are odd integers (if not, they will be decremented) with m < M <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
41
41
|
puts("\t\t\t\tand s is a step (even number). Velvet will then hash from k=m to k=M with a step of s");
|
42
|
-
puts("\tfilename\t: path to sequence file or - for standard input");
|
42
|
+
puts("\tfilename\t: path to sequence file or - for standard input");
|
43
43
|
puts("");
|
44
44
|
puts("File format options:");
|
45
45
|
puts("\t-fasta\t-fastq\t-raw\t-fasta.gz\t-fastq.gz\t-raw.gz\t-sam\t-bam\t-fmtAuto");
|
@@ -52,7 +52,7 @@ static void printUsage()
|
|
52
52
|
puts("Read type options:");
|
53
53
|
puts("\t-short\t-shortPaired");
|
54
54
|
#if CATEGORIES <= 5
|
55
|
-
Category cat;
|
55
|
+
Category cat;
|
56
56
|
for (cat = 2; cat <= CATEGORIES; cat++)
|
57
57
|
printf("\t-short%i\t-shortPaired%i\n", cat, cat);
|
58
58
|
#else
|
@@ -93,7 +93,7 @@ static void printUsage()
|
|
93
93
|
puts("\t\t[Both files are picked up by graph, so please leave them there]");
|
94
94
|
}
|
95
95
|
|
96
|
-
int
|
96
|
+
int velveth(int argc, char **argv)
|
97
97
|
{
|
98
98
|
ReadSet *allSequences = NULL;
|
99
99
|
SplayTable *splayTable;
|
@@ -161,18 +161,18 @@ int main(int argc, char **argv)
|
|
161
161
|
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
162
162
|
hashLength, MAXKMERLENGTH);
|
163
163
|
hashLength = MAXKMERLENGTH;
|
164
|
-
}
|
164
|
+
}
|
165
165
|
if (hashLength <= 0) {
|
166
166
|
velvetLog("Invalid hash length: %s\n", argv[2]);
|
167
167
|
printUsage();
|
168
168
|
return 0;
|
169
|
-
}
|
169
|
+
}
|
170
170
|
if (hashLength % 2 == 0) {
|
171
171
|
velvetLog
|
172
172
|
("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
|
173
173
|
hashLength, hashLength - 1);
|
174
174
|
hashLength--;
|
175
|
-
}
|
175
|
+
}
|
176
176
|
|
177
177
|
if (multiple_kmers) {
|
178
178
|
if (hashLengthMax > MAXKMERLENGTH + 1) {
|
@@ -180,12 +180,12 @@ int main(int argc, char **argv)
|
|
180
180
|
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
181
181
|
hashLengthMax, MAXKMERLENGTH + 1);
|
182
182
|
hashLengthMax = MAXKMERLENGTH + 1;
|
183
|
-
}
|
183
|
+
}
|
184
184
|
if (hashLengthMax <= hashLength) {
|
185
185
|
velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]);
|
186
186
|
printUsage();
|
187
187
|
return 0;
|
188
|
-
}
|
188
|
+
}
|
189
189
|
|
190
190
|
if (hashLengthStep <= 0) {
|
191
191
|
velvetLog("Non-positive hash length! Setting it to 2\n");
|
@@ -215,7 +215,7 @@ int main(int argc, char **argv)
|
|
215
215
|
sprintf(buf,"%s_%d",argv[1],h);
|
216
216
|
directory = mallocOrExit(strlen(buf) + 100, char);
|
217
217
|
strcpy(directory,buf);
|
218
|
-
} else
|
218
|
+
} else
|
219
219
|
directory = argv[1];
|
220
220
|
|
221
221
|
filename = mallocOrExit(strlen(directory) + 100, char);
|