bio-velvet_underground 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +1 -0
- data/.rspec +1 -0
- data/Gemfile +1 -0
- data/README.md +53 -9
- data/VERSION +1 -1
- data/ext/mkrf_conf.rb +22 -4
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +18 -16
- data/ext/src/src/graph.c +50 -16
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- data/lib/bio-velvet_underground.rb +55 -11
- data/lib/bio-velvet_underground/binary_sequence_store.rb +86 -0
- data/lib/bio-velvet_underground/constants.rb +33 -0
- data/lib/bio-velvet_underground/graph.rb +262 -0
- data/lib/bio-velvet_underground/runner.rb +59 -0
- data/spec/binary_sequence_store_spec.rb +12 -0
- data/spec/data/2/CnyUnifiedSeq +0 -0
- data/spec/data/3/Assem/Graph2 +40 -0
- data/spec/data/3/Assem/LastGraph +40 -0
- data/spec/data/3/Assem/Log +42 -0
- data/spec/data/3/Assem/PreGraph +9 -0
- data/spec/data/3/Assem/Roadmaps +15 -0
- data/spec/data/3/Assem/Sequences +50 -0
- data/spec/data/3/Assem/contigs.fa +15 -0
- data/spec/data/3/Assem/stats.txt +5 -0
- data/spec/data/3/Sequences +50 -0
- data/spec/data/4/LastGraphKmer51Head +7 -0
- data/spec/graph_spec.rb +52 -0
- data/spec/runner_spec.rb +18 -0
- data/spec/spec_helper.rb +1 -16
- metadata +34 -4
- data/ext/bioruby.patch +0 -60
- data/lib/bio-velvet_underground/velvet_underground.rb +0 -72
data/ext/src/src/graphStats.c
CHANGED
@@ -522,7 +522,7 @@ void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads)
|
|
522
522
|
}
|
523
523
|
|
524
524
|
velvetFprintf(outfile, "\t%li", (long) markerCount(node));
|
525
|
-
printShortCounts(outfile, node, graph, reads);
|
525
|
+
printShortCounts(outfile, node, graph, reads);
|
526
526
|
|
527
527
|
velvetFprintf(outfile, "\n");
|
528
528
|
}
|
@@ -579,7 +579,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
579
579
|
if (destination == NULL)
|
580
580
|
return;
|
581
581
|
|
582
|
-
// Eliminate those that point to uniquely strain sequences
|
582
|
+
// Eliminate those that point to uniquely strain sequences
|
583
583
|
if (nodeGenomicMultiplicity(destination, firstStrain) != 1) {
|
584
584
|
// velvetLog("Multiple genome reads\n");
|
585
585
|
return;
|
@@ -600,7 +600,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
|
|
600
600
|
if (destination == destination2)
|
601
601
|
return;
|
602
602
|
|
603
|
-
// Eliminate those that point to uniquely strain sequences
|
603
|
+
// Eliminate those that point to uniquely strain sequences
|
604
604
|
if (isOnlyGenome(destination2, firstStrain))
|
605
605
|
return;
|
606
606
|
|
@@ -776,7 +776,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
776
776
|
if (regions) {
|
777
777
|
lastRegion->next = newMask(stops[indexStop]);
|
778
778
|
lastRegion = lastRegion->next;
|
779
|
-
} else {
|
779
|
+
} else {
|
780
780
|
regions = newMask(stops[indexStop]);
|
781
781
|
lastRegion = regions;
|
782
782
|
}
|
@@ -803,7 +803,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
|
|
803
803
|
if (regions) {
|
804
804
|
lastRegion->next = newMask(stops[indexStop]);
|
805
805
|
lastRegion = lastRegion->next;
|
806
|
-
} else {
|
806
|
+
} else {
|
807
807
|
regions = newMask(stops[indexStop]);
|
808
808
|
lastRegion = regions;
|
809
809
|
}
|
@@ -837,7 +837,7 @@ static int compareCoords(const void * A, const void * B) {
|
|
837
837
|
|
838
838
|
static void sortCoords(Coordinate * array, IDnum length) {
|
839
839
|
qsort(array, (size_t) length, sizeof(Coordinate), compareCoords);
|
840
|
-
}
|
840
|
+
}
|
841
841
|
|
842
842
|
static void getShortReadCoords(Coordinate * starts, Coordinate * stops, Node * node, Graph * graph, ShortLength * readLengths) {
|
843
843
|
ShortReadMarker * markers = getNodeReads(node, graph);
|
@@ -918,7 +918,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
918
918
|
gap = getGap(node, graph);
|
919
919
|
for (position = 0; position < WORDLENGTH; position++) {
|
920
920
|
if (position % 60 == 0 && position > 0)
|
921
|
-
velvetFprintf(outfile, "\n");
|
921
|
+
velvetFprintf(outfile, "\n");
|
922
922
|
nucleotide = getNucleotideChar(position, tString);
|
923
923
|
velvetFprintf(outfile, "%c", nucleotide);
|
924
924
|
}
|
@@ -938,7 +938,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
|
|
938
938
|
mask->finish) {
|
939
939
|
next = mask->next;
|
940
940
|
deallocateMask(mask);
|
941
|
-
mask = next;
|
941
|
+
mask = next;
|
942
942
|
}
|
943
943
|
|
944
944
|
if (gap
|
@@ -1047,7 +1047,7 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1047
1047
|
Node * nodeB = *((Node **) B);
|
1048
1048
|
double covA;
|
1049
1049
|
double covB;
|
1050
|
-
|
1050
|
+
|
1051
1051
|
if (getNodeLength(nodeA) == 0)
|
1052
1052
|
nodeA = NULL;
|
1053
1053
|
|
@@ -1063,8 +1063,8 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1063
1063
|
return -1;
|
1064
1064
|
|
1065
1065
|
// Deal with real coverage numbers:
|
1066
|
-
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
-
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1066
|
+
covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
|
1067
|
+
covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
|
1068
1068
|
|
1069
1069
|
if (covA > covB)
|
1070
1070
|
return 1;
|
@@ -1075,14 +1075,14 @@ int compareNodeCovs(const void * A, const void * B) {
|
|
1075
1075
|
|
1076
1076
|
double estimated_cov(Graph * graph, char * directory)
|
1077
1077
|
{
|
1078
|
-
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1078
|
+
Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
|
1079
1079
|
IDnum index;
|
1080
1080
|
Coordinate halfTotalLength = 0;
|
1081
1081
|
Coordinate sumLength = 0;
|
1082
1082
|
Node *node;
|
1083
1083
|
char *logFilename =
|
1084
1084
|
mallocOrExit(strlen(directory) + 100, char);
|
1085
|
-
char *statsLine =
|
1085
|
+
char *statsLine =
|
1086
1086
|
mallocOrExit(5000, char);
|
1087
1087
|
FILE *logFile;
|
1088
1088
|
|
@@ -1152,7 +1152,7 @@ static boolean terminalReferenceMarker(Node * node, ReadSet * reads) {
|
|
1152
1152
|
|
1153
1153
|
static boolean hasReferenceMarker(Node * node, ReadSet * reads) {
|
1154
1154
|
PassageMarkerI marker;
|
1155
|
-
|
1155
|
+
|
1156
1156
|
for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker))
|
1157
1157
|
if (reads->categories[getAbsolutePassMarkerSeqID(marker) - 1] == REFERENCE)
|
1158
1158
|
return true;
|
@@ -1229,14 +1229,14 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1229
1229
|
IDnum index;
|
1230
1230
|
Node *node;
|
1231
1231
|
boolean denounceReads = readStartsAreActivated(graph);
|
1232
|
-
boolean *res = NULL;
|
1232
|
+
boolean *res = NULL;
|
1233
1233
|
FILE * outfile = NULL;
|
1234
1234
|
|
1235
1235
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1236
|
-
|
1236
|
+
|
1237
1237
|
if (denounceReads)
|
1238
1238
|
res = callocOrExit(sequenceCount(graph), boolean);
|
1239
|
-
|
1239
|
+
|
1240
1240
|
if (export) {
|
1241
1241
|
outfile = fopen(filename, "w");
|
1242
1242
|
|
@@ -1255,7 +1255,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1255
1255
|
if (getNodeLength(node) == 0)
|
1256
1256
|
continue;
|
1257
1257
|
|
1258
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1258
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1259
1259
|
&& !hasReferenceMarker(node, reads))
|
1260
1260
|
removeNodeAndDenounceDubiousReads(graph,
|
1261
1261
|
node,
|
@@ -1273,7 +1273,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1273
1273
|
if (getNodeLength(node) == 0)
|
1274
1274
|
continue;
|
1275
1275
|
|
1276
|
-
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1276
|
+
if (getTotalCoverage(node) / getNodeLength(node) < minCov
|
1277
1277
|
&& !terminalReferenceMarker(node, reads))
|
1278
1278
|
removeNodeAndDenounceDubiousReads(graph,
|
1279
1279
|
node,
|
@@ -1296,7 +1296,7 @@ static Coordinate getLongCoverage(Node * node) {
|
|
1296
1296
|
|
1297
1297
|
for (marker = getMarker(node); marker; marker = getNextInNode(marker))
|
1298
1298
|
total += getPassageMarkerLength(marker);
|
1299
|
-
|
1299
|
+
|
1300
1300
|
return total;
|
1301
1301
|
}
|
1302
1302
|
|
@@ -1313,7 +1313,7 @@ void removeLowCoverageReferenceNodes(Graph * graph, double minCov, double minLon
|
|
1313
1313
|
if (getNodeLength(node) == 0)
|
1314
1314
|
continue;
|
1315
1315
|
|
1316
|
-
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1316
|
+
if ((getTotalCoverage(node) / getNodeLength(node) < minCov
|
1317
1317
|
|| getLongCoverage(node) / getNodeLength(node) < minLongCov)
|
1318
1318
|
&& hasReferenceMarker(node, reads)) {
|
1319
1319
|
destroyNodePassageMarkers(graph, node);
|
@@ -1342,7 +1342,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1342
1342
|
return;
|
1343
1343
|
|
1344
1344
|
velvetLog("Removing contigs with coverage < %f...\n", minCov);
|
1345
|
-
|
1345
|
+
|
1346
1346
|
if (export) {
|
1347
1347
|
outfile = fopen(filename, "a");
|
1348
1348
|
|
@@ -1360,7 +1360,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1360
1360
|
if (getNodeLength(node) == 0)
|
1361
1361
|
continue;
|
1362
1362
|
|
1363
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1363
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1364
1364
|
&& !hasReferenceMarker(node, reads))
|
1365
1365
|
removeNodeAndDenounceDubiousReads(graph,
|
1366
1366
|
node,
|
@@ -1378,7 +1378,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
|
|
1378
1378
|
if (getNodeLength(node) == 0)
|
1379
1379
|
continue;
|
1380
1380
|
|
1381
|
-
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1381
|
+
if (getLongCoverage(node) / getNodeLength(node) < minCov
|
1382
1382
|
&& !terminalReferenceMarker(node, reads))
|
1383
1383
|
removeNodeAndDenounceDubiousReads(graph,
|
1384
1384
|
node,
|
@@ -1404,7 +1404,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1404
1404
|
return;
|
1405
1405
|
|
1406
1406
|
velvetLog("Applying an upper coverage cutoff of %f...\n", maxCov);
|
1407
|
-
|
1407
|
+
|
1408
1408
|
if (export) {
|
1409
1409
|
outfile = fopen(filename, "w");
|
1410
1410
|
|
@@ -1423,7 +1423,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
|
|
1423
1423
|
&& getTotalCoverage(node) / getNodeLength(node) > maxCov) {
|
1424
1424
|
destroyNodePassageMarkers(graph, node);
|
1425
1425
|
|
1426
|
-
if (export && getNodeLength(node) > minLength)
|
1426
|
+
if (export && getNodeLength(node) > minLength)
|
1427
1427
|
exportLongNodeSequence(outfile, node, graph, NULL, NULL, -1);
|
1428
1428
|
|
1429
1429
|
destroyNode(node, graph);
|
@@ -1784,7 +1784,7 @@ Coordinate totalAssemblyLength(Graph * graph)
|
|
1784
1784
|
return total;
|
1785
1785
|
}
|
1786
1786
|
|
1787
|
-
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1787
|
+
IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
1788
1788
|
{
|
1789
1789
|
IDnum res = 0;
|
1790
1790
|
boolean * used = callocOrExit(sequenceCount(graph) + 1, boolean);
|
@@ -1798,16 +1798,16 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1798
1798
|
node = getNodeInGraph(graph, nodeID);
|
1799
1799
|
if (node == NULL || getNodeLength(node) < minContigLength)
|
1800
1800
|
continue;
|
1801
|
-
|
1801
|
+
|
1802
1802
|
// Long reads
|
1803
1803
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1804
1804
|
readID = getPassageMarkerSequenceID(marker);
|
1805
1805
|
if (readID < 0)
|
1806
1806
|
readID = -readID;
|
1807
|
-
used[readID] = true;
|
1808
|
-
}
|
1807
|
+
used[readID] = true;
|
1808
|
+
}
|
1809
1809
|
|
1810
|
-
// Short reads
|
1810
|
+
// Short reads
|
1811
1811
|
if (!readStartsAreActivated(graph))
|
1812
1812
|
continue;
|
1813
1813
|
|
@@ -1816,23 +1816,23 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
|
|
1816
1816
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1817
1817
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1818
1818
|
readID = getShortReadMarkerID(shortReadMarker);
|
1819
|
-
used[readID] = true;
|
1819
|
+
used[readID] = true;
|
1820
1820
|
}
|
1821
|
-
|
1821
|
+
|
1822
1822
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1823
1823
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1824
1824
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1825
1825
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1826
1826
|
readID = getShortReadMarkerID(shortReadMarker);
|
1827
|
-
used[readID] = true;
|
1827
|
+
used[readID] = true;
|
1828
1828
|
}
|
1829
1829
|
}
|
1830
1830
|
|
1831
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1831
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1832
1832
|
if (used[readID])
|
1833
1833
|
res++;
|
1834
1834
|
|
1835
|
-
free(used);
|
1835
|
+
free(used);
|
1836
1836
|
|
1837
1837
|
return res;
|
1838
1838
|
}
|
@@ -1841,7 +1841,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1841
1841
|
{
|
1842
1842
|
char *logFilename =
|
1843
1843
|
mallocOrExit(strlen(directory) + 100, char);
|
1844
|
-
char *statsLine =
|
1844
|
+
char *statsLine =
|
1845
1845
|
mallocOrExit(5000, char);
|
1846
1846
|
FILE *logFile;
|
1847
1847
|
|
@@ -1860,7 +1860,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
|
|
1860
1860
|
(long) sequenceCount(graph));
|
1861
1861
|
|
1862
1862
|
velvetFprintf(logFile, "%s", statsLine);
|
1863
|
-
velvetFprintf(stdout, "%s", statsLine);
|
1863
|
+
//velvetFprintf(stdout, "%s", statsLine); //don't print to stdout when part of a library
|
1864
1864
|
|
1865
1865
|
fclose(logFile);
|
1866
1866
|
free(logFilename);
|
@@ -1888,16 +1888,16 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1888
1888
|
node = getNodeInGraph(graph, nodeID);
|
1889
1889
|
if (node == NULL || getNodeLength(node) < minContigKmerLength)
|
1890
1890
|
continue;
|
1891
|
-
|
1891
|
+
|
1892
1892
|
// Long reads
|
1893
1893
|
for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
|
1894
1894
|
readID = getPassageMarkerSequenceID(marker);
|
1895
1895
|
if (readID < 0)
|
1896
1896
|
readID = -readID;
|
1897
|
-
used[readID] = true;
|
1898
|
-
}
|
1897
|
+
used[readID] = true;
|
1898
|
+
}
|
1899
1899
|
|
1900
|
-
// Short reads
|
1900
|
+
// Short reads
|
1901
1901
|
if (!readStartsAreActivated(graph))
|
1902
1902
|
continue;
|
1903
1903
|
|
@@ -1906,31 +1906,31 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
|
|
1906
1906
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1907
1907
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1908
1908
|
readID = getShortReadMarkerID(shortReadMarker);
|
1909
|
-
used[readID] = true;
|
1909
|
+
used[readID] = true;
|
1910
1910
|
}
|
1911
|
-
|
1911
|
+
|
1912
1912
|
shortReadArray = getNodeReads(getTwinNode(node), graph);
|
1913
1913
|
shortReadCount = getNodeReadCount(getTwinNode(node), graph);
|
1914
1914
|
for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
|
1915
1915
|
shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
|
1916
1916
|
readID = getShortReadMarkerID(shortReadMarker);
|
1917
|
-
used[readID] = true;
|
1917
|
+
used[readID] = true;
|
1918
1918
|
}
|
1919
1919
|
}
|
1920
1920
|
|
1921
|
-
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1921
|
+
for (readID = 1; readID <= sequenceCount(graph); readID++)
|
1922
1922
|
if (!used[readID])
|
1923
1923
|
exportTightString(outfile, getTightStringInArray(reads->tSequences, readID - 1), readID);
|
1924
1924
|
|
1925
1925
|
free(outFilename);
|
1926
|
-
free(used);
|
1926
|
+
free(used);
|
1927
1927
|
fclose(outfile);
|
1928
1928
|
}
|
1929
1929
|
|
1930
1930
|
static IDnum getReferenceCount(ReadSet * reads) {
|
1931
1931
|
IDnum index;
|
1932
1932
|
|
1933
|
-
for (index = 0; index < reads->readCount; index++)
|
1933
|
+
for (index = 0; index < reads->readCount; index++)
|
1934
1934
|
if (reads->categories[index] != REFERENCE)
|
1935
1935
|
break;
|
1936
1936
|
|
@@ -1979,7 +1979,7 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1979
1979
|
start = longlongvar;
|
1980
1980
|
sscanf(strtok(NULL, ":-\r\n"), "%lli", &longlongvar);
|
1981
1981
|
finish = longlongvar;
|
1982
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1982
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
1983
1983
|
if (start <= finish) {
|
1984
1984
|
strcpy(refCoords[refIndex].name, name);
|
1985
1985
|
refCoords[refIndex].start = start;
|
@@ -1998,17 +1998,17 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
|
|
1998
1998
|
}
|
1999
1999
|
|
2000
2000
|
strcpy(name, line + 1);
|
2001
|
-
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2001
|
+
refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
|
2002
2002
|
strcpy(refCoords[refIndex].name, name);
|
2003
2003
|
refCoords[refIndex].start = 1;
|
2004
2004
|
refCoords[refIndex].finish = -1;
|
2005
2005
|
refCoords[refIndex].positive_strand = true;
|
2006
2006
|
}
|
2007
2007
|
if (++refIndex == referenceCount)
|
2008
|
-
break;
|
2008
|
+
break;
|
2009
2009
|
}
|
2010
2010
|
}
|
2011
|
-
|
2011
|
+
|
2012
2012
|
fclose(file);
|
2013
2013
|
}
|
2014
2014
|
return refCoords;
|
@@ -2020,23 +2020,23 @@ typedef struct refMap_st {
|
|
2020
2020
|
IDnum refID;
|
2021
2021
|
IDnum refStart;
|
2022
2022
|
IDnum refFinish;
|
2023
|
-
} ATTRIBUTE_PACKED ReferenceMapping;
|
2023
|
+
} ATTRIBUTE_PACKED ReferenceMapping;
|
2024
2024
|
|
2025
2025
|
static int compareReferenceMappings(const void * A, const void * B) {
|
2026
2026
|
ReferenceMapping * refMapA = (ReferenceMapping *) A;
|
2027
2027
|
ReferenceMapping * refMapB = (ReferenceMapping *) B;
|
2028
|
-
|
2028
|
+
|
2029
2029
|
if (refMapA->start < refMapB->start)
|
2030
2030
|
return -1;
|
2031
2031
|
else if (refMapA->start == refMapB->start)
|
2032
2032
|
return 0;
|
2033
|
-
else
|
2033
|
+
else
|
2034
2034
|
return 1;
|
2035
2035
|
}
|
2036
2036
|
|
2037
2037
|
static void initializeReferenceMapping(ReferenceMapping * refMap, PassageMarkerI marker, Node * node) {
|
2038
2038
|
refMap->start = getStartOffset(marker);
|
2039
|
-
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2039
|
+
refMap->finish = getNodeLength(node) - getFinishOffset(marker);
|
2040
2040
|
refMap->refID = getPassageMarkerSequenceID(marker);
|
2041
2041
|
refMap->refStart = getPassageMarkerStart(marker);
|
2042
2042
|
refMap->refFinish = getPassageMarkerFinish(marker);
|
@@ -2046,7 +2046,7 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2046
2046
|
ReferenceCoord * refCoord;
|
2047
2047
|
Coordinate start, finish;
|
2048
2048
|
|
2049
|
-
if (mapping->refID > 0)
|
2049
|
+
if (mapping->refID > 0)
|
2050
2050
|
refCoord = &refCoords[mapping->refID - 1];
|
2051
2051
|
else
|
2052
2052
|
refCoord = &refCoords[-mapping->refID - 1];
|
@@ -2064,13 +2064,13 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
|
|
2064
2064
|
start = refCoord->start + mapping->refStart + wordLength - 1;
|
2065
2065
|
finish = refCoord->start + mapping->refFinish + 1;
|
2066
2066
|
} else {
|
2067
|
-
start = refCoord->finish - mapping->refStart;
|
2068
|
-
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2067
|
+
start = refCoord->finish - mapping->refStart;
|
2068
|
+
finish = refCoord->finish - mapping->refFinish + wordLength;
|
2069
2069
|
}
|
2070
2070
|
}
|
2071
|
-
|
2071
|
+
|
2072
2072
|
velvetFprintf(file, "%lli\t%lli\t%s\t%lli\t%lli\n",
|
2073
|
-
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2073
|
+
(long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
|
2074
2074
|
refCoord->name, (long long) start, (long long) finish);
|
2075
2075
|
}
|
2076
2076
|
|
@@ -2089,7 +2089,7 @@ static void exportLongNodeMapping(FILE * outfile, Node * node, ReadSet * reads,
|
|
2089
2089
|
velvetFprintf(outfile, ">contig_%li\n", (long) getNodeID(node));
|
2090
2090
|
|
2091
2091
|
// Create table
|
2092
|
-
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2092
|
+
referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
|
2093
2093
|
|
2094
2094
|
// Initialize table
|
2095
2095
|
referenceCount = 0;
|
@@ -2115,9 +2115,9 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2115
2115
|
IDnum nodeIndex, refIndex;
|
2116
2116
|
Node *node;
|
2117
2117
|
ReferenceCoord * refCoords;
|
2118
|
-
IDnum referenceCount = getReferenceCount(reads);
|
2118
|
+
IDnum referenceCount = getReferenceCount(reads);
|
2119
2119
|
|
2120
|
-
if (referenceCount == 0)
|
2120
|
+
if (referenceCount == 0)
|
2121
2121
|
return;
|
2122
2122
|
|
2123
2123
|
refCoords = collectReferenceCoords(seqReadInfo, referenceCount);
|
@@ -2135,7 +2135,7 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
|
|
2135
2135
|
|
2136
2136
|
if (node == NULL || getNodeLength(node) < minLength)
|
2137
2137
|
continue;
|
2138
|
-
|
2138
|
+
|
2139
2139
|
exportLongNodeMapping(outfile, node, reads, refCoords, getWordLength(graph));
|
2140
2140
|
}
|
2141
2141
|
|
data/ext/src/src/run.c
CHANGED
@@ -39,7 +39,7 @@ static void printUsage()
|
|
39
39
|
printf("\thash_length\t: EITHER an odd integer (if even, it will be decremented) <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
40
40
|
printf("\t\t\t: OR: m,M,s where m and M are odd integers (if not, they will be decremented) with m < M <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
41
41
|
puts("\t\t\t\tand s is a step (even number). Velvet will then hash from k=m to k=M with a step of s");
|
42
|
-
puts("\tfilename\t: path to sequence file or - for standard input");
|
42
|
+
puts("\tfilename\t: path to sequence file or - for standard input");
|
43
43
|
puts("");
|
44
44
|
puts("File format options:");
|
45
45
|
puts("\t-fasta\t-fastq\t-raw\t-fasta.gz\t-fastq.gz\t-raw.gz\t-sam\t-bam\t-fmtAuto");
|
@@ -52,7 +52,7 @@ static void printUsage()
|
|
52
52
|
puts("Read type options:");
|
53
53
|
puts("\t-short\t-shortPaired");
|
54
54
|
#if CATEGORIES <= 5
|
55
|
-
Category cat;
|
55
|
+
Category cat;
|
56
56
|
for (cat = 2; cat <= CATEGORIES; cat++)
|
57
57
|
printf("\t-short%i\t-shortPaired%i\n", cat, cat);
|
58
58
|
#else
|
@@ -93,7 +93,7 @@ static void printUsage()
|
|
93
93
|
puts("\t\t[Both files are picked up by graph, so please leave them there]");
|
94
94
|
}
|
95
95
|
|
96
|
-
int
|
96
|
+
int velveth(int argc, char **argv)
|
97
97
|
{
|
98
98
|
ReadSet *allSequences = NULL;
|
99
99
|
SplayTable *splayTable;
|
@@ -161,18 +161,18 @@ int main(int argc, char **argv)
|
|
161
161
|
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
162
162
|
hashLength, MAXKMERLENGTH);
|
163
163
|
hashLength = MAXKMERLENGTH;
|
164
|
-
}
|
164
|
+
}
|
165
165
|
if (hashLength <= 0) {
|
166
166
|
velvetLog("Invalid hash length: %s\n", argv[2]);
|
167
167
|
printUsage();
|
168
168
|
return 0;
|
169
|
-
}
|
169
|
+
}
|
170
170
|
if (hashLength % 2 == 0) {
|
171
171
|
velvetLog
|
172
172
|
("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
|
173
173
|
hashLength, hashLength - 1);
|
174
174
|
hashLength--;
|
175
|
-
}
|
175
|
+
}
|
176
176
|
|
177
177
|
if (multiple_kmers) {
|
178
178
|
if (hashLengthMax > MAXKMERLENGTH + 1) {
|
@@ -180,12 +180,12 @@ int main(int argc, char **argv)
|
|
180
180
|
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
181
181
|
hashLengthMax, MAXKMERLENGTH + 1);
|
182
182
|
hashLengthMax = MAXKMERLENGTH + 1;
|
183
|
-
}
|
183
|
+
}
|
184
184
|
if (hashLengthMax <= hashLength) {
|
185
185
|
velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]);
|
186
186
|
printUsage();
|
187
187
|
return 0;
|
188
|
-
}
|
188
|
+
}
|
189
189
|
|
190
190
|
if (hashLengthStep <= 0) {
|
191
191
|
velvetLog("Non-positive hash length! Setting it to 2\n");
|
@@ -215,7 +215,7 @@ int main(int argc, char **argv)
|
|
215
215
|
sprintf(buf,"%s_%d",argv[1],h);
|
216
216
|
directory = mallocOrExit(strlen(buf) + 100, char);
|
217
217
|
strcpy(directory,buf);
|
218
|
-
} else
|
218
|
+
} else
|
219
219
|
directory = argv[1];
|
220
220
|
|
221
221
|
filename = mallocOrExit(strlen(directory) + 100, char);
|