bio-velvet_underground 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4af6bceb99a775604acc0b87e641f42834bfe86f
4
- data.tar.gz: bc8d0a17768ec2f94fd304c98a58505229b2a955
3
+ metadata.gz: 696cf2a0e11ae0c6848fe4d717e1add4e1452370
4
+ data.tar.gz: 3b88bb295352c038eddeb118835e88596b66641b
5
5
  SHA512:
6
- metadata.gz: f652e8333d0e2fa600bab4a9f3c9a0dc5bb4670b0a9caef3cbbf2999f3435068e22bd65b41c43671910a6eaf808af70a2afa5e4787aa1d90cda347c96ec3a510
7
- data.tar.gz: d00da35ed8e0926392a60ce93251d293ccb032803fb3e0b0ec81a9477ff7d6f96a452a75caa36e36a97d765e7aff33821bc73a354c2b74c6c73fae9c87001f3d
6
+ metadata.gz: 464490f17d6ab3a98c47c4a4fe7f7dcd08359ba02284986a45119106d8a29da620c1417c1af0d1615199fc912adb0331fdf66242968f5b63f8e2cc03cdf4e1f2
7
+ data.tar.gz: a0331bd42caecc5adbb2fde466e2c8ea6de71db07c621f844a8ccc9456e78ef13ea1ce41073052bd574e32655676c057d3854a44c9875bedad1314c322746345
data/README.md CHANGED
@@ -20,12 +20,10 @@ Running velvet returns a `Result` object, which is effectively a pointer to a ve
20
20
  ```ruby
21
21
  require 'bio-velvet_underground'
22
22
 
23
- #kmer 29, '-short my.fasta' the argument to velveth, no special arguments given to velvetg.
24
- result = Bio::Velvet::Runner.new.velvet(29,"-short my.fasta",'')
25
- result.result_directory #=> path to temporary directory, containing velvet generated files e.g. contigs.fna
26
-
23
+ # Run assembly with kmer 29, using '-short my.fasta' as the arguments to velveth (not including kmer and directory), and with
24
+ # no special arguments given to velvetg.
27
25
  # A pre-defined velvet result directory:
28
- result = Bio::Velvet::Runner.new.velvet(29,"-short my.fasta",'',:output_assembly_path => '/path/to/result')
26
+ result = Bio::Velvet::Runner.new.velvet(29, %w(-short my.fasta),[],:output_assembly_path => '/path/to/result')
29
27
  result.result_directory #=> '/path/to/result'
30
28
  ```
31
29
  With the magic of Ruby-FFI, the library with the smallest kmer size >= 29 is chosen (in this case 31).
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/ext/src/Makefile CHANGED
@@ -3,8 +3,8 @@ CFLAGS = -Wall
3
3
  DEBUG = -g
4
4
  LIBS = -lm
5
5
  OPT = -O3
6
- MAXKMERLENGTH=31
7
- CATEGORIES=2
6
+ MAXKMERLENGTH?=31
7
+ CATEGORIES?=2
8
8
  DEF = -D MAXKMERLENGTH=$(MAXKMERLENGTH) -D CATEGORIES=$(CATEGORIES)
9
9
  PDFLATEX_VERSION := $(shell pdflatex --version 2> /dev/null)
10
10
 
@@ -20,14 +20,14 @@ endif
20
20
 
21
21
  ifdef VBIGASSEMBLY
22
22
  override DEF := $(DEF) -D BIGASSEMBLY -D VBIGASSEMBLY
23
- endif
23
+ endif
24
24
 
25
25
 
26
26
  ifdef LONGSEQUENCES
27
27
  override DEF := $(DEF) -D LONGSEQUENCES
28
28
  endif
29
29
 
30
- # OpenMP
30
+ # OpenMP
31
31
  ifdef OPENMP
32
32
  override CFLAGS := $(CFLAGS) -fopenmp
33
33
  endif
@@ -39,24 +39,23 @@ endif
39
39
 
40
40
  OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
41
41
  OBJDBG = $(subst obj,obj/dbg,$(OBJ))
42
- OBJSHARED = $(subst obj,obj/shared,$(OBJ))
43
42
 
44
- default : zlib obj velveth velvetg
43
+ default : cleanobj zlib obj velveth velvetg doc
45
44
 
46
45
  clean : clean-zlib
47
- -rm obj/*.o obj/dbg/*.o obj/shared/*.o obj/shared/velvet.so.0.0.1 ./velvet*
46
+ -rm obj/*.o obj/dbg/*.o ./velvet*
48
47
  -rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
49
48
  -rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
50
49
 
51
- cleanobj:
52
- -rm obj/*.o obj/dbg/*.o obj/shared/*.o
50
+ cleanobj:
51
+ -rm obj/*.o obj/dbg/*.o
53
52
 
54
53
  ifdef BUNDLEDZLIB
55
54
  Z_LIB_DIR=third-party/zlib-1.2.3
56
55
  Z_LIB_FILES=$(Z_LIB_DIR)/*.o
57
56
  override DEF := $(DEF) -D BUNDLEDZLIB
58
57
 
59
- zlib:
58
+ zlib:
60
59
  cd $(Z_LIB_DIR); ./configure; make; rm minigzip.o; rm example.o
61
60
 
62
61
  clean-zlib :
@@ -68,14 +67,14 @@ zlib :
68
67
  clean-zlib :
69
68
  endif
70
69
 
71
- velveth : obj
70
+ velveth : obj
72
71
  $(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velveth obj/tightString.o obj/run.o obj/recycleBin.o obj/splay.o obj/splayTable.o obj/readSet.o obj/binarySequences.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
73
72
 
74
73
 
75
74
  velvetg : obj
76
75
  $(CC) $(CFLAGS) $(OPT) $(LDFLAGS) -o velvetg obj/tightString.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/scaffold.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o $(Z_LIB_FILES) $(LIBS)
77
76
 
78
- debug : override DEF := $(DEF) -D DEBUG
77
+ debug : override DEF := $(DEF) -D DEBUG
79
78
  debug : cleanobj obj/dbg
80
79
  $(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velveth obj/dbg/tightString.o obj/dbg/run.o obj/dbg/recycleBin.o obj/dbg/splay.o obj/dbg/splayTable.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
81
80
  $(CC) $(CFLAGS) $(LDFLAGS) $(DEBUG) -o velvetg obj/dbg/tightString.o obj/dbg/graph.o obj/dbg/run2.o obj/dbg/fibHeap.o obj/dbg/fib.o obj/dbg/concatenatedGraph.o obj/dbg/passageMarker.o obj/dbg/graphStats.o obj/dbg/correctedGraph.o obj/dbg/dfib.o obj/dbg/dfibHeap.o obj/dbg/recycleBin.o obj/dbg/readSet.o obj/dbg/binarySequences.o obj/dbg/shortReadPairs.o obj/dbg/scaffold.o obj/dbg/locallyCorrectedGraph.o obj/dbg/graphReConstruction.o obj/dbg/roadMap.o obj/dbg/preGraph.o obj/dbg/preGraphConstruction.o obj/dbg/concatenatedPreGraph.o obj/dbg/readCoherentGraph.o obj/dbg/utility.o obj/dbg/kmer.o obj/dbg/kmerOccurenceTable.o obj/dbg/allocArray.o obj/dbg/autoOpen.o $(Z_LIB_FILES) $(LIBS)
@@ -93,25 +92,25 @@ colordebug : cleanobj obj/dbg_de
93
92
  objdir:
94
93
  mkdir -p obj
95
94
 
96
- obj: zlib objdir $(OBJ)
95
+ obj: zlib cleanobj objdir $(OBJ)
97
96
 
98
97
  obj_de: override DEF := $(DEF) -D COLOR
99
98
  obj_de: zlib cleanobj objdir $(OBJ)
100
99
 
101
- obj/dbgdir:
100
+ obj/dbgdir:
102
101
  mkdir -p obj/dbg
103
102
 
104
- obj/dbg: override DEF := $(DEF) -D DEBUG
103
+ obj/dbg: override DEF := $(DEF) -D DEBUG
105
104
  obj/dbg: zlib cleanobj obj/dbgdir $(OBJDBG)
106
105
 
107
106
  obj/dbg_de: override DEF := $(DEF) -D COLOR -D DEBUG
108
107
  obj/dbg_de: zlib cleanobj obj/dbgdir $(OBJDBG)
109
108
 
110
109
  obj/%.o: src/%.c
111
- $(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
110
+ $(CC) $(CFLAGS) $(OPT) $(DEF) -c $? -o $@
112
111
 
113
112
  obj/dbg/%.o: src/%.c
114
- $(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
113
+ $(CC) $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
115
114
 
116
115
  doc: Manual.pdf
117
116
 
@@ -124,15 +123,3 @@ endif
124
123
 
125
124
  test: velvetg velveth
126
125
  cd tests && ./run-tests.sh
127
-
128
- sharedobjdir:
129
- mkdir -p obj/shared
130
-
131
- obj/shared: sharedobjdir $(OBJSHARED)
132
-
133
- obj/shared/%.o: src/%.c
134
- $(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
135
-
136
- shared: zlib obj/shared
137
- cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o run.o run2.o
138
-
@@ -47,7 +47,7 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
47
47
  hasSinglePreArc_pg(getOtherEnd_pg
48
48
  (preArc, preNodeBID),
49
49
  preGraph)
50
- && !isLoop_pg(preArc)
50
+ && !isLoop_pg(preArc)
51
51
  && getDestination_pg(preArc, preNodeBID) != preNodeAID) {
52
52
 
53
53
  totalLength += getPreNodeLength_pg(preNodeBID, preGraph);
@@ -99,11 +99,11 @@ static void concatenatePreNodes(IDnum preNodeAID, PreArcI oldPreArc,
99
99
  appendDescriptors_pg(&ptr, &writeOffset ,currentPreNodeID, preGraph, false);
100
100
  }
101
101
 
102
- if (writeOffset != 0)
102
+ if (writeOffset != 0)
103
103
  while (writeOffset++ != 4)
104
104
  (*ptr) >>= 2;
105
105
 
106
- setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
106
+ setPreNodeDescriptor_pg(descr, totalLength - wordLength + 1, preNodeAID, preGraph);
107
107
 
108
108
  // Correct preArcs
109
109
  for (preArc = getPreArc_pg(preNodeBID, preGraph); preArc != NULL_IDX;
@@ -209,7 +209,7 @@ static boolean isEligibleTip(IDnum index, PreGraph * preGraph, Coordinate
209
209
  if (currentIndex == 0)
210
210
  return true;
211
211
 
212
- // Joined tips
212
+ // Joined tips
213
213
  if (simplePreArcCount_pg(-currentIndex, preGraph) < 2)
214
214
  return false;
215
215
 
@@ -764,7 +764,7 @@ static void remapNodeMarkersOntoNeighbour(Node * source,
764
764
  }
765
765
  }
766
766
 
767
- // Short read markers
767
+ // Short read markers
768
768
  if (readStartsAreActivated(graph)) {
769
769
  // Update Coordinates
770
770
  sourceArray = getNodeReads(source, graph);
@@ -1250,7 +1250,7 @@ static void foldSymmetricalNode(Node * node)
1250
1250
  twinNode = node;
1251
1251
  node = tmp;
1252
1252
  }
1253
- // Destroy link to old markers
1253
+ // Destroy link to old markers
1254
1254
  setMarker(node, NULL_IDX);
1255
1255
 
1256
1256
  // Reinsert markers properly
@@ -1951,7 +1951,7 @@ static void transferNodeData(Node * source, Node * target)
1951
1951
  if (getNode(fastPath) == twinSource)
1952
1952
  fastPath = getNextInSequence(fastPath);
1953
1953
 
1954
- // Next node
1954
+ // Next node
1955
1955
  if (source == activeNode) {
1956
1956
  activeNode = target;
1957
1957
  todo =
@@ -2013,7 +2013,7 @@ static void concatenateNodesAndVaccinate(Node * nodeA, Node * nodeB,
2013
2013
  // Read starts
2014
2014
  concatenateReadStarts(nodeA, nodeB, graph);
2015
2015
 
2016
- // Descriptor management
2016
+ // Descriptor management
2017
2017
  appendDescriptors(nodeA, nodeB);
2018
2018
 
2019
2019
  // Update uniqueness:
@@ -2167,7 +2167,7 @@ static void cleanUpRedundancy()
2167
2167
 
2168
2168
  //velvetLog("Concatenation\n");
2169
2169
 
2170
- // Freeing up memory
2170
+ // Freeing up memory
2171
2171
  if (slowMarker != NULL_IDX)
2172
2172
  concatenatePathNodes(slowPath);
2173
2173
  else
@@ -2460,7 +2460,7 @@ void clipTipsHard(Graph * graph, boolean conserveLong)
2460
2460
 
2461
2461
  if (current == NULL)
2462
2462
  continue;
2463
-
2463
+
2464
2464
  if (conserveLong && getMarker(current))
2465
2465
  continue;
2466
2466
 
@@ -2560,7 +2560,7 @@ void correctGraph(Graph * argGraph, ShortLength * argSequenceLengths, Category *
2560
2560
  eligibleStartingPoints = mallocOrExit(2 * nodes + 1, IDnum);
2561
2561
  progressStatus = callocOrExit(2 * nodes + 1, boolean);
2562
2562
  todoLists = callocOrExit(2 * nodes + 1, Ticket *);
2563
- //Done with memory
2563
+ //Done with memory
2564
2564
 
2565
2565
  resetNodeStatus(graph);
2566
2566
  determineEligibleStartingPoints();
@@ -2607,9 +2607,9 @@ void setMaxReadLength(int value)
2607
2607
  if (value < 0) {
2608
2608
  velvetLog("Negative branch length %i!\n", value);
2609
2609
  velvetLog("Exiting...\n");
2610
- #ifdef DEBUG
2610
+ #ifdef DEBUG
2611
2611
  abort();
2612
- #endif
2612
+ #endif
2613
2613
  exit(1);
2614
2614
  }
2615
2615
  MAXREADLENGTH = value;
@@ -2621,9 +2621,9 @@ void setMaxGaps(int value)
2621
2621
  if (value < 0) {
2622
2622
  velvetLog("Negative max gap count %i!\n", value);
2623
2623
  velvetLog("Exiting...\n");
2624
- #ifdef DEBUG
2624
+ #ifdef DEBUG
2625
2625
  abort();
2626
- #endif
2626
+ #endif
2627
2627
  exit(1);
2628
2628
  }
2629
2629
  MAXGAPS = value;
@@ -2635,9 +2635,9 @@ void setMaxDivergence(double value)
2635
2635
  velvetLog("Divergence rate %lf out of bounds [0,1]!\n",
2636
2636
  value);
2637
2637
  velvetLog("Exiting...\n");
2638
- #ifdef DEBUG
2638
+ #ifdef DEBUG
2639
2639
  abort();
2640
- #endif
2640
+ #endif
2641
2641
  exit(1);
2642
2642
  }
2643
2643
  MAXDIVERGENCE = value;
@@ -522,7 +522,7 @@ void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads)
522
522
  }
523
523
 
524
524
  velvetFprintf(outfile, "\t%li", (long) markerCount(node));
525
- printShortCounts(outfile, node, graph, reads);
525
+ printShortCounts(outfile, node, graph, reads);
526
526
 
527
527
  velvetFprintf(outfile, "\n");
528
528
  }
@@ -579,7 +579,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
579
579
  if (destination == NULL)
580
580
  return;
581
581
 
582
- // Eliminate those that point to uniquely strain sequences
582
+ // Eliminate those that point to uniquely strain sequences
583
583
  if (nodeGenomicMultiplicity(destination, firstStrain) != 1) {
584
584
  // velvetLog("Multiple genome reads\n");
585
585
  return;
@@ -600,7 +600,7 @@ void displayLocalBreakpoint(PassageMarkerI strainMarker,
600
600
  if (destination == destination2)
601
601
  return;
602
602
 
603
- // Eliminate those that point to uniquely strain sequences
603
+ // Eliminate those that point to uniquely strain sequences
604
604
  if (isOnlyGenome(destination2, firstStrain))
605
605
  return;
606
606
 
@@ -776,7 +776,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
776
776
  if (regions) {
777
777
  lastRegion->next = newMask(stops[indexStop]);
778
778
  lastRegion = lastRegion->next;
779
- } else {
779
+ } else {
780
780
  regions = newMask(stops[indexStop]);
781
781
  lastRegion = regions;
782
782
  }
@@ -803,7 +803,7 @@ static Mask * lowCoverageRegions(Coordinate * starts, Coordinate * stops, size_t
803
803
  if (regions) {
804
804
  lastRegion->next = newMask(stops[indexStop]);
805
805
  lastRegion = lastRegion->next;
806
- } else {
806
+ } else {
807
807
  regions = newMask(stops[indexStop]);
808
808
  lastRegion = regions;
809
809
  }
@@ -837,7 +837,7 @@ static int compareCoords(const void * A, const void * B) {
837
837
 
838
838
  static void sortCoords(Coordinate * array, IDnum length) {
839
839
  qsort(array, (size_t) length, sizeof(Coordinate), compareCoords);
840
- }
840
+ }
841
841
 
842
842
  static void getShortReadCoords(Coordinate * starts, Coordinate * stops, Node * node, Graph * graph, ShortLength * readLengths) {
843
843
  ShortReadMarker * markers = getNodeReads(node, graph);
@@ -918,7 +918,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
918
918
  gap = getGap(node, graph);
919
919
  for (position = 0; position < WORDLENGTH; position++) {
920
920
  if (position % 60 == 0 && position > 0)
921
- velvetFprintf(outfile, "\n");
921
+ velvetFprintf(outfile, "\n");
922
922
  nucleotide = getNucleotideChar(position, tString);
923
923
  velvetFprintf(outfile, "%c", nucleotide);
924
924
  }
@@ -938,7 +938,7 @@ static void exportLongNodeSequence(FILE * outfile, Node * node, Graph * graph, R
938
938
  mask->finish) {
939
939
  next = mask->next;
940
940
  deallocateMask(mask);
941
- mask = next;
941
+ mask = next;
942
942
  }
943
943
 
944
944
  if (gap
@@ -1047,7 +1047,7 @@ int compareNodeCovs(const void * A, const void * B) {
1047
1047
  Node * nodeB = *((Node **) B);
1048
1048
  double covA;
1049
1049
  double covB;
1050
-
1050
+
1051
1051
  if (getNodeLength(nodeA) == 0)
1052
1052
  nodeA = NULL;
1053
1053
 
@@ -1063,8 +1063,8 @@ int compareNodeCovs(const void * A, const void * B) {
1063
1063
  return -1;
1064
1064
 
1065
1065
  // Deal with real coverage numbers:
1066
- covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
1067
- covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
1066
+ covA = getTotalCoverage(nodeA) / (double) getNodeLength(nodeA);
1067
+ covB = getTotalCoverage(nodeB) / (double) getNodeLength(nodeB);
1068
1068
 
1069
1069
  if (covA > covB)
1070
1070
  return 1;
@@ -1075,14 +1075,14 @@ int compareNodeCovs(const void * A, const void * B) {
1075
1075
 
1076
1076
  double estimated_cov(Graph * graph, char * directory)
1077
1077
  {
1078
- Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
1078
+ Node ** nodeArray = callocOrExit(nodeCount(graph), Node*);
1079
1079
  IDnum index;
1080
1080
  Coordinate halfTotalLength = 0;
1081
1081
  Coordinate sumLength = 0;
1082
1082
  Node *node;
1083
1083
  char *logFilename =
1084
1084
  mallocOrExit(strlen(directory) + 100, char);
1085
- char *statsLine =
1085
+ char *statsLine =
1086
1086
  mallocOrExit(5000, char);
1087
1087
  FILE *logFile;
1088
1088
 
@@ -1152,7 +1152,7 @@ static boolean terminalReferenceMarker(Node * node, ReadSet * reads) {
1152
1152
 
1153
1153
  static boolean hasReferenceMarker(Node * node, ReadSet * reads) {
1154
1154
  PassageMarkerI marker;
1155
-
1155
+
1156
1156
  for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker))
1157
1157
  if (reads->categories[getAbsolutePassMarkerSeqID(marker) - 1] == REFERENCE)
1158
1158
  return true;
@@ -1229,14 +1229,14 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
1229
1229
  IDnum index;
1230
1230
  Node *node;
1231
1231
  boolean denounceReads = readStartsAreActivated(graph);
1232
- boolean *res = NULL;
1232
+ boolean *res = NULL;
1233
1233
  FILE * outfile = NULL;
1234
1234
 
1235
1235
  velvetLog("Removing contigs with coverage < %f...\n", minCov);
1236
-
1236
+
1237
1237
  if (denounceReads)
1238
1238
  res = callocOrExit(sequenceCount(graph), boolean);
1239
-
1239
+
1240
1240
  if (export) {
1241
1241
  outfile = fopen(filename, "w");
1242
1242
 
@@ -1255,7 +1255,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
1255
1255
  if (getNodeLength(node) == 0)
1256
1256
  continue;
1257
1257
 
1258
- if (getTotalCoverage(node) / getNodeLength(node) < minCov
1258
+ if (getTotalCoverage(node) / getNodeLength(node) < minCov
1259
1259
  && !hasReferenceMarker(node, reads))
1260
1260
  removeNodeAndDenounceDubiousReads(graph,
1261
1261
  node,
@@ -1273,7 +1273,7 @@ boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
1273
1273
  if (getNodeLength(node) == 0)
1274
1274
  continue;
1275
1275
 
1276
- if (getTotalCoverage(node) / getNodeLength(node) < minCov
1276
+ if (getTotalCoverage(node) / getNodeLength(node) < minCov
1277
1277
  && !terminalReferenceMarker(node, reads))
1278
1278
  removeNodeAndDenounceDubiousReads(graph,
1279
1279
  node,
@@ -1296,7 +1296,7 @@ static Coordinate getLongCoverage(Node * node) {
1296
1296
 
1297
1297
  for (marker = getMarker(node); marker; marker = getNextInNode(marker))
1298
1298
  total += getPassageMarkerLength(marker);
1299
-
1299
+
1300
1300
  return total;
1301
1301
  }
1302
1302
 
@@ -1313,7 +1313,7 @@ void removeLowCoverageReferenceNodes(Graph * graph, double minCov, double minLon
1313
1313
  if (getNodeLength(node) == 0)
1314
1314
  continue;
1315
1315
 
1316
- if ((getTotalCoverage(node) / getNodeLength(node) < minCov
1316
+ if ((getTotalCoverage(node) / getNodeLength(node) < minCov
1317
1317
  || getLongCoverage(node) / getNodeLength(node) < minLongCov)
1318
1318
  && hasReferenceMarker(node, reads)) {
1319
1319
  destroyNodePassageMarkers(graph, node);
@@ -1342,7 +1342,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
1342
1342
  return;
1343
1343
 
1344
1344
  velvetLog("Removing contigs with coverage < %f...\n", minCov);
1345
-
1345
+
1346
1346
  if (export) {
1347
1347
  outfile = fopen(filename, "a");
1348
1348
 
@@ -1360,7 +1360,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
1360
1360
  if (getNodeLength(node) == 0)
1361
1361
  continue;
1362
1362
 
1363
- if (getLongCoverage(node) / getNodeLength(node) < minCov
1363
+ if (getLongCoverage(node) / getNodeLength(node) < minCov
1364
1364
  && !hasReferenceMarker(node, reads))
1365
1365
  removeNodeAndDenounceDubiousReads(graph,
1366
1366
  node,
@@ -1378,7 +1378,7 @@ void removeLowLongCoverageNodesAndDenounceDubiousReads(Graph * graph,
1378
1378
  if (getNodeLength(node) == 0)
1379
1379
  continue;
1380
1380
 
1381
- if (getLongCoverage(node) / getNodeLength(node) < minCov
1381
+ if (getLongCoverage(node) / getNodeLength(node) < minCov
1382
1382
  && !terminalReferenceMarker(node, reads))
1383
1383
  removeNodeAndDenounceDubiousReads(graph,
1384
1384
  node,
@@ -1404,7 +1404,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
1404
1404
  return;
1405
1405
 
1406
1406
  velvetLog("Applying an upper coverage cutoff of %f...\n", maxCov);
1407
-
1407
+
1408
1408
  if (export) {
1409
1409
  outfile = fopen(filename, "w");
1410
1410
 
@@ -1423,7 +1423,7 @@ void removeHighCoverageNodes(Graph * graph, double maxCov, boolean export, Coord
1423
1423
  && getTotalCoverage(node) / getNodeLength(node) > maxCov) {
1424
1424
  destroyNodePassageMarkers(graph, node);
1425
1425
 
1426
- if (export && getNodeLength(node) > minLength)
1426
+ if (export && getNodeLength(node) > minLength)
1427
1427
  exportLongNodeSequence(outfile, node, graph, NULL, NULL, -1);
1428
1428
 
1429
1429
  destroyNode(node, graph);
@@ -1784,7 +1784,7 @@ Coordinate totalAssemblyLength(Graph * graph)
1784
1784
  return total;
1785
1785
  }
1786
1786
 
1787
- IDnum usedReads(Graph * graph, Coordinate minContigLength)
1787
+ IDnum usedReads(Graph * graph, Coordinate minContigLength)
1788
1788
  {
1789
1789
  IDnum res = 0;
1790
1790
  boolean * used = callocOrExit(sequenceCount(graph) + 1, boolean);
@@ -1798,16 +1798,16 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
1798
1798
  node = getNodeInGraph(graph, nodeID);
1799
1799
  if (node == NULL || getNodeLength(node) < minContigLength)
1800
1800
  continue;
1801
-
1801
+
1802
1802
  // Long reads
1803
1803
  for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
1804
1804
  readID = getPassageMarkerSequenceID(marker);
1805
1805
  if (readID < 0)
1806
1806
  readID = -readID;
1807
- used[readID] = true;
1808
- }
1807
+ used[readID] = true;
1808
+ }
1809
1809
 
1810
- // Short reads
1810
+ // Short reads
1811
1811
  if (!readStartsAreActivated(graph))
1812
1812
  continue;
1813
1813
 
@@ -1816,23 +1816,23 @@ IDnum usedReads(Graph * graph, Coordinate minContigLength)
1816
1816
  for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
1817
1817
  shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
1818
1818
  readID = getShortReadMarkerID(shortReadMarker);
1819
- used[readID] = true;
1819
+ used[readID] = true;
1820
1820
  }
1821
-
1821
+
1822
1822
  shortReadArray = getNodeReads(getTwinNode(node), graph);
1823
1823
  shortReadCount = getNodeReadCount(getTwinNode(node), graph);
1824
1824
  for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
1825
1825
  shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
1826
1826
  readID = getShortReadMarkerID(shortReadMarker);
1827
- used[readID] = true;
1827
+ used[readID] = true;
1828
1828
  }
1829
1829
  }
1830
1830
 
1831
- for (readID = 1; readID <= sequenceCount(graph); readID++)
1831
+ for (readID = 1; readID <= sequenceCount(graph); readID++)
1832
1832
  if (used[readID])
1833
1833
  res++;
1834
1834
 
1835
- free(used);
1835
+ free(used);
1836
1836
 
1837
1837
  return res;
1838
1838
  }
@@ -1841,7 +1841,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
1841
1841
  {
1842
1842
  char *logFilename =
1843
1843
  mallocOrExit(strlen(directory) + 100, char);
1844
- char *statsLine =
1844
+ char *statsLine =
1845
1845
  mallocOrExit(5000, char);
1846
1846
  FILE *logFile;
1847
1847
 
@@ -1860,7 +1860,7 @@ void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *director
1860
1860
  (long) sequenceCount(graph));
1861
1861
 
1862
1862
  velvetFprintf(logFile, "%s", statsLine);
1863
- //velvetFprintf(stdout, "%s", statsLine); //don't print to stdout when part of a library
1863
+ velvetFprintf(stdout, "%s", statsLine);
1864
1864
 
1865
1865
  fclose(logFile);
1866
1866
  free(logFilename);
@@ -1888,16 +1888,16 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
1888
1888
  node = getNodeInGraph(graph, nodeID);
1889
1889
  if (node == NULL || getNodeLength(node) < minContigKmerLength)
1890
1890
  continue;
1891
-
1891
+
1892
1892
  // Long reads
1893
1893
  for(marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) {
1894
1894
  readID = getPassageMarkerSequenceID(marker);
1895
1895
  if (readID < 0)
1896
1896
  readID = -readID;
1897
- used[readID] = true;
1898
- }
1897
+ used[readID] = true;
1898
+ }
1899
1899
 
1900
- // Short reads
1900
+ // Short reads
1901
1901
  if (!readStartsAreActivated(graph))
1902
1902
  continue;
1903
1903
 
@@ -1906,31 +1906,31 @@ void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLe
1906
1906
  for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
1907
1907
  shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
1908
1908
  readID = getShortReadMarkerID(shortReadMarker);
1909
- used[readID] = true;
1909
+ used[readID] = true;
1910
1910
  }
1911
-
1911
+
1912
1912
  shortReadArray = getNodeReads(getTwinNode(node), graph);
1913
1913
  shortReadCount = getNodeReadCount(getTwinNode(node), graph);
1914
1914
  for (shortReadIndex = 0; shortReadIndex < shortReadCount; shortReadIndex++) {
1915
1915
  shortReadMarker = getShortReadMarkerAtIndex(shortReadArray, shortReadIndex);
1916
1916
  readID = getShortReadMarkerID(shortReadMarker);
1917
- used[readID] = true;
1917
+ used[readID] = true;
1918
1918
  }
1919
1919
  }
1920
1920
 
1921
- for (readID = 1; readID <= sequenceCount(graph); readID++)
1921
+ for (readID = 1; readID <= sequenceCount(graph); readID++)
1922
1922
  if (!used[readID])
1923
1923
  exportTightString(outfile, getTightStringInArray(reads->tSequences, readID - 1), readID);
1924
1924
 
1925
1925
  free(outFilename);
1926
- free(used);
1926
+ free(used);
1927
1927
  fclose(outfile);
1928
1928
  }
1929
1929
 
1930
1930
  static IDnum getReferenceCount(ReadSet * reads) {
1931
1931
  IDnum index;
1932
1932
 
1933
- for (index = 0; index < reads->readCount; index++)
1933
+ for (index = 0; index < reads->readCount; index++)
1934
1934
  if (reads->categories[index] != REFERENCE)
1935
1935
  break;
1936
1936
 
@@ -1979,7 +1979,7 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
1979
1979
  start = longlongvar;
1980
1980
  sscanf(strtok(NULL, ":-\r\n"), "%lli", &longlongvar);
1981
1981
  finish = longlongvar;
1982
- refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
1982
+ refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
1983
1983
  if (start <= finish) {
1984
1984
  strcpy(refCoords[refIndex].name, name);
1985
1985
  refCoords[refIndex].start = start;
@@ -1998,17 +1998,17 @@ static ReferenceCoord * collectReferenceCoords(SequencesReader *seqReadInfo, IDn
1998
1998
  }
1999
1999
 
2000
2000
  strcpy(name, line + 1);
2001
- refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
2001
+ refCoords[refIndex].name = callocOrExit(strlen(name) + 1, char);
2002
2002
  strcpy(refCoords[refIndex].name, name);
2003
2003
  refCoords[refIndex].start = 1;
2004
2004
  refCoords[refIndex].finish = -1;
2005
2005
  refCoords[refIndex].positive_strand = true;
2006
2006
  }
2007
2007
  if (++refIndex == referenceCount)
2008
- break;
2008
+ break;
2009
2009
  }
2010
2010
  }
2011
-
2011
+
2012
2012
  fclose(file);
2013
2013
  }
2014
2014
  return refCoords;
@@ -2020,23 +2020,23 @@ typedef struct refMap_st {
2020
2020
  IDnum refID;
2021
2021
  IDnum refStart;
2022
2022
  IDnum refFinish;
2023
- } ATTRIBUTE_PACKED ReferenceMapping;
2023
+ } ATTRIBUTE_PACKED ReferenceMapping;
2024
2024
 
2025
2025
  static int compareReferenceMappings(const void * A, const void * B) {
2026
2026
  ReferenceMapping * refMapA = (ReferenceMapping *) A;
2027
2027
  ReferenceMapping * refMapB = (ReferenceMapping *) B;
2028
-
2028
+
2029
2029
  if (refMapA->start < refMapB->start)
2030
2030
  return -1;
2031
2031
  else if (refMapA->start == refMapB->start)
2032
2032
  return 0;
2033
- else
2033
+ else
2034
2034
  return 1;
2035
2035
  }
2036
2036
 
2037
2037
  static void initializeReferenceMapping(ReferenceMapping * refMap, PassageMarkerI marker, Node * node) {
2038
2038
  refMap->start = getStartOffset(marker);
2039
- refMap->finish = getNodeLength(node) - getFinishOffset(marker);
2039
+ refMap->finish = getNodeLength(node) - getFinishOffset(marker);
2040
2040
  refMap->refID = getPassageMarkerSequenceID(marker);
2041
2041
  refMap->refStart = getPassageMarkerStart(marker);
2042
2042
  refMap->refFinish = getPassageMarkerFinish(marker);
@@ -2046,7 +2046,7 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
2046
2046
  ReferenceCoord * refCoord;
2047
2047
  Coordinate start, finish;
2048
2048
 
2049
- if (mapping->refID > 0)
2049
+ if (mapping->refID > 0)
2050
2050
  refCoord = &refCoords[mapping->refID - 1];
2051
2051
  else
2052
2052
  refCoord = &refCoords[-mapping->refID - 1];
@@ -2064,13 +2064,13 @@ static void velvetFprintfReferenceMapping(FILE * file, ReferenceMapping * mappin
2064
2064
  start = refCoord->start + mapping->refStart + wordLength - 1;
2065
2065
  finish = refCoord->start + mapping->refFinish + 1;
2066
2066
  } else {
2067
- start = refCoord->finish - mapping->refStart;
2068
- finish = refCoord->finish - mapping->refFinish + wordLength;
2067
+ start = refCoord->finish - mapping->refStart;
2068
+ finish = refCoord->finish - mapping->refFinish + wordLength;
2069
2069
  }
2070
2070
  }
2071
-
2071
+
2072
2072
  velvetFprintf(file, "%lli\t%lli\t%s\t%lli\t%lli\n",
2073
- (long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
2073
+ (long long) mapping->start + 1, (long long) mapping->finish + wordLength - 1,
2074
2074
  refCoord->name, (long long) start, (long long) finish);
2075
2075
  }
2076
2076
 
@@ -2089,7 +2089,7 @@ static void exportLongNodeMapping(FILE * outfile, Node * node, ReadSet * reads,
2089
2089
  velvetFprintf(outfile, ">contig_%li\n", (long) getNodeID(node));
2090
2090
 
2091
2091
  // Create table
2092
- referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
2092
+ referenceMappings = callocOrExit(referenceCount, ReferenceMapping);
2093
2093
 
2094
2094
  // Initialize table
2095
2095
  referenceCount = 0;
@@ -2115,9 +2115,9 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
2115
2115
  IDnum nodeIndex, refIndex;
2116
2116
  Node *node;
2117
2117
  ReferenceCoord * refCoords;
2118
- IDnum referenceCount = getReferenceCount(reads);
2118
+ IDnum referenceCount = getReferenceCount(reads);
2119
2119
 
2120
- if (referenceCount == 0)
2120
+ if (referenceCount == 0)
2121
2121
  return;
2122
2122
 
2123
2123
  refCoords = collectReferenceCoords(seqReadInfo, referenceCount);
@@ -2135,7 +2135,7 @@ void exportLongNodeMappings(char *filename, Graph * graph, ReadSet * reads,
2135
2135
 
2136
2136
  if (node == NULL || getNodeLength(node) < minLength)
2137
2137
  continue;
2138
-
2138
+
2139
2139
  exportLongNodeMapping(outfile, node, reads, refCoords, getWordLength(graph));
2140
2140
  }
2141
2141