finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,3932 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+
25
+ #include "globals.h"
26
+ #include "graph.h"
27
+ #include "recycleBin.h"
28
+ #include "tightString.h"
29
+ #include "passageMarker.h"
30
+ #include "utility.h"
31
+ #include "kmer.h"
32
+
33
+ #include "graphStructures.h"
34
+
35
+ #define ADENINE 0 /* Nucleotide codes as passed to writeNucleotideInDescriptor(); each value fits in 2 bits */
36
+ #define CYTOSINE 1
37
+ #define GUANINE 2
38
+ #define THYMINE 3 /* copyDownSequence() complements a code c as 3 - c in non-COLOR builds */
39
+
40
+ static RecycleBin *arcMemory = NULL; /* Arc pool, created on first use by allocateArc() */
41
+ static RecycleBin *nodeMemory = NULL; /* Node pool, created on first use by allocateNode() */
42
+ static RecycleBin *gapMarkerMemory = NULL; /* NOTE(review): presumably the GapMarker pool; its users are outside this excerpt */
43
+
44
+ #define BLOCKSIZE 50 /* second argument given to newRecycleBin() for the arc and node pools */
45
+ #define GAPBLOCKSIZE 10000 /* NOTE(review): presumably the equivalent value for gapMarkerMemory - confirm */
46
+
47
+ Arc *allocateArc()
48
+ {
49
+ if (arcMemory == NULL)
50
+ arcMemory = newRecycleBin(sizeof(Arc), BLOCKSIZE);
51
+
52
+ return allocatePointer(arcMemory);
53
+ }
54
+
55
+ void deallocateArc(Arc * arc)
56
+ {
57
+ deallocatePointer(arcMemory, arc);
58
+ }
59
+
60
+ Node *allocateNode()
61
+ {
62
+ if (nodeMemory == NULL)
63
+ nodeMemory = newRecycleBin(sizeof(Node), BLOCKSIZE);
64
+
65
+ return (Node *) allocatePointer(nodeMemory);
66
+ }
67
+
68
+ void deallocateNode(Node * node)
69
+ {
70
+ deallocatePointer(nodeMemory, node);
71
+ }
72
+
73
+ // Returns the twin node of a given node
74
+ Node *getTwinNode(Node * node)
75
+ {
76
+ return node->twinNode;
77
+ }
78
+
79
+ // Inserts new passage marker in the marker list of destination node
80
+ void insertPassageMarker(PassageMarkerI marker, Node * destination)
81
+ {
82
+ setTopOfTheNode(marker);
83
+ setNextInNode(marker, destination->marker);
84
+ destination->marker = marker;
85
+ }
86
+
87
+ // Returns the length of the node's descriptor list
88
+ Coordinate getNodeLength(Node * node)
89
+ {
90
+ return node->length;
91
+ }
92
+
93
+ // Returns the number of nodes in the graph
94
+ IDnum nodeCount(Graph * graph)
95
+ {
96
+ return graph->nodeCount;
97
+ }
98
+
99
+ // returns the number of sequences used to buid the graph
100
+ IDnum sequenceCount(Graph * graph)
101
+ {
102
+ return graph->sequenceCount;
103
+ }
104
+
105
+ // Creates an arc from node origin to node destination.
106
+ // If this arc already exists, increments its multiplicity by 1.
107
+ Arc *createArc(Node * originNode, Node * destinationNode, Graph * graph)
108
+ {
109
+ Arc *arc, *twinArc;
110
+ Node *destinationTwin;
111
+ IDnum lookupIndex;
112
+
113
+ if (originNode == NULL || destinationNode == NULL)
114
+ return NULL;
115
+
116
+ // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
117
+
118
+ arc = getArcBetweenNodes(originNode, destinationNode, graph);
119
+
120
+ if (arc != NULL) {
121
+ arc->multiplicity++;
122
+ arc->twinArc->multiplicity++;
123
+ return arc;
124
+ }
125
+ // If not found
126
+ #ifdef _OPENMP
127
+ #pragma omp critical
128
+ #endif
129
+ arc = allocateArc();
130
+ arc->destination = destinationNode;
131
+ arc->multiplicity = 1;
132
+ arc->previous = NULL;
133
+ arc->next = originNode->arc;
134
+ if (originNode->arc != NULL)
135
+ originNode->arc->previous = arc;
136
+ originNode->arc = arc;
137
+ originNode->arcCount++;
138
+
139
+ destinationTwin = destinationNode->twinNode;
140
+
141
+ // Hairpin case
142
+ if (destinationTwin == originNode) {
143
+ arc->multiplicity++;
144
+ arc->twinArc = arc;
145
+ if (graph->arcLookupTable != NULL) {
146
+ lookupIndex =
147
+ 2 * originNode->ID + destinationNode->ID +
148
+ 3 * graph->nodeCount;
149
+ arc->nextInLookupTable =
150
+ graph->arcLookupTable[lookupIndex];
151
+ graph->arcLookupTable[lookupIndex] = arc;
152
+ }
153
+ return arc;
154
+ }
155
+
156
+ #ifdef _OPENMP
157
+ #pragma omp critical
158
+ #endif
159
+ twinArc = allocateArc();
160
+ twinArc->destination = originNode->twinNode;
161
+ twinArc->multiplicity = 1;
162
+ twinArc->previous = NULL;
163
+ twinArc->next = destinationTwin->arc;
164
+ if (destinationTwin->arc != NULL)
165
+ destinationTwin->arc->previous = twinArc;
166
+ destinationTwin->arc = twinArc;
167
+ destinationTwin->arcCount++;
168
+
169
+ arc->twinArc = twinArc;
170
+ twinArc->twinArc = arc;
171
+
172
+ if (graph->arcLookupTable != NULL) {
173
+ lookupIndex =
174
+ 2 * originNode->ID + destinationNode->ID +
175
+ 3 * graph->nodeCount;
176
+ arc->nextInLookupTable =
177
+ graph->arcLookupTable[lookupIndex];
178
+ graph->arcLookupTable[lookupIndex] = arc;
179
+
180
+ lookupIndex =
181
+ -2 * destinationNode->ID - originNode->ID +
182
+ 3 * graph->nodeCount;
183
+ twinArc->nextInLookupTable =
184
+ graph->arcLookupTable[lookupIndex];
185
+ graph->arcLookupTable[lookupIndex] = twinArc;
186
+ }
187
+ return arc;
188
+ }
189
+
190
+ void createAnalogousArc(Node * originNode, Node * destinationNode,
191
+ Arc * refArc, Graph * graph)
192
+ {
193
+ Arc *arc, *twinArc;
194
+ Node *destinationTwin;
195
+ IDnum lookupIndex;
196
+
197
+ if (originNode == NULL || destinationNode == NULL)
198
+ return;
199
+
200
+ // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
201
+
202
+ arc = getArcBetweenNodes(originNode, destinationNode, graph);
203
+
204
+ if (arc != NULL) {
205
+ if (refArc->twinArc != refArc) {
206
+ arc->multiplicity += getMultiplicity(refArc);
207
+ arc->twinArc->multiplicity +=
208
+ getMultiplicity(refArc);
209
+ } else {
210
+ arc->multiplicity += getMultiplicity(refArc) / 2;
211
+ arc->twinArc->multiplicity +=
212
+ getMultiplicity(refArc) / 2;
213
+ }
214
+ return;
215
+ }
216
+ // If not found
217
+ arc = allocateArc();
218
+ arc->destination = destinationNode;
219
+ arc->multiplicity = getMultiplicity(refArc);
220
+ arc->previous = NULL;
221
+ arc->next = originNode->arc;
222
+ if (originNode->arc != NULL)
223
+ originNode->arc->previous = arc;
224
+ originNode->arc = arc;
225
+ originNode->arcCount++;
226
+
227
+ destinationTwin = destinationNode->twinNode;
228
+
229
+ // Hairpin case
230
+ if (destinationTwin == originNode) {
231
+ arc->twinArc = arc;
232
+ if (refArc->twinArc != refArc)
233
+ arc->multiplicity *= 2;
234
+
235
+ if (graph->arcLookupTable != NULL) {
236
+ lookupIndex =
237
+ 2 * originNode->ID + destinationNode->ID
238
+ + 3 * graph->nodeCount;
239
+ arc->nextInLookupTable =
240
+ graph->arcLookupTable[lookupIndex];
241
+ graph->arcLookupTable[lookupIndex] = arc;
242
+ }
243
+ return;
244
+ }
245
+
246
+ twinArc = allocateArc();
247
+ twinArc->destination = originNode->twinNode;
248
+ twinArc->multiplicity = getMultiplicity(refArc);
249
+ twinArc->previous = NULL;
250
+ twinArc->next = destinationTwin->arc;
251
+ if (destinationTwin->arc != NULL)
252
+ destinationTwin->arc->previous = twinArc;
253
+ destinationTwin->arc = twinArc;
254
+ destinationTwin->arcCount++;
255
+
256
+ arc->twinArc = twinArc;
257
+ twinArc->twinArc = arc;
258
+
259
+ if (graph->arcLookupTable != NULL) {
260
+ lookupIndex =
261
+ 2 * originNode->ID + destinationNode->ID +
262
+ 3 * graph->nodeCount;
263
+ arc->nextInLookupTable =
264
+ graph->arcLookupTable[lookupIndex];
265
+ graph->arcLookupTable[lookupIndex] = arc;
266
+
267
+ lookupIndex =
268
+ -2 * destinationNode->ID - originNode->ID +
269
+ 3 * graph->nodeCount;
270
+ twinArc->nextInLookupTable =
271
+ graph->arcLookupTable[lookupIndex];
272
+ graph->arcLookupTable[lookupIndex] = twinArc;
273
+ }
274
+ }
275
+
276
+ Arc *getArcBetweenNodes(Node * originNode, Node * destinationNode,
277
+ Graph * graph)
278
+ {
279
+ Arc *arc;
280
+ Node *twinDestination, *twinOrigin;
281
+
282
+ if (originNode == NULL || destinationNode == NULL)
283
+ return NULL;
284
+
285
+ if (graph->arcLookupTable != NULL) {
286
+ for (arc =
287
+ graph->arcLookupTable[2 * originNode->ID +
288
+ destinationNode->ID +
289
+ 3 * graph->nodeCount];
290
+ arc != NULL; arc = arc->nextInLookupTable) {
291
+ if (arc->destination == destinationNode) {
292
+ return arc;
293
+ }
294
+ }
295
+ return NULL;
296
+ }
297
+
298
+ twinDestination = destinationNode->twinNode;
299
+ if (originNode->arcCount <= twinDestination->arcCount) {
300
+ for (arc = originNode->arc; arc != NULL; arc = arc->next)
301
+ if (arc->destination == destinationNode)
302
+ return arc;
303
+ return NULL;
304
+ }
305
+
306
+ twinOrigin = originNode->twinNode;
307
+ for (arc = twinDestination->arc; arc != NULL; arc = arc->next)
308
+ if (arc->destination == twinOrigin)
309
+ return arc->twinArc;
310
+ return NULL;
311
+ }
312
+
313
+ void destroyArc(Arc * arc, Graph * graph)
314
+ {
315
+ Node *origin, *destination;
316
+ Arc *twinArc;
317
+ Arc *currentArc;
318
+ IDnum lookupIndex;
319
+
320
+ if (arc == NULL)
321
+ return;
322
+
323
+ twinArc = arc->twinArc;
324
+ origin = twinArc->destination->twinNode;
325
+ destination = arc->destination->twinNode;
326
+
327
+ //velvetLog("Destroying arc %p\n", arc);
328
+
329
+ // Removing arc from list
330
+ if (origin->arc == arc) {
331
+ origin->arc = arc->next;
332
+ if (origin->arc != NULL)
333
+ origin->arc->previous = NULL;
334
+ } else {
335
+ arc->previous->next = arc->next;
336
+ if (arc->next != NULL)
337
+ arc->next->previous = arc->previous;
338
+ }
339
+
340
+ origin->arcCount--;
341
+
342
+ if (destination == origin) {
343
+ if (graph->arcLookupTable != NULL) {
344
+ lookupIndex =
345
+ 2 * origin->ID - destination->ID +
346
+ 3 * graph->nodeCount;
347
+ currentArc = graph->arcLookupTable[lookupIndex];
348
+ if (currentArc == arc)
349
+ graph->arcLookupTable[lookupIndex] =
350
+ arc->nextInLookupTable;
351
+ else {
352
+ while (currentArc->nextInLookupTable !=
353
+ arc)
354
+ currentArc =
355
+ currentArc->nextInLookupTable;
356
+
357
+ currentArc->nextInLookupTable =
358
+ twinArc->nextInLookupTable;
359
+ }
360
+ }
361
+
362
+ deallocateArc(arc);
363
+ return;
364
+ }
365
+ // Removing arc's twin from list
366
+ if (destination->arc == twinArc) {
367
+ destination->arc = twinArc->next;
368
+ if (destination->arc != NULL)
369
+ destination->arc->previous = NULL;
370
+ } else {
371
+ twinArc->previous->next = twinArc->next;
372
+ if (twinArc->next != NULL)
373
+ twinArc->next->previous = twinArc->previous;
374
+ }
375
+
376
+ destination->arcCount--;
377
+
378
+ if (graph->arcLookupTable != NULL) {
379
+ lookupIndex =
380
+ 2 * origin->ID - destination->ID +
381
+ 3 * graph->nodeCount;
382
+ currentArc = graph->arcLookupTable[lookupIndex];
383
+ if (currentArc == arc)
384
+ graph->arcLookupTable[lookupIndex] =
385
+ arc->nextInLookupTable;
386
+ else {
387
+ while (currentArc->nextInLookupTable != arc)
388
+ currentArc = currentArc->nextInLookupTable;
389
+
390
+ currentArc->nextInLookupTable =
391
+ arc->nextInLookupTable;
392
+ }
393
+
394
+ lookupIndex =
395
+ 2 * destination->ID - origin->ID +
396
+ 3 * graph->nodeCount;
397
+ currentArc = graph->arcLookupTable[lookupIndex];
398
+ if (currentArc == twinArc)
399
+ graph->arcLookupTable[lookupIndex] =
400
+ twinArc->nextInLookupTable;
401
+ else {
402
+ while (currentArc->nextInLookupTable != twinArc)
403
+ currentArc = currentArc->nextInLookupTable;
404
+
405
+ currentArc->nextInLookupTable =
406
+ twinArc->nextInLookupTable;
407
+ }
408
+ }
409
+ // Freeing memory
410
+ deallocateArc(arc);
411
+ deallocateArc(twinArc);
412
+ }
413
+
414
+ void destroyNode(Node * node, Graph * graph)
415
+ {
416
+ Node *twin = node->twinNode;
417
+ IDnum ID = node->ID;
418
+ IDnum index;
419
+
420
+ //velvetLog("Destroying %d\n and twin %d\n", getNodeID(node), getNodeID(twin));
421
+
422
+ if (ID < 0)
423
+ ID = -ID;
424
+
425
+ // Node arcs:
426
+ while (node->arc != NULL)
427
+ destroyArc(node->arc, graph);
428
+ while (twin->arc != NULL)
429
+ destroyArc(twin->arc, graph);
430
+
431
+ // Descriptors
432
+ free(node->descriptor);
433
+ free(twin->descriptor);
434
+
435
+ // Passage markers
436
+ while (node->marker != NULL_IDX)
437
+ destroyPassageMarker(node->marker);
438
+
439
+ // Reads starts
440
+ if (graph->nodeReads != NULL) {
441
+ index = ID + graph->nodeCount;
442
+ free(graph->nodeReads[index]);
443
+ graph->nodeReads[index] = NULL;
444
+ graph->nodeReadCounts[index] = 0;
445
+
446
+ index = -ID + graph->nodeCount;
447
+ free(graph->nodeReads[index]);
448
+ graph->nodeReads[index] = NULL;
449
+ graph->nodeReadCounts[index] = 0;
450
+ }
451
+
452
+ graph->nodes[ID] = NULL;
453
+ deallocateNode(node);
454
+ deallocateNode(twin);
455
+ }
456
+
457
+ int outDegree(Node * node)
458
+ {
459
+ int result = 0;
460
+ Arc *arc = node->arc;
461
+ while (arc != NULL) {
462
+ result += arc->multiplicity;
463
+ arc = arc->next;
464
+ }
465
+
466
+ return result;
467
+ }
468
+
469
+ int simpleArcCount(Node * node)
470
+ {
471
+ return node->arcCount;
472
+ }
473
+
474
+ int arcCount(Node * node)
475
+ {
476
+ int result = 0;
477
+ Arc *arc;
478
+
479
+ if (node == NULL)
480
+ return result;
481
+
482
+ arc = node->arc;
483
+ while (arc != NULL) {
484
+ result++;
485
+ if (arc->destination == node->twinNode)
486
+ result++;
487
+ arc = arc->next;
488
+ }
489
+
490
+ return result;
491
+
492
+ }
493
+
494
+ static Nucleotide getNucleotideInDescriptor(Descriptor * descriptor,
495
+ Coordinate i)
496
+ {
497
+ Descriptor *fourMer = descriptor + i / 4;
498
+
499
+ switch (i % 4) {
500
+ case 0:
501
+ return (*fourMer & 3);
502
+ case 1:
503
+ return (*fourMer & 12) >> 2;
504
+ case 2:
505
+ return (*fourMer & 48) >> 4;
506
+ case 3:
507
+ return (*fourMer & 192) >> 6;
508
+ }
509
+ return 0;
510
+ }
511
+
512
+ Nucleotide getNucleotideInNode(Node * node, Coordinate index) {
513
+ return getNucleotideInDescriptor(node->descriptor, index);
514
+ }
515
+
516
+ PassageMarkerI getMarker(Node * node)
517
+ {
518
+ return node->marker;
519
+ }
520
+
521
+ void setMarker(Node * node, PassageMarkerI marker)
522
+ {
523
+ if (node == NULL)
524
+ return;
525
+
526
+ if (marker == NULL_IDX) {
527
+ node->marker = NULL_IDX;
528
+ node->twinNode->marker = NULL_IDX;
529
+ return;
530
+ }
531
+
532
+ node->marker = marker;
533
+ setTopOfTheNode(marker);
534
+ node->twinNode->marker = getTwinMarker(marker);
535
+ setTopOfTheNode(getTwinMarker(marker));
536
+ }
537
+
538
+ void setNodeStatus(Node * node, boolean status)
539
+ {
540
+ node->status = status;
541
+ node->twinNode->status = status;
542
+ }
543
+
544
+ void setSingleNodeStatus(Node * node, boolean status)
545
+ {
546
+ node->status = status;
547
+ }
548
+
549
+ boolean getNodeStatus(Node * node)
550
+ {
551
+ if (node == NULL)
552
+ return false;
553
+ return node->status;
554
+ }
555
+
556
+ IDnum getNodeID(Node * node)
557
+ {
558
+ if (node == NULL)
559
+ return 0;
560
+
561
+ return node->ID;
562
+ }
563
+
564
+ void resetNodeStatus(Graph * graph)
565
+ {
566
+ IDnum nodeIndex;
567
+ Node *node;
568
+
569
+ for (nodeIndex = 1; nodeIndex <= graph->nodeCount; nodeIndex++) {
570
+ node = graph->nodes[nodeIndex];
571
+ if (node == NULL)
572
+ continue;
573
+
574
+ node->status = false;
575
+ node->twinNode->status = false;
576
+ }
577
+ }
578
+
579
+ Node *getNodeInGraph(Graph * graph, IDnum nodeID)
580
+ {
581
+ if (nodeID == 0)
582
+ return NULL;
583
+ else if (nodeID > 0)
584
+ return graph->nodes[nodeID];
585
+ else if (graph->nodes[-nodeID] == NULL)
586
+ return NULL;
587
+ else
588
+ return graph->nodes[-nodeID]->twinNode;
589
+ }
590
+
591
+ Arc *getArc(Node * node)
592
+ {
593
+ return node->arc;
594
+ }
595
+
596
+ Arc *getNextArc(Arc * arc)
597
+ {
598
+ return arc->next;
599
+ }
600
+
601
+ IDnum getMultiplicity(Arc * arc)
602
+ {
603
+ if (arc == NULL)
604
+ return 0;
605
+
606
+ return arc->multiplicity;
607
+ }
608
+
609
+ Node *getOrigin(Arc * arc)
610
+ {
611
+ if (arc == NULL)
612
+ return NULL;
613
+
614
+ return arc->twinArc->destination->twinNode;
615
+ }
616
+
617
+ Node *getDestination(Arc * arc)
618
+ {
619
+ if (arc == NULL)
620
+ return NULL;
621
+
622
+ return arc->destination;
623
+ }
624
+
625
+ IDnum markerCount(Node * node)
626
+ {
627
+ IDnum count = 0;
628
+ PassageMarkerI marker;
629
+
630
+ for (marker = getMarker(node); marker != NULL_IDX;
631
+ marker = getNextInNode(marker))
632
+ count++;
633
+
634
+ return count;
635
+ }
636
+
637
+ void appendNodeSequence(Node * node, TightString * sequence,
638
+ Coordinate writeIndex)
639
+ {
640
+ Coordinate i;
641
+ Nucleotide nucleotide;
642
+
643
+ //velvetLog("Getting sequence from node %d of length %d (%d)\n", getNodeID(node), getNodeLength(node), getLength(nodeLabel));
644
+
645
+ for (i = 0; i < getNodeLength(node); i++) {
646
+ nucleotide =
647
+ getNucleotideInDescriptor(node->descriptor, i);
648
+ writeNucleotideAtPosition(nucleotide, i + writeIndex,
649
+ sequence);
650
+ }
651
+ }
652
+
653
+ static void writeNucleotideInDescriptor(Nucleotide nucleotide,
654
+ Descriptor * descriptor,
655
+ Coordinate i)
656
+ {
657
+ Descriptor *fourMer = descriptor + i / 4;
658
+ switch (i % 4) {
659
+ case 3:
660
+ *fourMer &= 63;
661
+ *fourMer += nucleotide << 6;
662
+ return;
663
+ case 2:
664
+ *fourMer &= 207;
665
+ *fourMer += nucleotide << 4;
666
+ return;
667
+ case 1:
668
+ *fourMer &= 243;
669
+ *fourMer += nucleotide << 2;
670
+ return;
671
+ case 0:
672
+ *fourMer &= 252;
673
+ *fourMer += nucleotide;
674
+ }
675
+ }
676
+
677
+ static inline Descriptor *mergeDescriptors(Descriptor * descr,
678
+ Coordinate destinationLength,
679
+ Descriptor * copy,
680
+ Coordinate sourceLength,
681
+ size_t arrayLength)
682
+ {
683
+ Descriptor *readPtr, *writePtr;
684
+ Descriptor readCopy;
685
+ int readOffset, writeOffset;
686
+ Descriptor *new = callocOrExit(arrayLength, Descriptor);
687
+ Coordinate index;
688
+
689
+ readPtr = descr;
690
+ readCopy = *readPtr;
691
+ writePtr = new;
692
+ writeOffset = 0;
693
+ for (index = 0; index < destinationLength; index++) {
694
+ (*writePtr) >>= 2;
695
+ (*writePtr) += (readCopy & 3) << 6;
696
+ readCopy >>= 2;
697
+
698
+ writeOffset++;
699
+ if (writeOffset == 4) {
700
+ writePtr++;
701
+ readPtr++;
702
+ if (index < destinationLength - 1)
703
+ readCopy = *readPtr;
704
+ writeOffset = 0;
705
+ }
706
+ }
707
+
708
+ readPtr = copy;
709
+ readCopy = *readPtr;
710
+ readOffset = 0;
711
+ for (index = 0; index < sourceLength; index++) {
712
+ (*writePtr) >>= 2;
713
+ (*writePtr) += (readCopy & 3) << 6;
714
+ readCopy >>= 2;
715
+
716
+ writeOffset++;
717
+ if (writeOffset == 4) {
718
+ writePtr++;
719
+ writeOffset = 0;
720
+ }
721
+
722
+ readOffset++;
723
+ if (readOffset == 4) {
724
+ readPtr++;
725
+ if (index < sourceLength - 1)
726
+ readCopy = *readPtr;
727
+ readOffset = 0;
728
+ }
729
+ }
730
+
731
+ if (writeOffset != 0) {
732
+ while (writeOffset != 4) {
733
+ (*writePtr) >>= 2;
734
+ writeOffset++;
735
+ }
736
+ }
737
+
738
+ return new;
739
+ }
740
+
741
+ static void addBufferToDescriptor(Node * node, Coordinate length)
742
+ {
743
+ Descriptor *descr;
744
+ Coordinate newLength;
745
+ size_t arrayLength;
746
+ Node *twinNode;
747
+ Coordinate index;
748
+ Descriptor *old_descriptor;
749
+
750
+ if (node == NULL)
751
+ return;
752
+
753
+ twinNode = node->twinNode;
754
+ descr = node->descriptor;
755
+
756
+ // Amendments for empty descriptors
757
+ if (descr == NULL) {
758
+ arrayLength = length / 4;
759
+ if (length % 4 != 0)
760
+ arrayLength++;
761
+
762
+ node->descriptor = callocOrExit(arrayLength, Descriptor);
763
+ node->length = length;
764
+ twinNode->descriptor =
765
+ callocOrExit(arrayLength, Descriptor);
766
+ twinNode->length = length;
767
+ return;
768
+ }
769
+
770
+ newLength = node->length + length;
771
+ arrayLength = newLength / 4;
772
+ if (newLength % 4 != 0)
773
+ arrayLength++;
774
+
775
+ // Merging forward descriptors
776
+ node->descriptor =
777
+ reallocOrExit(node->descriptor, arrayLength, Descriptor);
778
+
779
+ for (index = node->length; index < newLength; index++)
780
+ writeNucleotideInDescriptor(ADENINE, node->descriptor,
781
+ index);
782
+ node->length = newLength;
783
+
784
+ // Merging reverse descriptors
785
+ old_descriptor = twinNode->descriptor;
786
+ twinNode->descriptor = callocOrExit(arrayLength, Descriptor);
787
+ for (index = 0; index < twinNode->length; index++)
788
+ writeNucleotideInDescriptor(getNucleotideInDescriptor
789
+ (old_descriptor, index),
790
+ twinNode->descriptor,
791
+ index + length);
792
+ for (index = 0; index < length; index++)
793
+ writeNucleotideInDescriptor(THYMINE, twinNode->descriptor,
794
+ index);
795
+ free(old_descriptor);
796
+ twinNode->length = newLength;
797
+ }
798
+
799
+ void appendDescriptors(Node * destination, Node * source)
800
+ {
801
+ Descriptor *copy;
802
+ Descriptor *twinCopy;
803
+ Descriptor *descr;
804
+ Descriptor *twinDescr;
805
+ Coordinate newLength, destinationLength, sourceLength;
806
+ size_t arrayLength;
807
+ Descriptor *new;
808
+ Node *twinDestination;
809
+
810
+ if (source == NULL || destination == NULL)
811
+ return;
812
+
813
+ twinDestination = destination->twinNode;
814
+ descr = destination->descriptor;
815
+ twinDescr = twinDestination->descriptor;
816
+ copy = source->descriptor;
817
+ twinCopy = source->twinNode->descriptor;
818
+
819
+ // Amendments for empty descriptors
820
+ if (getNodeLength(source) == 0)
821
+ return;
822
+ if (getNodeLength(destination) == 0) {
823
+ destination->descriptor = copy;
824
+ twinDestination->descriptor = twinCopy;
825
+ source->descriptor = NULL;
826
+ source->twinNode->descriptor = NULL;
827
+ destination->length = source->length;
828
+ destination->twinNode->length = source->length;
829
+ source->length = 0;
830
+ source->twinNode->length = 0;
831
+ return;
832
+ }
833
+
834
+ destinationLength = destination->length;
835
+ sourceLength = source->length;
836
+ newLength = destinationLength + sourceLength;
837
+ arrayLength = newLength / 4;
838
+ if (newLength % 4 != 0)
839
+ arrayLength++;
840
+
841
+ // Merging forward descriptors
842
+ new =
843
+ mergeDescriptors(descr, destinationLength, copy, sourceLength,
844
+ arrayLength);
845
+ free(descr);
846
+ destination->descriptor = new;
847
+ destination->length = newLength;
848
+
849
+ // Merging reverse descriptors
850
+ new =
851
+ mergeDescriptors(twinCopy, sourceLength, twinDescr,
852
+ destinationLength, arrayLength);
853
+ free(twinDescr);
854
+ twinDestination->descriptor = new;
855
+ twinDestination->length = newLength;
856
+ }
857
+
858
+ static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
859
+ {
860
+ Coordinate index;
861
+ Nucleotide nucleotide;
862
+
863
+ for (index = 0; index < sourceLength; index++) {
864
+ nucleotide = getNucleotideInDescriptor(copy, index);
865
+ writeNucleotideInDescriptor(nucleotide, descr, index + destinationLength);
866
+ }
867
+ }
868
+
869
+ static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
870
+ {
871
+ Coordinate shift = totalLength - destinationLength - sourceLength;
872
+ Coordinate index;
873
+ Nucleotide nucleotide;
874
+
875
+ for (index = 0; index < sourceLength; index++) {
876
+ nucleotide = getNucleotideInDescriptor(copy, index);
877
+ writeNucleotideInDescriptor(nucleotide, descr, index + shift);
878
+ }
879
+ }
880
+
881
+ void directlyAppendDescriptors(Node * destination, Node * source, Coordinate totalLength)
882
+ {
883
+ Descriptor *copy;
884
+ Descriptor *twinCopy;
885
+ Descriptor *descr;
886
+ Descriptor *twinDescr;
887
+ Coordinate destinationLength, sourceLength;
888
+
889
+ if (source == NULL || destination == NULL)
890
+ return;
891
+
892
+ descr = destination->descriptor;
893
+ twinDescr = destination->twinNode->descriptor;
894
+ copy = source->descriptor;
895
+ twinCopy = source->twinNode->descriptor;
896
+
897
+ // Amendments for empty descriptors
898
+ if (getNodeLength(source) == 0)
899
+ return;
900
+
901
+ destinationLength = destination->length;
902
+ sourceLength = source->length;
903
+
904
+ // Merging forward descriptors
905
+ catDescriptors(descr, destinationLength, copy, sourceLength);
906
+
907
+ // Merging reverse descriptors
908
+ reverseCatDescriptors(twinDescr, destinationLength, twinCopy, sourceLength, totalLength);
909
+
910
+ destination->length += source->length;
911
+ destination->twinNode->length += source->length;
912
+ }
913
+
914
+ static void copyDownDescriptor(Descriptor ** writePtr, int *writeOffset,
915
+ Descriptor * source, Coordinate length)
916
+ {
917
+ Descriptor *readPtr = source;
918
+ Descriptor readCopy = *readPtr;
919
+ int readOffset = 0;
920
+ Coordinate index;
921
+
922
+ for (index = 0; index < length; index++) {
923
+ (**writePtr) >>= 2;
924
+ (**writePtr) += (readCopy & 3) << 6;
925
+ readCopy >>= 2;
926
+
927
+ (*writeOffset)++;
928
+ if (*writeOffset == 4) {
929
+ (*writePtr)++;
930
+ *writeOffset = 0;
931
+ }
932
+
933
+ readOffset++;
934
+ if (readOffset == 4) {
935
+ readPtr++;
936
+ if (index < length - 1)
937
+ readCopy = *readPtr;
938
+ readOffset = 0;
939
+ }
940
+ }
941
+ }
942
+
943
+ static void copyDownSequence(Descriptor ** writePtr, int *writeOffset,
944
+ TightString * sequence, Coordinate start,
945
+ Coordinate finish, int WORDLENGTH)
946
+ {
947
+ boolean forward = (start < finish);
948
+ Coordinate sourceLength = finish - start;
949
+ Coordinate index;
950
+ Nucleotide nucleotide;
951
+
952
+ if (!forward)
953
+ sourceLength *= -1;
954
+
955
+ for (index = 0; index < sourceLength; index++) {
956
+ if (forward)
957
+ nucleotide =
958
+ getNucleotide(start + WORDLENGTH - 1 + index,
959
+ sequence);
960
+ else
961
+ nucleotide =
962
+ #ifndef COLOR
963
+ 3 - getNucleotide(start - index - 1, sequence);
964
+ #else
965
+ getNucleotide(start - index - 1, sequence);
966
+ #endif
967
+
968
+ (**writePtr) >>= 2;
969
+ (**writePtr) += nucleotide << 6;
970
+
971
+ (*writeOffset)++;
972
+ if (*writeOffset == 4) {
973
+ (*writePtr)++;
974
+ *writeOffset = 0;
975
+ }
976
+ }
977
+ }
978
+
979
+ static Descriptor *appendSequenceToDescriptor(Descriptor * descr,
980
+ Coordinate nodeLength,
981
+ PassageMarkerI marker,
982
+ TightString *sequences,
983
+ int WORDLENGTH,
984
+ size_t arrayLength,
985
+ boolean downStream)
986
+ {
987
+ int writeOffset = 0;
988
+ Descriptor *new = callocOrExit(arrayLength, Descriptor);
989
+ Descriptor *writePtr = new;
990
+ TightString *sequence;
991
+ IDnum sequenceID = getPassageMarkerSequenceID(marker);
992
+ Coordinate start = getPassageMarkerStart(marker);
993
+ Coordinate finish = getPassageMarkerFinish(marker);
994
+
995
+ if (sequenceID > 0)
996
+ sequence = getTightStringInArray(sequences, sequenceID - 1);
997
+ else
998
+ sequence = getTightStringInArray(sequences, -sequenceID - 1);
999
+
1000
+ if (downStream)
1001
+ copyDownDescriptor(&writePtr, &writeOffset, descr,
1002
+ nodeLength);
1003
+
1004
+ copyDownSequence(&writePtr, &writeOffset, sequence, start, finish,
1005
+ WORDLENGTH);
1006
+
1007
+ if (!downStream)
1008
+ copyDownDescriptor(&writePtr, &writeOffset, descr,
1009
+ nodeLength);
1010
+
1011
+ if (writeOffset != 0) {
1012
+ while (writeOffset != 4) {
1013
+ (*writePtr) >>= 2;
1014
+ writeOffset++;
1015
+ }
1016
+ }
1017
+
1018
+ return new;
1019
+ }
1020
+
1021
+ void appendSequence(Node * node, TightString * reads,
1022
+ PassageMarkerI guide, Graph * graph)
1023
+ {
1024
+ Descriptor *descr;
1025
+ Descriptor *twinDescr;
1026
+ Coordinate newLength, nodeLength, sourceLength;
1027
+ size_t arrayLength;
1028
+ Descriptor *new;
1029
+ Node *twinNode;
1030
+
1031
+ if (node == NULL)
1032
+ return;
1033
+
1034
+ twinNode = node->twinNode;
1035
+ descr = node->descriptor;
1036
+ twinDescr = twinNode->descriptor;
1037
+ nodeLength = node->length;
1038
+ sourceLength = getPassageMarkerLength(guide);
1039
+
1040
+ // Amendments for empty descriptors
1041
+ if (sourceLength == 0)
1042
+ return;
1043
+
1044
+ newLength = nodeLength + sourceLength;
1045
+ arrayLength = newLength / 4;
1046
+ if (newLength % 4 != 0)
1047
+ arrayLength++;
1048
+
1049
+ // Merging forward descriptors
1050
+ new =
1051
+ appendSequenceToDescriptor(descr, nodeLength, guide, reads,
1052
+ getWordLength(graph), arrayLength,
1053
+ true);
1054
+ free(descr);
1055
+ node->descriptor = new;
1056
+ node->length = newLength;
1057
+
1058
+ // Merging reverse descriptors
1059
+ new =
1060
+ appendSequenceToDescriptor(twinDescr, nodeLength,
1061
+ getTwinMarker(guide), reads,
1062
+ getWordLength(graph), arrayLength,
1063
+ false);
1064
+ free(twinDescr);
1065
+ twinNode->descriptor = new;
1066
+ twinNode->length = newLength;
1067
+ }
1068
+
1069
+ void setMultiplicity(Arc * arc, IDnum mult)
1070
+ {
1071
+ arc->multiplicity = mult;
1072
+ arc->twinArc->multiplicity = mult;
1073
+ }
1074
+
1075
+ // Reshuffles the graph->nodes array to remove NULL pointers
1076
+ // Beware that node IDs are accordingly reshuffled (all pointers remain valid though)
1077
+ void renumberNodes(Graph * graph)
1078
+ {
1079
+ IDnum nodeIndex;
1080
+ Node *currentNode;
1081
+ IDnum counter = 0;
1082
+ IDnum nodes = graph->nodeCount;
1083
+ IDnum newIndex;
1084
+
1085
+ velvetLog("Renumbering nodes\n");
1086
+ velvetLog("Initial node count %li\n", (long) graph->nodeCount);
1087
+
1088
+ for (nodeIndex = 1; nodeIndex <= nodes; nodeIndex++) {
1089
+ currentNode = getNodeInGraph(graph, nodeIndex);
1090
+
1091
+ if (currentNode == NULL)
1092
+ counter++;
1093
+ else if (counter != 0) {
1094
+ newIndex = nodeIndex - counter;
1095
+ currentNode->ID = newIndex;
1096
+ currentNode->twinNode->ID = -newIndex;
1097
+ graph->nodes[newIndex] = currentNode;
1098
+
1099
+ if (graph->nodeReads != NULL) {
1100
+ graph->nodeReads[newIndex + nodes] =
1101
+ graph->nodeReads[nodeIndex + nodes];
1102
+ graph->nodeReadCounts[newIndex + nodes] =
1103
+ graph->nodeReadCounts[nodeIndex +
1104
+ nodes];
1105
+
1106
+ graph->nodeReads[nodeIndex + nodes] = NULL;
1107
+ graph->nodeReadCounts[nodeIndex + nodes] =
1108
+ 0;
1109
+
1110
+ graph->nodeReads[-newIndex + nodes] =
1111
+ graph->nodeReads[-nodeIndex + nodes];
1112
+ graph->nodeReadCounts[-newIndex + nodes] =
1113
+ graph->nodeReadCounts[-nodeIndex +
1114
+ nodes];
1115
+
1116
+ graph->nodeReads[-nodeIndex + nodes] =
1117
+ NULL;
1118
+ graph->nodeReadCounts[-nodeIndex + nodes] =
1119
+ 0;
1120
+ }
1121
+
1122
+ if (graph->gapMarkers != NULL) {
1123
+ graph->gapMarkers[newIndex] =
1124
+ graph->gapMarkers[nodeIndex];
1125
+ graph->gapMarkers[nodeIndex] = NULL;
1126
+ }
1127
+ }
1128
+ }
1129
+
1130
+ // Shitfting array to the left
1131
+ if (graph->nodeReads != NULL && counter != 0) {
1132
+ for (nodeIndex = counter; nodeIndex <= 2 * nodes - counter;
1133
+ nodeIndex++) {
1134
+ graph->nodeReads[nodeIndex - counter] =
1135
+ graph->nodeReads[nodeIndex];
1136
+ graph->nodeReadCounts[nodeIndex - counter] =
1137
+ graph->nodeReadCounts[nodeIndex];
1138
+ }
1139
+ }
1140
+
1141
+ // Rellocating node space
1142
+ graph->nodeCount -= counter;
1143
+ graph->nodes =
1144
+ reallocOrExit(graph->nodes, graph->nodeCount + 1, Node *);
1145
+
1146
+ // Reallocating short read marker arrays
1147
+ if (graph->nodeReads != NULL) {
1148
+ graph->nodeReads =
1149
+ reallocOrExit(graph->nodeReads,
1150
+ 2 * graph->nodeCount +
1151
+ 1, ShortReadMarker *);
1152
+ graph->nodeReadCounts =
1153
+ reallocOrExit(graph->nodeReadCounts,
1154
+ 2 * graph->nodeCount + 1, IDnum);
1155
+ }
1156
+
1157
+ // Reallocating gap marker table
1158
+ if (graph->gapMarkers != NULL)
1159
+ graph->gapMarkers = reallocOrExit(graph->gapMarkers,
1160
+ graph->nodeCount +
1161
+ 1, GapMarker *);
1162
+
1163
+ velvetLog("Removed %li null nodes\n", (long) counter);
1164
+ }
1165
+
1166
+ void splitNodeDescriptor(Node * source, Node * target, Coordinate offset)
1167
+ {
1168
+ Coordinate originalLength = source->length;
1169
+ Coordinate backLength = originalLength - offset;
1170
+ Coordinate index;
1171
+ Descriptor *descriptor, *new;
1172
+ size_t arrayLength;
1173
+ Nucleotide nucleotide;
1174
+
1175
+ source->length = offset;
1176
+ source->twinNode->length = offset;
1177
+
1178
+ if (target != NULL) {
1179
+ target->length = backLength;
1180
+ target->twinNode->length = backLength;
1181
+ free(target->descriptor);
1182
+ free(target->twinNode->descriptor);
1183
+ target->descriptor = NULL;
1184
+ target->twinNode->descriptor = NULL;
1185
+ }
1186
+
1187
+ if (backLength == 0)
1188
+ return;
1189
+
1190
+ descriptor = source->descriptor;
1191
+
1192
+ arrayLength = backLength / 4;
1193
+ if (backLength % 4 > 0)
1194
+ arrayLength++;
1195
+
1196
+ if (target != NULL) {
1197
+ // Target node .. forwards
1198
+ new = mallocOrExit(arrayLength, Descriptor);
1199
+ target->descriptor = new;
1200
+ for (index = 0; index < backLength; index++) {
1201
+ nucleotide =
1202
+ getNucleotideInDescriptor(descriptor, index);
1203
+ writeNucleotideInDescriptor(nucleotide, new,
1204
+ index);
1205
+ }
1206
+ }
1207
+ // Source node
1208
+ for (index = backLength; index < originalLength; index++) {
1209
+ nucleotide = getNucleotideInDescriptor(descriptor, index);
1210
+ writeNucleotideInDescriptor(nucleotide, descriptor,
1211
+ index - backLength);
1212
+ }
1213
+
1214
+ if (target == NULL)
1215
+ return;
1216
+
1217
+ // target node other way
1218
+ descriptor = source->twinNode->descriptor;
1219
+ new = mallocOrExit(arrayLength, Descriptor);
1220
+ target->twinNode->descriptor = new;
1221
+
1222
+ for (index = offset; index < originalLength; index++) {
1223
+ nucleotide = getNucleotideInDescriptor(descriptor, index);
1224
+ writeNucleotideInDescriptor(nucleotide, new,
1225
+ index - offset);
1226
+ }
1227
+ }
1228
+
1229
+ void reduceNode(Node * node)
1230
+ {
1231
+ free(node->descriptor);
1232
+ node->descriptor = NULL;
1233
+ node->length = 0;
1234
+
1235
+ free(node->twinNode->descriptor);
1236
+ node->twinNode->descriptor = NULL;
1237
+ node->twinNode->length = 0;
1238
+ }
1239
+
1240
+ // Allocate memory for an empty graph created with sequenceCount different sequences
1241
+ Graph *emptyGraph(IDnum sequenceCount, int wordLength)
1242
+ {
1243
+ Graph *newGraph = mallocOrExit(1, Graph);
1244
+ newGraph->sequenceCount = sequenceCount;
1245
+ newGraph->arcLookupTable = NULL;
1246
+ newGraph->nodeReads = NULL;
1247
+ newGraph->nodeReadCounts = NULL;
1248
+ newGraph->wordLength = wordLength;
1249
+ newGraph->gapMarkers = NULL;
1250
+ return newGraph;
1251
+ }
1252
+
1253
+ static Descriptor *newPositiveDescriptor(IDnum sequenceID,
1254
+ Coordinate start,
1255
+ Coordinate finish,
1256
+ TightString *sequences,
1257
+ int WORDLENGTH)
1258
+ {
1259
+ Coordinate index;
1260
+ Nucleotide nucleotide;
1261
+ TightString *tString = getTightStringInArray (sequences, sequenceID - 1);
1262
+ Coordinate length = finish - start;
1263
+ Descriptor *res;
1264
+ size_t arrayLength = length / 4;
1265
+
1266
+ if (length % 4 > 0)
1267
+ arrayLength++;
1268
+
1269
+ res = mallocOrExit(arrayLength, Descriptor);
1270
+
1271
+ for (index = 0; index < length; index++) {
1272
+ nucleotide =
1273
+ getNucleotide(start + index + WORDLENGTH - 1, tString);
1274
+ writeNucleotideInDescriptor(nucleotide, res, index);
1275
+ }
1276
+
1277
+ return res;
1278
+
1279
+ }
1280
+
1281
+ static Descriptor *newNegativeDescriptor(IDnum sequenceID,
1282
+ Coordinate start,
1283
+ Coordinate finish,
1284
+ TightString *sequences,
1285
+ int WORDLENGTH)
1286
+ {
1287
+ Coordinate index;
1288
+ Nucleotide nucleotide;
1289
+ TightString *tString = getTightStringInArray (sequences, -sequenceID - 1);
1290
+ Coordinate length = start - finish;
1291
+ Descriptor *res;
1292
+ size_t arrayLength = length / 4;
1293
+
1294
+ if (length % 4 > 0)
1295
+ arrayLength++;
1296
+
1297
+ res = mallocOrExit(arrayLength, Descriptor);
1298
+
1299
+ for (index = 0; index < length; index++) {
1300
+ nucleotide = getNucleotide(start - index, tString);
1301
+ #ifndef COLOR
1302
+ writeNucleotideInDescriptor(3 - nucleotide, res, index);
1303
+ #else
1304
+ writeNucleotideInDescriptor(nucleotide, res, index);
1305
+ #endif
1306
+ }
1307
+
1308
+ return res;
1309
+
1310
+ }
1311
+
1312
+ static Descriptor *newDescriptor(IDnum sequenceID, Coordinate start,
1313
+ Coordinate finish,
1314
+ TightString * sequences, int WORDLENGTH)
1315
+ {
1316
+ if (sequenceID > 0)
1317
+ return newPositiveDescriptor(sequenceID, start, finish,
1318
+ sequences, WORDLENGTH);
1319
+ else
1320
+ return newNegativeDescriptor(sequenceID, start, finish,
1321
+ sequences, WORDLENGTH);
1322
+ }
1323
+
1324
+ // Constructor
1325
+ // Memory allocated
1326
+ Node *newNode(IDnum sequenceID, Coordinate start, Coordinate finish,
1327
+ Coordinate offset, IDnum ID, TightString * sequences,
1328
+ int WORDLENGTH)
1329
+ {
1330
+ Node *newnd = allocateNode();
1331
+ Node *antiNode = allocateNode();
1332
+
1333
+ newnd->ID = ID;
1334
+ newnd->descriptor =
1335
+ newDescriptor(sequenceID, start + offset, finish + offset,
1336
+ sequences, WORDLENGTH);
1337
+ newnd->arc = NULL;
1338
+ newnd->arcCount = 0;
1339
+ newnd->marker = NULL_IDX;
1340
+ newnd->status = false;
1341
+
1342
+ #ifndef SINGLE_COV_CAT
1343
+ Category cat;
1344
+ for (cat = 0; cat < CATEGORIES; cat++) {
1345
+ newnd->virtualCoverage[cat] = 0;
1346
+ newnd->originalVirtualCoverage[cat] = 0;
1347
+ }
1348
+ #else
1349
+ newnd->virtualCoverage = 0;
1350
+ #endif
1351
+
1352
+ antiNode->ID = -ID;
1353
+ antiNode->descriptor =
1354
+ newDescriptor(-sequenceID, finish + offset - 1,
1355
+ start + offset - 1, sequences, WORDLENGTH);
1356
+ antiNode->arc = NULL;
1357
+ antiNode->arcCount = 0;
1358
+ antiNode->marker = NULL_IDX;
1359
+ antiNode->status = false;
1360
+
1361
+ #ifndef SINGLE_COV_CAT
1362
+ for (cat = 0; cat < CATEGORIES; cat++) {
1363
+ antiNode->virtualCoverage[cat] = 0;
1364
+ antiNode->originalVirtualCoverage[cat] = 0;
1365
+ }
1366
+ #else
1367
+ antiNode->virtualCoverage = 0;
1368
+ #endif
1369
+
1370
+ newnd->twinNode = antiNode;
1371
+ antiNode->twinNode = newnd;
1372
+
1373
+ if (sequenceID > 0) {
1374
+ newnd->length = finish - start;
1375
+ antiNode->length = finish - start;
1376
+ } else {
1377
+ newnd->length = start - finish;
1378
+ antiNode->length = start - finish;
1379
+ }
1380
+
1381
+ return newnd;
1382
+ }
1383
+
1384
+ void allocateNodeSpace(Graph * graph, IDnum nodeCount)
1385
+ {
1386
+ graph->nodes = callocOrExit(nodeCount + 1, Node *);
1387
+ graph->nodeCount = nodeCount;
1388
+ }
1389
+
1390
+ boolean getUniqueness(Node * node)
1391
+ {
1392
+ return node->uniqueness;
1393
+ }
1394
+
1395
+ void setUniqueness(Node * node, boolean value)
1396
+ {
1397
+ node->uniqueness = value;
1398
+ node->twinNode->uniqueness = value;
1399
+ }
1400
+
1401
+ Node *emptyNode()
1402
+ {
1403
+ Node *newnd = allocateNode();
1404
+ Node *antiNode = allocateNode();
1405
+
1406
+ newnd->ID = 0;
1407
+ newnd->descriptor = NULL;
1408
+ newnd->arc = NULL;
1409
+ newnd->arcCount = 0;
1410
+ newnd->marker = NULL_IDX;
1411
+ newnd->length = 0;
1412
+ newnd->uniqueness = false;
1413
+
1414
+ #ifndef SINGLE_COV_CAT
1415
+ Category cat;
1416
+ for (cat = 0; cat < CATEGORIES; cat++) {
1417
+ newnd->virtualCoverage[cat] = 0;
1418
+ newnd->originalVirtualCoverage[cat] = 0;
1419
+ }
1420
+ #else
1421
+ newnd->virtualCoverage = 0;
1422
+ #endif
1423
+
1424
+ antiNode->ID = 0;
1425
+ antiNode->descriptor = NULL;
1426
+ antiNode->arc = NULL;
1427
+ antiNode->arcCount = 0;
1428
+ antiNode->marker = NULL_IDX;
1429
+ antiNode->length = 0;
1430
+ antiNode->uniqueness = false;
1431
+
1432
+ #ifndef SINGLE_COV_CAT
1433
+ for (cat = 0; cat < CATEGORIES; cat++) {
1434
+ antiNode->virtualCoverage[cat] = 0;
1435
+ antiNode->originalVirtualCoverage[cat] = 0;
1436
+ }
1437
+ #else
1438
+ antiNode->virtualCoverage = 0;
1439
+ #endif
1440
+
1441
+ newnd->twinNode = antiNode;
1442
+ antiNode->twinNode = newnd;
1443
+
1444
+ return newnd;
1445
+
1446
+ }
1447
+
1448
+ Node *addEmptyNodeToGraph(Graph * graph, IDnum ID)
1449
+ {
1450
+ Node *newnd = emptyNode();
1451
+
1452
+ newnd->ID = ID;
1453
+ newnd->twinNode->ID = -ID;
1454
+
1455
+ graph->nodes[ID] = newnd;
1456
+
1457
+ return newnd;
1458
+
1459
+ }
1460
+
1461
+ #ifndef SINGLE_COV_CAT
1462
+
1463
+ void setVirtualCoverage(Node * node, Category category,
1464
+ Coordinate coverage)
1465
+ {
1466
+ node->virtualCoverage[category] = coverage;
1467
+ node->twinNode->virtualCoverage[category] =
1468
+ node->virtualCoverage[category];
1469
+ }
1470
+
1471
+ void incrementVirtualCoverage(Node * node, Category category,
1472
+ Coordinate coverage)
1473
+ {
1474
+ node->virtualCoverage[category] += coverage;
1475
+ node->twinNode->virtualCoverage[category] =
1476
+ node->virtualCoverage[category];
1477
+ }
1478
+
1479
+ Coordinate getVirtualCoverage(Node * node, Category category)
1480
+ {
1481
+ return node->virtualCoverage[category];
1482
+ }
1483
+
1484
+ Coordinate getTotalCoverage(Node * node)
1485
+ {
1486
+ Category cat;
1487
+ Coordinate coverage = 0;
1488
+
1489
+ for (cat = 0; cat < CATEGORIES; cat++)
1490
+ coverage += node->virtualCoverage[cat];
1491
+
1492
+ return coverage;
1493
+ }
1494
+
1495
+ void setOriginalVirtualCoverage(Node * node, Category category,
1496
+ Coordinate coverage)
1497
+ {
1498
+ node->originalVirtualCoverage[category] = coverage;
1499
+ node->twinNode->originalVirtualCoverage[category] =
1500
+ node->originalVirtualCoverage[category];
1501
+ }
1502
+
1503
+ void incrementOriginalVirtualCoverage(Node * node, Category category,
1504
+ Coordinate coverage)
1505
+ {
1506
+ node->originalVirtualCoverage[category] += coverage;
1507
+ node->twinNode->originalVirtualCoverage[category] =
1508
+ node->originalVirtualCoverage[category];
1509
+ }
1510
+
1511
+ Coordinate getOriginalVirtualCoverage(Node * node, Category category)
1512
+ {
1513
+ return node->originalVirtualCoverage[category];
1514
+ }
1515
+
1516
+ #else
1517
+
1518
+ void setVirtualCoverage(Node * node,
1519
+ Coordinate coverage)
1520
+ {
1521
+ node->virtualCoverage = coverage;
1522
+ node->twinNode->virtualCoverage = coverage;
1523
+ }
1524
+
1525
+ void incrementVirtualCoverage(Node * node,
1526
+ Coordinate coverage)
1527
+ {
1528
+ node->virtualCoverage += coverage;
1529
+ node->twinNode->virtualCoverage += coverage;
1530
+ }
1531
+
1532
+ Coordinate getVirtualCoverage(Node * node)
1533
+ {
1534
+ return node->virtualCoverage;
1535
+ }
1536
+
1537
+ Coordinate getTotalCoverage(Node * node)
1538
+ {
1539
+ return node->virtualCoverage;
1540
+ }
1541
+
1542
+ #endif
1543
+
1544
+ boolean hasSingleArc(Node * node)
1545
+ {
1546
+ return node->arcCount == 1;
1547
+ }
1548
+
1549
+ void activateArcLookupTable(Graph * graph)
1550
+ {
1551
+ IDnum index;
1552
+ Node *node;
1553
+ Arc *arc;
1554
+ IDnum nodes = graph->nodeCount;
1555
+ IDnum twinOriginID, destinationID, hash;
1556
+ Arc **table;
1557
+
1558
+ velvetLog("Activating arc lookup table\n");
1559
+
1560
+ graph->arcLookupTable = callocOrExit(6 * nodes + 1, Arc *);
1561
+
1562
+ table = graph->arcLookupTable;
1563
+
1564
+ for (index = -nodes; index <= nodes; index++) {
1565
+ if (index == 0)
1566
+ continue;
1567
+
1568
+ node = getNodeInGraph(graph, index);
1569
+ if (node == 0)
1570
+ continue;
1571
+
1572
+ for (arc = getArc(node); arc != NULL;
1573
+ arc = getNextArc(arc)) {
1574
+ twinOriginID = arc->twinArc->destination->ID;
1575
+ destinationID = arc->destination->ID;
1576
+ hash =
1577
+ 3 * nodes - 2 * twinOriginID + destinationID;
1578
+ arc->nextInLookupTable = table[hash];
1579
+ table[hash] = arc;
1580
+ }
1581
+ }
1582
+
1583
+ velvetLog("Done activating arc lookup table\n");
1584
+ }
1585
+
1586
+ void deactivateArcLookupTable(Graph * graph)
1587
+ {
1588
+ free(graph->arcLookupTable);
1589
+ graph->arcLookupTable = NULL;
1590
+ }
1591
+
1592
+ static void exportNode(FILE * outfile, Node * node, void *withSequence)
1593
+ {
1594
+ Coordinate index;
1595
+ Nucleotide nucleotide;
1596
+
1597
+ if (node == NULL)
1598
+ return;
1599
+
1600
+ velvetFprintf(outfile, "NODE\t%ld\t%lld", (long) node->ID, (long long) node->length);
1601
+
1602
+ #ifndef SINGLE_COV_CAT
1603
+ Category cat;
1604
+ for (cat = 0; cat < CATEGORIES; cat++)
1605
+ velvetFprintf(outfile, "\t%lld\t%lld", (long long) node->virtualCoverage[cat],
1606
+ (long long) node->originalVirtualCoverage[cat]);
1607
+ velvetFprintf(outfile, "\n");
1608
+ #else
1609
+ velvetFprintf(outfile, "\t%lld\n", (long long) node->virtualCoverage);
1610
+ #endif
1611
+
1612
+ if (withSequence == NULL)
1613
+ return;
1614
+
1615
+ for (index = 0; index < node->length; index++) {
1616
+ nucleotide =
1617
+ getNucleotideInDescriptor(node->descriptor, index);
1618
+ switch (nucleotide) {
1619
+ case ADENINE:
1620
+ velvetFprintf(outfile, "A");
1621
+ break;
1622
+ case CYTOSINE:
1623
+ velvetFprintf(outfile, "C");
1624
+ break;
1625
+ case GUANINE:
1626
+ velvetFprintf(outfile, "G");
1627
+ break;
1628
+ case THYMINE:
1629
+ velvetFprintf(outfile, "T");
1630
+ break;
1631
+ }
1632
+ }
1633
+ velvetFprintf(outfile, "\n");
1634
+
1635
+ for (index = 0; index < node->length; index++) {
1636
+ nucleotide =
1637
+ getNucleotideInDescriptor(node->twinNode->descriptor,
1638
+ index);
1639
+ switch (nucleotide) {
1640
+ case ADENINE:
1641
+ velvetFprintf(outfile, "A");
1642
+ break;
1643
+ case CYTOSINE:
1644
+ velvetFprintf(outfile, "C");
1645
+ break;
1646
+ case GUANINE:
1647
+ velvetFprintf(outfile, "G");
1648
+ break;
1649
+ case THYMINE:
1650
+ velvetFprintf(outfile, "T");
1651
+ break;
1652
+ }
1653
+ }
1654
+ velvetFprintf(outfile, "\n");
1655
+ }
1656
+
1657
+ static void exportArc(FILE * outfile, Arc * arc)
1658
+ {
1659
+ IDnum originID, destinationID;
1660
+ IDnum absOriginID, absDestinationID;
1661
+
1662
+ if (arc == NULL)
1663
+ return;
1664
+
1665
+ absOriginID = originID = -arc->twinArc->destination->ID;
1666
+ absDestinationID = destinationID = arc->destination->ID;
1667
+
1668
+ if (absOriginID < 0)
1669
+ absOriginID = -absOriginID;
1670
+ if (absDestinationID < 0)
1671
+ absDestinationID = -absDestinationID;
1672
+
1673
+ if (absDestinationID < absOriginID)
1674
+ return;
1675
+
1676
+ if (originID == destinationID && originID < 0)
1677
+ return;
1678
+
1679
+ velvetFprintf(outfile, "ARC\t%li\t%li\t%li\n", (long) originID, (long) destinationID,
1680
+ (long) arc->multiplicity);
1681
+ }
1682
+
1683
+ // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
1684
+ static Arc *mergeArcLists(Arc * left, Arc * right)
1685
+ {
1686
+ Arc *mergedList = NULL;
1687
+ Arc *tail = NULL;
1688
+
1689
+ // Choose first element:
1690
+ if (left->destination->ID <= right->destination->ID) {
1691
+ mergedList = left;
1692
+ tail = left;
1693
+ left = left->next;
1694
+ } else {
1695
+ mergedList = right;
1696
+ tail = right;
1697
+ right = right->next;
1698
+ }
1699
+
1700
+ // Iterate while both lists are still non empty
1701
+ while (left != NULL && right != NULL) {
1702
+ if (left->destination->ID <= right->destination->ID) {
1703
+ tail->next = left;
1704
+ left->previous = tail;
1705
+ left = left->next;
1706
+ } else {
1707
+ tail->next = right;
1708
+ right->previous = tail;
1709
+ right = right->next;
1710
+ }
1711
+
1712
+ tail = tail->next;
1713
+ }
1714
+
1715
+ // Concatenate the remaining list at the end of the merged list
1716
+ if (left != NULL) {
1717
+ tail->next = left;
1718
+ left->previous = tail;
1719
+ }
1720
+
1721
+ if (right != NULL) {
1722
+ tail->next = right;
1723
+ right->previous = tail;
1724
+ }
1725
+
1726
+ return mergedList;
1727
+ }
1728
+
1729
+ static void arcMergeSort(Arc ** arcPtr, IDnum count)
1730
+ {
1731
+
1732
+ IDnum half = count / 2;
1733
+ Arc *left = *arcPtr;
1734
+ Arc *ptr = left;
1735
+ Arc *right;
1736
+ IDnum index;
1737
+
1738
+ if (count == 0 || count == 1)
1739
+ return;
1740
+
1741
+ if (count == 2) {
1742
+ if ((*arcPtr)->destination->ID >
1743
+ (*arcPtr)->next->destination->ID) {
1744
+ (*arcPtr)->next->next = *arcPtr;
1745
+ (*arcPtr)->previous = (*arcPtr)->next;
1746
+ *arcPtr = (*arcPtr)->next;
1747
+ (*arcPtr)->next->next = NULL;
1748
+ (*arcPtr)->previous = NULL;
1749
+ }
1750
+ return;
1751
+ }
1752
+
1753
+ for (index = 0; index < half - 1; index++) {
1754
+ ptr = ptr->next;
1755
+ if (ptr == NULL)
1756
+ return;
1757
+ }
1758
+
1759
+ right = ptr->next;
1760
+ ptr->next = NULL;
1761
+ right->previous = NULL;
1762
+
1763
+ arcMergeSort(&left, half);
1764
+ arcMergeSort(&right, count - half);
1765
+ *arcPtr = mergeArcLists(left, right);
1766
+ }
1767
+
1768
+ static void sortNodeArcs(Node * node)
1769
+ {
1770
+ Arc *arc;
1771
+ IDnum count = 0;
1772
+
1773
+ for (arc = getArc(node); arc != NULL; arc = getNextArc(arc))
1774
+ count++;
1775
+
1776
+ if (count == 0)
1777
+ return;
1778
+
1779
+ arc = getArc(node);
1780
+ arcMergeSort(&arc, count);
1781
+
1782
+ node->arc = arc;
1783
+ }
1784
+
1785
+ // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
1786
+ static GapMarker *mergeGapMarkerLists(GapMarker * left, GapMarker * right)
1787
+ {
1788
+ GapMarker *mergedList = NULL;
1789
+ GapMarker *tail = NULL;
1790
+
1791
+ // Choose first element:
1792
+ if (left->position <= right->position) {
1793
+ mergedList = left;
1794
+ tail = left;
1795
+ left = left->next;
1796
+ } else {
1797
+ mergedList = right;
1798
+ tail = right;
1799
+ right = right->next;
1800
+ }
1801
+
1802
+ // Iterate while both lists are still non empty
1803
+ while (left != NULL && right != NULL) {
1804
+ if (left->position <= right->position) {
1805
+ tail->next = left;
1806
+ left = left->next;
1807
+ } else {
1808
+ tail->next = right;
1809
+ right = right->next;
1810
+ }
1811
+
1812
+ tail = tail->next;
1813
+ }
1814
+
1815
+ // Concatenate the remaining list at the end of the merged list
1816
+ if (left != NULL)
1817
+ tail->next = left;
1818
+
1819
+ if (right != NULL)
1820
+ tail->next = right;
1821
+
1822
+ return mergedList;
1823
+ }
1824
+
1825
+ static void gapMergeSort(GapMarker ** gapPtr, IDnum count)
1826
+ {
1827
+
1828
+ IDnum half = count / 2;
1829
+ GapMarker *left = *gapPtr;
1830
+ GapMarker *ptr = left;
1831
+ GapMarker *right;
1832
+ IDnum index;
1833
+
1834
+ if (count == 0 || count == 1)
1835
+ return;
1836
+
1837
+ if (count == 2) {
1838
+ if ((*gapPtr)->position > (*gapPtr)->next->position) {
1839
+ (*gapPtr)->next->next = *gapPtr;
1840
+ *gapPtr = (*gapPtr)->next;
1841
+ (*gapPtr)->next->next = NULL;
1842
+ }
1843
+ return;
1844
+ }
1845
+
1846
+ for (index = 0; index < half - 1; index++) {
1847
+ ptr = ptr->next;
1848
+ if (ptr == NULL)
1849
+ return;
1850
+ }
1851
+
1852
+ right = ptr->next;
1853
+ ptr->next = NULL;
1854
+
1855
+ gapMergeSort(&left, half);
1856
+ gapMergeSort(&right, count - half);
1857
+ *gapPtr = mergeGapMarkerLists(left, right);
1858
+ }
1859
+
1860
+ static void sortNodeGapMarkers(Node * node, Graph * graph)
1861
+ {
1862
+ GapMarker *gap;
1863
+ IDnum count = 0;
1864
+ IDnum ID = getNodeID(node);
1865
+
1866
+ if (ID < 0)
1867
+ ID = -ID;
1868
+
1869
+ for (gap = graph->gapMarkers[ID]; gap != NULL; gap = gap->next)
1870
+ count++;
1871
+
1872
+ if (count == 0)
1873
+ return;
1874
+
1875
+ gap = graph->gapMarkers[ID];
1876
+ gapMergeSort(&gap, count);
1877
+
1878
+ graph->gapMarkers[ID] = gap;
1879
+ }
1880
+
1881
+ void sortGapMarkers(Graph * graph)
1882
+ {
1883
+ IDnum index;
1884
+ Node *node;
1885
+
1886
+ if (graph->gapMarkers == NULL)
1887
+ return;
1888
+
1889
+ for (index = 1; index <= nodeCount(graph); index++) {
1890
+ node = getNodeInGraph(graph, index);
1891
+ if (node)
1892
+ sortNodeGapMarkers(node, graph);
1893
+ }
1894
+ }
1895
+
1896
+ void exportGraph(char *filename, Graph * graph, TightString * sequences)
1897
+ {
1898
+ IDnum index;
1899
+ FILE *outfile;
1900
+ Node *node;
1901
+ Arc *arc;
1902
+ PassageMarkerI marker;
1903
+ ShortReadMarker *reads;
1904
+ IDnum readCount, readIndex;
1905
+
1906
+ if (graph == NULL) {
1907
+ return;
1908
+ }
1909
+
1910
+ outfile = fopen(filename, "w");
1911
+ if (outfile == NULL) {
1912
+ velvetLog("Couldn't open file, sorry\n");
1913
+ return;
1914
+ } else
1915
+ velvetLog("Writing into graph file %s...\n", filename);
1916
+
1917
+ // General data
1918
+ velvetFprintf(outfile, "%li\t%li\t%i\t%i\n", (long) graph->nodeCount,
1919
+ (long) graph->sequenceCount, graph->wordLength, (int) graph->double_stranded);
1920
+
1921
+ // Node info
1922
+ for (index = 1; index <= graph->nodeCount; index++) {
1923
+ node = getNodeInGraph(graph, index);
1924
+ exportNode(outfile, node, (void *) sequences);
1925
+ }
1926
+
1927
+ // Arc info
1928
+ for (index = 1; index <= graph->nodeCount; index++) {
1929
+ node = getNodeInGraph(graph, index);
1930
+ if (node == NULL)
1931
+ continue;
1932
+
1933
+ sortNodeArcs(node);
1934
+ sortNodeArcs(getTwinNode(node));
1935
+
1936
+ for (arc = node->arc; arc != NULL; arc = arc->next)
1937
+ exportArc(outfile, arc);
1938
+ for (arc = node->twinNode->arc; arc != NULL;
1939
+ arc = arc->next)
1940
+ exportArc(outfile, arc);
1941
+ }
1942
+
1943
+ // Sequence info
1944
+ for (index = 1; index <= graph->nodeCount; index++) {
1945
+ node = getNodeInGraph(graph, index);
1946
+ if (node == NULL)
1947
+ continue;
1948
+ for (marker = node->marker; marker != NULL_IDX;
1949
+ marker = getNextInNode(marker))
1950
+ exportMarker(outfile, marker, sequences,
1951
+ graph->wordLength);
1952
+ }
1953
+
1954
+ // Node reads
1955
+ if (readStartsAreActivated(graph)) {
1956
+ for (index = 0; index <= graph->nodeCount * 2; index++) {
1957
+ readCount = graph->nodeReadCounts[index];
1958
+ if (readCount == 0)
1959
+ continue;
1960
+
1961
+ velvetFprintf(outfile, "NR\t%li\t%li\n",
1962
+ (long) (index - graph->nodeCount), (long) readCount);
1963
+
1964
+ reads = graph->nodeReads[index];
1965
+ for (readIndex = 0; readIndex < readCount;
1966
+ readIndex++)
1967
+ velvetFprintf(outfile, "%ld\t%lld\t%d\n",
1968
+ (long) reads[readIndex].readID,
1969
+ (long long) reads[readIndex].position,
1970
+ (int) reads[readIndex].offset);
1971
+ }
1972
+ }
1973
+
1974
+ fclose(outfile);
1975
+ }
1976
+
1977
+ Graph *importGraph(char *filename)
1978
+ {
1979
+ FILE *file = fopen(filename, "r");
1980
+ const int maxline = MAXLINE;
1981
+ char line[MAXLINE];
1982
+ Graph *graph;
1983
+ Coordinate coverage;
1984
+ IDnum nodeCounter, sequenceCount;
1985
+ Node *node, *twin;
1986
+ Arc *arc;
1987
+ IDnum originID, destinationID, multiplicity;
1988
+ PassageMarkerI newMarker, marker;
1989
+ IDnum nodeID, seqID;
1990
+ Coordinate index;
1991
+ Coordinate start, finish;
1992
+ Coordinate startOffset, finishOffset;
1993
+ boolean finished = false;
1994
+ size_t arrayLength;
1995
+ IDnum readCount;
1996
+ ShortReadMarker *array;
1997
+ int wordLength, sCount;
1998
+ ShortLength length;
1999
+ long long_var, long_var2, long_var3;
2000
+ long long longlong_var, longlong_var2, longlong_var3, longlong_var4;
2001
+ short short_var;
2002
+ char c;
2003
+
2004
+ if (file == NULL)
2005
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
2006
+
2007
+ velvetLog("Reading graph file %s\n", filename);
2008
+
2009
+ // First line
2010
+ if (!fgets(line, maxline, file))
2011
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2012
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2013
+ &wordLength, &short_var);
2014
+ nodeCounter = (IDnum) long_var;
2015
+ sequenceCount = (IDnum) long_var2;
2016
+ graph = emptyGraph(sequenceCount, wordLength);
2017
+ graph->double_stranded = (boolean) short_var;
2018
+ resetWordFilter(wordLength);
2019
+ allocateNodeSpace(graph, nodeCounter);
2020
+
2021
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2022
+ (long) sequenceCount);
2023
+
2024
+ if (nodeCounter == 0)
2025
+ return graph;
2026
+
2027
+ // Read nodes
2028
+ if (!fgets(line, maxline, file))
2029
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2030
+ while (!finished && strncmp(line, "NODE", 4) == 0) {
2031
+ strtok(line, "\t\n");
2032
+ sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2033
+ nodeID = (IDnum) long_var;
2034
+ node = addEmptyNodeToGraph(graph, nodeID);
2035
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2036
+ node->length = (Coordinate) longlong_var;
2037
+
2038
+ #ifndef SINGLE_COV_CAT
2039
+ Category cat;
2040
+ Coordinate originalCoverage;
2041
+ for (cat = 0; cat < CATEGORIES; cat++) {
2042
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2043
+ coverage = (Coordinate) longlong_var;
2044
+ setVirtualCoverage(node, cat, coverage);
2045
+ sscanf(strtok(NULL, "\t\n"), "%lld",
2046
+ &longlong_var);
2047
+ originalCoverage = (Coordinate) longlong_var;
2048
+ setOriginalVirtualCoverage(node, cat,
2049
+ originalCoverage);
2050
+ }
2051
+ #else
2052
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2053
+ coverage = (Coordinate) longlong_var;
2054
+ setVirtualCoverage(node, coverage);
2055
+ #endif
2056
+
2057
+ arrayLength = node->length / 4;
2058
+ if (node->length % 4 > 0)
2059
+ arrayLength++;
2060
+ node->descriptor =
2061
+ callocOrExit(arrayLength, Descriptor);
2062
+
2063
+ index = 0;
2064
+ while ((c = fgetc(file)) != '\n' && c != EOF) {
2065
+ if (c == 'A')
2066
+ writeNucleotideInDescriptor(ADENINE,
2067
+ node->
2068
+ descriptor,
2069
+ index++);
2070
+ else if (c == 'C')
2071
+ writeNucleotideInDescriptor(CYTOSINE,
2072
+ node->
2073
+ descriptor,
2074
+ index++);
2075
+ else if (c == 'G')
2076
+ writeNucleotideInDescriptor(GUANINE,
2077
+ node->
2078
+ descriptor,
2079
+ index++);
2080
+ else if (c == 'T')
2081
+ writeNucleotideInDescriptor(THYMINE,
2082
+ node->
2083
+ descriptor,
2084
+ index++);
2085
+ }
2086
+
2087
+ twin = node->twinNode;
2088
+ twin->length = node->length;
2089
+ twin->descriptor =
2090
+ callocOrExit(arrayLength, Descriptor);
2091
+ index = 0;
2092
+ while ((c = fgetc(file)) != '\n' && c != EOF) {
2093
+ if (c == 'A')
2094
+ writeNucleotideInDescriptor(ADENINE,
2095
+ twin->
2096
+ descriptor,
2097
+ index++);
2098
+ else if (c == 'C')
2099
+ writeNucleotideInDescriptor(CYTOSINE,
2100
+ twin->
2101
+ descriptor,
2102
+ index++);
2103
+ else if (c == 'G')
2104
+ writeNucleotideInDescriptor(GUANINE,
2105
+ twin->
2106
+ descriptor,
2107
+ index++);
2108
+ else if (c == 'T')
2109
+ writeNucleotideInDescriptor(THYMINE,
2110
+ twin->
2111
+ descriptor,
2112
+ index++);
2113
+ }
2114
+
2115
+ if (fgets(line, maxline, file) == NULL)
2116
+ finished = true;
2117
+ }
2118
+
2119
+ // Read arcs
2120
+ while (!finished && line[0] == 'A') {
2121
+ sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2122
+ &long_var2, &long_var3);
2123
+ originID = (IDnum) long_var;
2124
+ destinationID = (IDnum) long_var2;
2125
+ multiplicity = (IDnum) long_var3;
2126
+ arc =
2127
+ createArc(getNodeInGraph(graph, originID),
2128
+ getNodeInGraph(graph, destinationID), graph);
2129
+ setMultiplicity(arc, multiplicity);
2130
+ if (fgets(line, maxline, file) == NULL)
2131
+ finished = true;
2132
+ }
2133
+
2134
+ // Read sequences
2135
+ while (!finished && line[0] != 'N') {
2136
+ sscanf(line, "SEQ\t%ld\n", &long_var);
2137
+ seqID = (IDnum) long_var;
2138
+ marker = NULL_IDX;
2139
+ if (!fgets(line, maxline, file))
2140
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2141
+
2142
+ while (!finished && line[0] != 'N' && line[0] != 'S') {
2143
+ sCount =
2144
+ sscanf(line, "%ld\t%lld\t%lld\t%lld\t%lld\n",
2145
+ &long_var, &longlong_var, &longlong_var2, &longlong_var3,
2146
+ &longlong_var4);
2147
+ nodeID = (IDnum) long_var;
2148
+ startOffset = (Coordinate) longlong_var;
2149
+ start = (Coordinate) longlong_var2;
2150
+ finish = (Coordinate) longlong_var3;
2151
+ finishOffset = (Coordinate) longlong_var4;
2152
+ if (sCount != 5) {
2153
+ velvetLog
2154
+ ("ERROR: reading in graph - only %d items read for line '%s'",
2155
+ sCount, line);
2156
+ #ifdef DEBUG
2157
+ abort();
2158
+ #endif
2159
+ exit(1);
2160
+ }
2161
+ newMarker =
2162
+ newPassageMarker(seqID, start, finish,
2163
+ startOffset, finishOffset);
2164
+ transposePassageMarker(newMarker,
2165
+ getNodeInGraph(graph,
2166
+ nodeID));
2167
+ connectPassageMarkers(marker, newMarker, graph);
2168
+ marker = newMarker;
2169
+ if (fgets(line, maxline, file) == NULL)
2170
+ finished = true;
2171
+ }
2172
+ }
2173
+
2174
+ // Node reads
2175
+ while (!finished) {
2176
+ sscanf(line, "NR\t%ld\t%ld\n", &long_var, &long_var2);
2177
+ nodeID = (IDnum) long_var;
2178
+ readCount = (IDnum) long_var2;
2179
+ if (!readStartsAreActivated(graph))
2180
+ activateReadStarts(graph);
2181
+
2182
+ graph->nodeReadCounts[nodeID + graph->nodeCount] =
2183
+ readCount;
2184
+ array = mallocOrExit(readCount, ShortReadMarker);
2185
+ graph->nodeReads[nodeID + graph->nodeCount] = array;
2186
+
2187
+ readCount = 0;
2188
+ if (!fgets(line, maxline, file))
2189
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2190
+ while (!finished && line[0] != 'N') {
2191
+ sscanf(line, "%ld\t%lld\t%hd\n", &long_var,
2192
+ &longlong_var, &short_var);
2193
+ seqID = (IDnum) long_var;
2194
+ startOffset = (Coordinate) longlong_var;
2195
+ length = (ShortLength) short_var;
2196
+ array[readCount].readID = seqID;
2197
+ array[readCount].position = startOffset;
2198
+ array[readCount].offset = length;
2199
+ readCount++;
2200
+ if (fgets(line, maxline, file) == NULL)
2201
+ finished = true;
2202
+ }
2203
+ }
2204
+
2205
+ //velvetLog("New graph has %d nodes\n", graph->nodeCount);
2206
+
2207
+ fclose(file);
2208
+ //velvetLog("Done, exiting\n");
2209
+ return graph;
2210
+ }
2211
+
2212
+ Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
2213
+ {
2214
+ FILE *file = fopen(preGraphFilename, "r");
2215
+ const int maxline = MAXLINE;
2216
+ char line[MAXLINE];
2217
+
2218
+ Graph *graph;
2219
+ IDnum nodeCounter, sequenceCount;
2220
+
2221
+ Node *node, *twin;
2222
+ IDnum nodeID = 0;
2223
+ Coordinate index, nodeLength;
2224
+ char c;
2225
+ int wordLength, wordShift;
2226
+ size_t arrayLength;
2227
+ short short_var;
2228
+ long long_var, long_var2;
2229
+ long long longlong_var;
2230
+
2231
+ if (file == NULL)
2232
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
2233
+
2234
+ velvetLog("Reading pre-graph file %s\n", preGraphFilename);
2235
+
2236
+ // First line
2237
+ if (!fgets(line, maxline, file))
2238
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2239
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2240
+ &wordLength, &short_var);
2241
+ nodeCounter = (IDnum) long_var;
2242
+ sequenceCount = (IDnum) long_var2;
2243
+ *double_strand = (boolean) short_var;
2244
+ wordShift = wordLength - 1;
2245
+ graph = emptyGraph(sequenceCount, wordLength);
2246
+ graph->double_stranded = *double_strand;
2247
+ resetWordFilter(wordLength);
2248
+ allocateNodeSpace(graph, nodeCounter);
2249
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2250
+ (long) sequenceCount);
2251
+
2252
+ // Read nodes
2253
+ if (nodeCounter == 0)
2254
+ return graph;
2255
+
2256
+ if (!fgets(line, maxline, file))
2257
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2258
+ while (line[0] == 'N') {
2259
+ nodeID++;
2260
+ node = addEmptyNodeToGraph(graph, nodeID);
2261
+
2262
+ sscanf(line, "%*s\t%*i\t%lli\n", &longlong_var);
2263
+ node->length = (Coordinate) longlong_var;
2264
+ nodeLength = node->length;
2265
+ arrayLength = node->length / 4;
2266
+ if (node->length % 4 > 0)
2267
+ arrayLength++;
2268
+ node->descriptor =
2269
+ callocOrExit(arrayLength, Descriptor);
2270
+
2271
+ twin = node->twinNode;
2272
+ twin->length = nodeLength;
2273
+ twin->descriptor =
2274
+ callocOrExit(arrayLength, Descriptor);
2275
+
2276
+
2277
+ index = 0;
2278
+ while ((c = getc(file)) != '\n') {
2279
+ if (c == 'A') {
2280
+ if (index - wordShift >= 0)
2281
+ writeNucleotideInDescriptor(ADENINE,
2282
+ node->
2283
+ descriptor,
2284
+ index - wordShift);
2285
+ if (nodeLength - index - 1 >= 0) {
2286
+ #ifndef COLOR
2287
+ writeNucleotideInDescriptor(THYMINE,
2288
+ twin->
2289
+ descriptor,
2290
+ nodeLength - index - 1);
2291
+ #else
2292
+ writeNucleotideInDescriptor(ADENINE,
2293
+ twin->
2294
+ descriptor,
2295
+ nodeLength - index - 1);
2296
+ #endif
2297
+ }
2298
+ } else if (c == 'C') {
2299
+ if (index - wordShift >= 0)
2300
+ writeNucleotideInDescriptor(CYTOSINE,
2301
+ node->
2302
+ descriptor,
2303
+ index - wordShift);
2304
+ if (nodeLength - index - 1 >= 0) {
2305
+ #ifndef COLOR
2306
+ writeNucleotideInDescriptor(GUANINE,
2307
+ twin->
2308
+ descriptor,
2309
+ nodeLength - index - 1);
2310
+ #else
2311
+ writeNucleotideInDescriptor(CYTOSINE,
2312
+ twin->
2313
+ descriptor,
2314
+ nodeLength - index - 1);
2315
+ #endif
2316
+ }
2317
+ } else if (c == 'G') {
2318
+ if (index - wordShift >= 0)
2319
+ writeNucleotideInDescriptor(GUANINE,
2320
+ node->
2321
+ descriptor,
2322
+ index - wordShift);
2323
+ if (nodeLength - index - 1 >= 0) {
2324
+ #ifndef COLOR
2325
+ writeNucleotideInDescriptor(CYTOSINE,
2326
+ twin->
2327
+ descriptor,
2328
+ nodeLength - index - 1);
2329
+ #else
2330
+ writeNucleotideInDescriptor(GUANINE,
2331
+ twin->
2332
+ descriptor,
2333
+ nodeLength - index - 1);
2334
+ #endif
2335
+ }
2336
+ } else if (c == 'T') {
2337
+ if (index - wordShift >= 0)
2338
+ writeNucleotideInDescriptor(THYMINE,
2339
+ node->
2340
+ descriptor,
2341
+ index - wordShift);
2342
+ if (nodeLength - index - 1 >= 0) {
2343
+ #ifndef COLOR
2344
+ writeNucleotideInDescriptor(ADENINE,
2345
+ twin->
2346
+ descriptor,
2347
+ nodeLength - index - 1);
2348
+ #else
2349
+ writeNucleotideInDescriptor(THYMINE,
2350
+ twin->
2351
+ descriptor,
2352
+ nodeLength - index - 1);
2353
+ #endif
2354
+ }
2355
+ }
2356
+
2357
+ index++;
2358
+ }
2359
+
2360
+ if (fgets(line, maxline, file) == NULL) {
2361
+ fclose(file);
2362
+ return graph;
2363
+ }
2364
+ }
2365
+
2366
+ fclose(file);
2367
+ return graph;
2368
+ }
2369
+
2370
+ Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_strand)
2371
+ {
2372
+ FILE *file = fopen(connectedGraphFilename, "r");
2373
+ const int maxline = MAXLINE;
2374
+ char line[MAXLINE];
2375
+ Coordinate coverage;
2376
+ Arc *arc;
2377
+ IDnum originID, destinationID, multiplicity;
2378
+ boolean finished = false;
2379
+ long long_var3;
2380
+
2381
+ Graph *graph;
2382
+ IDnum nodeCounter, sequenceCount;
2383
+
2384
+ Node *node, *twin;
2385
+ IDnum nodeID = 0;
2386
+ Coordinate index, nodeLength;
2387
+ char c;
2388
+ int wordLength, wordShift;
2389
+ size_t arrayLength;
2390
+ short short_var;
2391
+ long long_var, long_var2;
2392
+ long long longlong_var;
2393
+
2394
+ if (file == NULL)
2395
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", connectedGraphFilename);
2396
+
2397
+ velvetLog("Reading connected graph file %s\n", connectedGraphFilename);
2398
+
2399
+ // First line
2400
+ if (!fgets(line, maxline, file))
2401
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2402
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2403
+ &wordLength, &short_var);
2404
+ nodeCounter = (IDnum) long_var;
2405
+ sequenceCount = (IDnum) long_var2;
2406
+ *double_strand = (boolean) short_var;
2407
+ wordShift = wordLength - 1;
2408
+ graph = emptyGraph(sequenceCount, wordLength);
2409
+ graph->double_stranded = *double_strand;
2410
+ resetWordFilter(wordLength);
2411
+ allocateNodeSpace(graph, nodeCounter);
2412
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2413
+ (long) sequenceCount);
2414
+
2415
+ // Read nodes
2416
+ if (nodeCounter == 0)
2417
+ return graph;
2418
+
2419
+ if (!fgets(line, maxline, file))
2420
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2421
+ while (!finished && strncmp(line, "NODE", 4) == 0) {
2422
+ strtok(line, "\t\n");
2423
+ sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2424
+ nodeID = (IDnum) long_var;
2425
+ node = addEmptyNodeToGraph(graph, nodeID);
2426
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2427
+ node->length = (Coordinate) longlong_var;
2428
+ nodeLength = node->length;
2429
+
2430
+ #ifndef SINGLE_COV_CAT
2431
+ Category cat;
2432
+ Coordinate originalCoverage;
2433
+ for (cat = 0; cat < CATEGORIES; cat++) {
2434
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2435
+ coverage = (Coordinate) longlong_var;
2436
+ setVirtualCoverage(node, cat, coverage);
2437
+ sscanf(strtok(NULL, "\t\n"), "%lld",
2438
+ &longlong_var);
2439
+ originalCoverage = (Coordinate) longlong_var;
2440
+ setOriginalVirtualCoverage(node, cat,
2441
+ originalCoverage);
2442
+ }
2443
+ #else
2444
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2445
+ coverage = (Coordinate) longlong_var;
2446
+ setVirtualCoverage(node, coverage);
2447
+ #endif
2448
+
2449
+ arrayLength = node->length / 4;
2450
+ if (node->length % 4 > 0)
2451
+ arrayLength++;
2452
+ node->descriptor =
2453
+ callocOrExit(arrayLength, Descriptor);
2454
+
2455
+ twin = node->twinNode;
2456
+ twin->length = node->length;
2457
+ twin->descriptor =
2458
+ callocOrExit(arrayLength, Descriptor);
2459
+
2460
+ index = 0;
2461
+ while ((c = getc(file)) != '\n') {
2462
+ if (c == 'A') {
2463
+ if (index - wordShift >= 0)
2464
+ writeNucleotideInDescriptor(ADENINE,
2465
+ node->
2466
+ descriptor,
2467
+ index - wordShift);
2468
+ if (nodeLength - index - 1 >= 0) {
2469
+ #ifndef COLOR
2470
+ writeNucleotideInDescriptor(THYMINE,
2471
+ twin->
2472
+ descriptor,
2473
+ nodeLength - index - 1);
2474
+ #else
2475
+ writeNucleotideInDescriptor(ADENINE,
2476
+ twin->
2477
+ descriptor,
2478
+ nodeLength - index - 1);
2479
+ #endif
2480
+ }
2481
+ } else if (c == 'C') {
2482
+ if (index - wordShift >= 0)
2483
+ writeNucleotideInDescriptor(CYTOSINE,
2484
+ node->
2485
+ descriptor,
2486
+ index - wordShift);
2487
+ if (nodeLength - index - 1 >= 0) {
2488
+ #ifndef COLOR
2489
+ writeNucleotideInDescriptor(GUANINE,
2490
+ twin->
2491
+ descriptor,
2492
+ nodeLength - index - 1);
2493
+ #else
2494
+ writeNucleotideInDescriptor(CYTOSINE,
2495
+ twin->
2496
+ descriptor,
2497
+ nodeLength - index - 1);
2498
+ #endif
2499
+ }
2500
+ } else if (c == 'G') {
2501
+ if (index - wordShift >= 0)
2502
+ writeNucleotideInDescriptor(GUANINE,
2503
+ node->
2504
+ descriptor,
2505
+ index - wordShift);
2506
+ if (nodeLength - index - 1 >= 0) {
2507
+ #ifndef COLOR
2508
+ writeNucleotideInDescriptor(CYTOSINE,
2509
+ twin->
2510
+ descriptor,
2511
+ nodeLength - index - 1);
2512
+ #else
2513
+ writeNucleotideInDescriptor(GUANINE,
2514
+ twin->
2515
+ descriptor,
2516
+ nodeLength - index - 1);
2517
+ #endif
2518
+ }
2519
+ } else if (c == 'T') {
2520
+ if (index - wordShift >= 0)
2521
+ writeNucleotideInDescriptor(THYMINE,
2522
+ node->
2523
+ descriptor,
2524
+ index - wordShift);
2525
+ if (nodeLength - index - 1 >= 0) {
2526
+ #ifndef COLOR
2527
+ writeNucleotideInDescriptor(ADENINE,
2528
+ twin->
2529
+ descriptor,
2530
+ nodeLength - index - 1);
2531
+ #else
2532
+ writeNucleotideInDescriptor(THYMINE,
2533
+ twin->
2534
+ descriptor,
2535
+ nodeLength - index - 1);
2536
+ #endif
2537
+ }
2538
+ }
2539
+
2540
+ index++;
2541
+ }
2542
+
2543
+ if (fgets(line, maxline, file) == NULL) {
2544
+ finished = true;
2545
+ }
2546
+ }
2547
+
2548
+ // Read arcs
2549
+ while (!finished && line[0] == 'A') {
2550
+ sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2551
+ &long_var2, &long_var3);
2552
+ originID = (IDnum) long_var;
2553
+ destinationID = (IDnum) long_var2;
2554
+ multiplicity = (IDnum) long_var3;
2555
+ arc =
2556
+ createArc(getNodeInGraph(graph, originID),
2557
+ getNodeInGraph(graph, destinationID), graph);
2558
+ setMultiplicity(arc, multiplicity);
2559
+ if (fgets(line, maxline, file) == NULL)
2560
+ finished = true;
2561
+ }
2562
+
2563
+ fclose(file);
2564
+ return graph;
2565
+ }
2566
+
2567
+ // Prints out the information relative to the topology of a node into a new file
2568
+ // Internal to exportDOTGraph()
2569
+ void DOTNode(Node * node, FILE * outfile)
2570
+ {
2571
+ IDnum ID;
2572
+ Arc *arc;
2573
+ Node *otherNode;
2574
+
2575
+ ID = node->ID;
2576
+ if (ID < 0)
2577
+ return;
2578
+
2579
+ velvetFprintf(outfile, "\t%li [label=\"<left>|%li|<right>\"]\n", (long) ID, (long) ID);
2580
+
2581
+ for (arc = node->arc; arc != NULL; arc = arc->next) {
2582
+ otherNode = arc->destination;
2583
+ if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2584
+ continue;
2585
+ }
2586
+
2587
+ if (otherNode->ID > 0)
2588
+ velvetFprintf(outfile, "\t%li:right -> %li:left\n", (long) ID,
2589
+ (long) otherNode->ID);
2590
+ else
2591
+ velvetFprintf(outfile, "\t%li:right -> %li:right\n", (long) ID,
2592
+ (long) -otherNode->ID);
2593
+ }
2594
+
2595
+ for (arc = node->twinNode->arc; arc != NULL; arc = arc->next) {
2596
+ otherNode = arc->destination;
2597
+ if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2598
+ continue;
2599
+ }
2600
+
2601
+ if (otherNode->ID > 0)
2602
+ velvetFprintf(outfile, "\t%li:left -> %li:left\n", (long) ID,
2603
+ (long) otherNode->ID);
2604
+ else
2605
+ velvetFprintf(outfile, "\t%li:left -> %li:right\n", (long) ID,
2606
+ (long) -otherNode->ID);
2607
+ }
2608
+ }
2609
+
2610
+ TightString *expandNode(Node * node, int WORDLENGTH)
2611
+ {
2612
+ Nucleotide nucleotide;
2613
+ Coordinate index;
2614
+ TightString *tString =
2615
+ newTightString(node->length + WORDLENGTH - 1);
2616
+ Node *twin = node->twinNode;
2617
+ Coordinate length = node->length;
2618
+
2619
+ for (index = 0; index < WORDLENGTH; index++) {
2620
+ nucleotide =
2621
+ getNucleotideInDescriptor(twin->descriptor,
2622
+ length - index - 1);
2623
+ #ifndef COLOR
2624
+ writeNucleotideAtPosition(3 - nucleotide, index, tString);
2625
+ #else
2626
+ writeNucleotideAtPosition(nucleotide, index, tString);
2627
+ #endif
2628
+ }
2629
+
2630
+ for (index = 1; index < node->length; index++) {
2631
+ nucleotide =
2632
+ getNucleotideInDescriptor(node->descriptor, index);
2633
+ writeNucleotideAtPosition(nucleotide,
2634
+ index + WORDLENGTH - 1, tString);
2635
+ }
2636
+
2637
+ return tString;
2638
+ }
2639
+
2640
+ char *expandNodeFragment(Node * node, Coordinate contigStart,
2641
+ Coordinate contigFinish, int wordLength)
2642
+ {
2643
+ Nucleotide nucleotide;
2644
+ Coordinate index;
2645
+ Node *twin = node->twinNode;
2646
+ Coordinate length = contigFinish - contigStart;
2647
+ int wordShift = wordLength - 1;
2648
+ char *string;
2649
+
2650
+ if (length >= wordShift) {
2651
+ string = callocOrExit(length + wordLength, char);
2652
+
2653
+ for (index = 0; index < wordShift; index++) {
2654
+ nucleotide =
2655
+ getNucleotideInDescriptor(twin->descriptor,
2656
+ twin->length - contigStart -
2657
+ index - 1);
2658
+ #ifndef COLOR
2659
+ nucleotide = 3 - nucleotide;
2660
+ #endif
2661
+
2662
+ switch (nucleotide) {
2663
+ case ADENINE:
2664
+ string[index] = 'A';
2665
+ break;
2666
+ case CYTOSINE:
2667
+ string[index] = 'C';
2668
+ break;
2669
+ case GUANINE:
2670
+ string[index] = 'G';
2671
+ break;
2672
+ case THYMINE:
2673
+ string[index] = 'T';
2674
+ break;
2675
+ }
2676
+
2677
+ }
2678
+
2679
+ for (index = 0; index < length; index++) {
2680
+ nucleotide =
2681
+ getNucleotideInDescriptor(node->descriptor,
2682
+ contigStart + index);
2683
+ switch (nucleotide) {
2684
+ case ADENINE:
2685
+ string[index + wordShift] = 'A';
2686
+ break;
2687
+ case CYTOSINE:
2688
+ string[index + wordShift] = 'C';
2689
+ break;
2690
+ case GUANINE:
2691
+ string[index + wordShift] = 'G';
2692
+ break;
2693
+ case THYMINE:
2694
+ string[index + wordShift] = 'T';
2695
+ break;
2696
+ }
2697
+ }
2698
+
2699
+ string[length + wordShift] = '\0';
2700
+ } else {
2701
+ string = callocOrExit(length + 1, char);
2702
+
2703
+ for (index = 0; index < length; index++) {
2704
+ nucleotide =
2705
+ getNucleotideInDescriptor(node->descriptor, contigStart + index);
2706
+ switch (nucleotide) {
2707
+ case ADENINE:
2708
+ string[index] = 'A';
2709
+ break;
2710
+ case CYTOSINE:
2711
+ string[index] = 'C';
2712
+ break;
2713
+ case GUANINE:
2714
+ string[index] = 'G';
2715
+ break;
2716
+ case THYMINE:
2717
+ string[index] = 'T';
2718
+ break;
2719
+ }
2720
+ }
2721
+
2722
+ string[length] = '\0';
2723
+ }
2724
+
2725
+ return string;
2726
+ }
2727
+
2728
+ boolean readStartsAreActivated(Graph * graph)
2729
+ {
2730
+ return graph->nodeReads != NULL;
2731
+ }
2732
+
2733
+ void activateReadStarts(Graph * graph)
2734
+ {
2735
+ graph->nodeReads =
2736
+ callocOrExit(2 * graph->nodeCount + 1, ShortReadMarker *);
2737
+ graph->nodeReadCounts =
2738
+ callocOrExit(2 * graph->nodeCount + 1, IDnum);
2739
+ }
2740
+
2741
+ void deactivateReadStarts(Graph * graph)
2742
+ {
2743
+ free(graph->nodeReads);
2744
+ free(graph->nodeReadCounts);
2745
+
2746
+ graph->nodeReads = NULL;
2747
+ graph->nodeReadCounts = NULL;
2748
+ }
2749
+
2750
+ boolean findIDnumInArray(IDnum query, IDnum * array, IDnum arrayLength)
2751
+ {
2752
+ IDnum leftIndex = 0;
2753
+ IDnum rightIndex = arrayLength;
2754
+ IDnum middleIndex;
2755
+
2756
+ if (arrayLength == 0)
2757
+ return false;
2758
+
2759
+ while (true) {
2760
+ middleIndex = leftIndex + (rightIndex - leftIndex) / 2;
2761
+
2762
+ if (array[middleIndex] == query)
2763
+ return true;
2764
+ else if (leftIndex >= rightIndex)
2765
+ return false;
2766
+ else if (array[middleIndex] > query)
2767
+ rightIndex = middleIndex;
2768
+ else if (leftIndex == middleIndex)
2769
+ leftIndex++;
2770
+ else
2771
+ leftIndex = middleIndex;
2772
+ }
2773
+ }
2774
+
2775
+ static inline int compareShortReadMarkers(const void *A, const void *B)
2776
+ {
2777
+ IDnum a = ((ShortReadMarker *) A)->readID;
2778
+ IDnum b = ((ShortReadMarker *) B)->readID;
2779
+
2780
+ if (a > b)
2781
+ return 1;
2782
+ if (a == b)
2783
+ return 0;
2784
+ return -1;
2785
+ }
2786
+
2787
+ static inline int compareIDnums(const void *A, const void *B)
2788
+ {
2789
+ IDnum a = *((IDnum *) A);
2790
+ IDnum b = *((IDnum *) B);
2791
+
2792
+ if (a > b)
2793
+ return 1;
2794
+ if (a == b)
2795
+ return 0;
2796
+ return -1;
2797
+ }
2798
+
2799
+ void incrementReadStartCount(Node * node, Graph * graph)
2800
+ {
2801
+ graph->nodeReadCounts[node->ID + graph->nodeCount]++;
2802
+ }
2803
+
2804
+ void createNodeReadStartArrays(Graph * graph)
2805
+ {
2806
+ IDnum index;
2807
+
2808
+ if (graph->nodeReads == NULL)
2809
+ return;
2810
+
2811
+ for (index = 0; index <= 2 * (graph->nodeCount); index++) {
2812
+ if (graph->nodeReadCounts[index] != 0) {
2813
+ graph->nodeReads[index] =
2814
+ mallocOrExit(graph->nodeReadCounts[index],
2815
+ ShortReadMarker);
2816
+ graph->nodeReadCounts[index] = 0;
2817
+ } else {
2818
+ graph->nodeReads[index] = NULL;
2819
+ }
2820
+ }
2821
+ }
2822
+
2823
+ void orderNodeReadStartArrays(Graph * graph)
2824
+ {
2825
+ IDnum index;
2826
+
2827
+ if (graph->nodeReads == NULL)
2828
+ return;
2829
+
2830
+ for (index = 0; index <= 2 * (graph->nodeCount); index++)
2831
+ if (graph->nodeReadCounts[index] != 0)
2832
+ qsort(graph->nodeReads[index],
2833
+ graph->nodeReadCounts[index],
2834
+ sizeof(ShortReadMarker),
2835
+ compareShortReadMarkers);
2836
+ }
2837
+
2838
+ void addReadStart(Node * node, IDnum seqID, Coordinate position,
2839
+ Graph * graph, Coordinate offset)
2840
+ {
2841
+ IDnum nodeIndex = getNodeID(node) + graph->nodeCount;
2842
+
2843
+ ShortReadMarker *array = graph->nodeReads[nodeIndex];
2844
+ IDnum arrayLength = graph->nodeReadCounts[nodeIndex];
2845
+
2846
+ array[arrayLength].readID = seqID;
2847
+ array[arrayLength].position = position;
2848
+ array[arrayLength].offset = (ShortLength) offset;
2849
+ // printf("node %d, seq %d, pos %ld, offset %ld\n", getNodeID(node), seqID, position, offset);
2850
+ graph->nodeReadCounts[nodeIndex]++;
2851
+ }
2852
+
2853
+ void blurLastShortReadMarker(Node * node, Graph * graph)
2854
+ {
2855
+ IDnum nodeIndex = getNodeID(node) + nodeCount(graph);
2856
+ IDnum index = graph->nodeReadCounts[nodeIndex] - 1;
2857
+ ShortReadMarker *marker;
2858
+
2859
+ if (index >= 0)
2860
+ marker = &(graph->nodeReads[nodeIndex][index]);
2861
+ else
2862
+ abort();
2863
+
2864
+ setShortReadMarkerPosition(marker, -1);
2865
+ }
2866
+
2867
+ ShortReadMarker *commonNodeReads(Node * nodeA, Node * nodeB, Graph * graph,
2868
+ IDnum * length)
2869
+ {
2870
+ IDnum targetID, targetLength, targetIndex, targetVal;
2871
+ IDnum sourceID, sourceLength, sourceIndex, sourceVal;
2872
+ IDnum mergeLength;
2873
+ ShortReadMarker *mergeArray, *targetArray, *sourceArray;
2874
+
2875
+ if (graph->nodeReads == NULL) {
2876
+ *length = 0;
2877
+ return NULL;
2878
+ }
2879
+
2880
+ if (nodeA == NULL || nodeB == NULL) {
2881
+ *length = 0;
2882
+ return NULL;
2883
+ }
2884
+
2885
+ targetID = getNodeID(nodeA) + graph->nodeCount;
2886
+ targetArray = graph->nodeReads[targetID];
2887
+ targetLength = graph->nodeReadCounts[targetID];
2888
+
2889
+ sourceID = getNodeID(nodeB) + graph->nodeCount;
2890
+ sourceArray = graph->nodeReads[sourceID];
2891
+ sourceLength = graph->nodeReadCounts[sourceID];
2892
+
2893
+ if (sourceArray == NULL || targetArray == NULL) {
2894
+ *length = 0;
2895
+ return NULL;
2896
+ }
2897
+
2898
+ mergeArray =
2899
+ mallocOrExit(sourceLength +
2900
+ targetLength, ShortReadMarker);
2901
+
2902
+ mergeLength = 0;
2903
+ sourceIndex = 0;
2904
+ targetIndex = 0;
2905
+ sourceVal = sourceArray[0].readID;
2906
+ targetVal = targetArray[0].readID;
2907
+
2908
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
2909
+ switch (compareIDnums(&sourceVal, &targetVal)) {
2910
+ case -1:
2911
+ mergeArray[mergeLength].readID = sourceVal;
2912
+ mergeArray[mergeLength].position = -1;
2913
+ mergeArray[mergeLength].offset = -1;
2914
+ mergeLength++;
2915
+ sourceIndex++;
2916
+ if (sourceIndex < sourceLength)
2917
+ sourceVal =
2918
+ sourceArray[sourceIndex].readID;
2919
+ break;
2920
+ case 0:
2921
+ mergeArray[mergeLength].readID = sourceVal;
2922
+ mergeArray[mergeLength].position = -1;
2923
+ mergeArray[mergeLength].offset = -1;
2924
+ mergeLength++;
2925
+ sourceIndex++;
2926
+ if (sourceIndex < sourceLength)
2927
+ sourceVal =
2928
+ sourceArray[sourceIndex].readID;
2929
+ targetIndex++;
2930
+ if (targetIndex < targetLength)
2931
+ targetVal =
2932
+ targetArray[targetIndex].readID;
2933
+ break;
2934
+ case 1:
2935
+ mergeArray[mergeLength].readID = targetVal;
2936
+ mergeArray[mergeLength].position = -1;
2937
+ mergeArray[mergeLength].offset = -1;
2938
+ mergeLength++;
2939
+ targetIndex++;
2940
+ if (targetIndex < targetLength)
2941
+ targetVal =
2942
+ targetArray[targetIndex].readID;
2943
+ }
2944
+ }
2945
+
2946
+ while (sourceIndex < sourceLength) {
2947
+ mergeArray[mergeLength].readID =
2948
+ sourceArray[sourceIndex].readID;
2949
+ mergeArray[mergeLength].position = -1;
2950
+ mergeArray[mergeLength].offset = -1;
2951
+ mergeLength++;
2952
+ sourceIndex++;
2953
+ }
2954
+
2955
+ while (targetIndex < targetLength) {
2956
+ mergeArray[mergeLength].readID =
2957
+ targetArray[targetIndex].readID;
2958
+ mergeArray[mergeLength].position = -1;
2959
+ mergeArray[mergeLength].offset = -1;
2960
+ mergeLength++;
2961
+ targetIndex++;
2962
+ }
2963
+
2964
+ *length = mergeLength;
2965
+ return mergeArray;
2966
+ }
2967
+
2968
+ ShortReadMarker *extractFrontOfNodeReads(Node * node,
2969
+ Coordinate breakpoint,
2970
+ Graph * graph, IDnum * length,
2971
+ PassageMarkerI sourceMarker,
2972
+ ShortLength * lengths)
2973
+ {
2974
+ IDnum sourceID;
2975
+ IDnum mergeLength, newLength, sourceLength;
2976
+ IDnum sourceIndex;
2977
+ ShortReadMarker *mergeArray, *sourceArray, *newArray;
2978
+ ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
2979
+ Coordinate finish;
2980
+ Coordinate revBreakpoint;
2981
+
2982
+ if (graph->nodeReads == NULL) {
2983
+ *length = 0;
2984
+ return NULL;
2985
+ }
2986
+
2987
+ if (node == NULL) {
2988
+ *length = 0;
2989
+ return NULL;
2990
+ }
2991
+
2992
+ if (breakpoint == 0) {
2993
+ return commonNodeReads(node,
2994
+ getTwinNode(getNode
2995
+ (getPreviousInSequence
2996
+ (sourceMarker))),
2997
+ graph, length);
2998
+ }
2999
+
3000
+ sourceID = getNodeID(node) + graph->nodeCount;
3001
+ sourceArray = graph->nodeReads[sourceID];
3002
+ sourceLength = graph->nodeReadCounts[sourceID];
3003
+
3004
+ if (sourceArray == NULL) {
3005
+ *length = 0;
3006
+ return NULL;
3007
+ }
3008
+
3009
+ revBreakpoint = node->length - breakpoint;
3010
+
3011
+ mergeLength = 0;
3012
+ newLength = 0;
3013
+ sourcePtr = sourceArray;
3014
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3015
+ if (sourcePtr->position == -1) {
3016
+ newLength++;
3017
+ mergeLength++;
3018
+ } else {
3019
+ finish =
3020
+ sourcePtr->position - sourcePtr->offset +
3021
+ lengths[sourcePtr->readID - 1];
3022
+ if (sourcePtr->position < revBreakpoint)
3023
+ newLength++;
3024
+ if (finish > revBreakpoint)
3025
+ mergeLength++;
3026
+ }
3027
+ sourcePtr++;
3028
+ }
3029
+
3030
+ newArray = mallocOrExit(newLength, ShortReadMarker);
3031
+ mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3032
+
3033
+ mergePtr = mergeArray;
3034
+ newPtr = newArray;
3035
+ sourcePtr = sourceArray;
3036
+ mergeLength = 0;
3037
+ newLength = 0;
3038
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3039
+ if (sourcePtr->position == -1) {
3040
+ mergePtr->readID = sourcePtr->readID;
3041
+ setShortReadMarkerPosition(mergePtr, -1);
3042
+ setShortReadMarkerOffset(mergePtr, -1);
3043
+ mergePtr++;
3044
+ mergeLength++;
3045
+ newPtr->readID = sourcePtr->readID;
3046
+ setShortReadMarkerPosition(newPtr, -1);
3047
+ setShortReadMarkerOffset(newPtr, -1);
3048
+ newPtr++;
3049
+ newLength++;
3050
+ } else {
3051
+ finish =
3052
+ sourcePtr->position - sourcePtr->offset +
3053
+ lengths[sourcePtr->readID - 1];
3054
+ if (sourcePtr->position < revBreakpoint) {
3055
+ newPtr->readID = sourcePtr->readID;
3056
+ setShortReadMarkerPosition(newPtr,
3057
+ sourcePtr->
3058
+ position);
3059
+ setShortReadMarkerOffset(newPtr,
3060
+ sourcePtr->
3061
+ offset);
3062
+ newPtr++;
3063
+ newLength++;
3064
+
3065
+ // Saddle back reads:
3066
+ if (finish > revBreakpoint) {
3067
+ mergePtr->readID =
3068
+ sourcePtr->readID;
3069
+ setShortReadMarkerPosition
3070
+ (mergePtr, 0);
3071
+ setShortReadMarkerOffset(mergePtr,
3072
+ sourcePtr->
3073
+ offset +
3074
+ revBreakpoint
3075
+ -
3076
+ sourcePtr->
3077
+ position);
3078
+ mergePtr++;
3079
+ }
3080
+ } else if (finish > revBreakpoint) {
3081
+ mergePtr->readID = sourcePtr->readID;
3082
+ setShortReadMarkerPosition(mergePtr,
3083
+ sourcePtr->
3084
+ position - revBreakpoint);
3085
+ setShortReadMarkerOffset(mergePtr,
3086
+ sourcePtr->
3087
+ offset);
3088
+ mergePtr++;
3089
+ mergeLength++;
3090
+ }
3091
+ }
3092
+
3093
+ sourcePtr++;
3094
+ }
3095
+
3096
+ free(sourceArray);
3097
+ graph->nodeReads[sourceID] = newArray;
3098
+ graph->nodeReadCounts[sourceID] = newLength;
3099
+
3100
+ *length = mergeLength;
3101
+ return mergeArray;
3102
+ }
3103
+
3104
+ ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
3105
+ Graph * graph, IDnum * length,
3106
+ PassageMarkerI sourceMarker,
3107
+ ShortLength * lengths)
3108
+ {
3109
+ IDnum sourceID;
3110
+ IDnum mergeLength, newLength, sourceLength;
3111
+ IDnum sourceIndex;
3112
+ ShortReadMarker *mergeArray, *sourceArray, *newArray;
3113
+ ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3114
+ Coordinate finish;
3115
+
3116
+ if (graph->nodeReads == NULL) {
3117
+ *length = 0;
3118
+ return NULL;
3119
+ }
3120
+
3121
+ if (node == NULL) {
3122
+ *length = 0;
3123
+ return NULL;
3124
+ }
3125
+
3126
+ if (breakpoint == 0) {
3127
+ return
3128
+ commonNodeReads(getNode
3129
+ (getPreviousInSequence(sourceMarker)),
3130
+ node, graph, length);
3131
+ }
3132
+
3133
+ sourceID = getNodeID(node) + graph->nodeCount;
3134
+ sourceArray = graph->nodeReads[sourceID];
3135
+ sourceLength = graph->nodeReadCounts[sourceID];
3136
+
3137
+ if (sourceArray == NULL) {
3138
+ *length = 0;
3139
+ return NULL;
3140
+ }
3141
+
3142
+ mergeLength = 0;
3143
+ newLength = 0;
3144
+ sourcePtr = sourceArray;
3145
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3146
+ if (sourcePtr->position == -1) {
3147
+ mergeLength++;
3148
+ newLength++;
3149
+ } else {
3150
+ finish =
3151
+ sourcePtr->position - sourcePtr->offset +
3152
+ lengths[sourcePtr->readID - 1];
3153
+ if (sourcePtr->position < breakpoint)
3154
+ mergeLength++;
3155
+ if (finish > breakpoint)
3156
+ newLength++;
3157
+ }
3158
+ sourcePtr++;
3159
+ }
3160
+
3161
+ newArray = mallocOrExit(newLength, ShortReadMarker);
3162
+ mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3163
+
3164
+ mergePtr = mergeArray;
3165
+ newPtr = newArray;
3166
+ sourcePtr = sourceArray;
3167
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3168
+ if (sourcePtr->position == -1) {
3169
+ mergePtr->readID = sourcePtr->readID;
3170
+ setShortReadMarkerPosition(mergePtr, -1);
3171
+ setShortReadMarkerOffset(mergePtr, -1);
3172
+ mergePtr++;
3173
+
3174
+ newPtr->readID = sourcePtr->readID;
3175
+ setShortReadMarkerPosition(newPtr, -1);
3176
+ setShortReadMarkerOffset(newPtr, -1);
3177
+ newPtr++;
3178
+
3179
+ sourcePtr++;
3180
+ continue;
3181
+ } else {
3182
+ finish =
3183
+ sourcePtr->position - sourcePtr->offset +
3184
+ lengths[sourcePtr->readID - 1];
3185
+
3186
+ if (sourcePtr->position < breakpoint) {
3187
+ mergePtr->readID = sourcePtr->readID;
3188
+ setShortReadMarkerPosition(mergePtr,
3189
+ sourcePtr->
3190
+ position);
3191
+ setShortReadMarkerOffset(mergePtr,
3192
+ sourcePtr->
3193
+ offset);
3194
+ mergePtr++;
3195
+
3196
+ // Saddle back reads:
3197
+ if (finish > breakpoint) {
3198
+ newPtr->readID = sourcePtr->readID;
3199
+ setShortReadMarkerPosition(newPtr,
3200
+ 0);
3201
+ setShortReadMarkerOffset(newPtr,
3202
+ sourcePtr->
3203
+ offset +
3204
+ breakpoint
3205
+ -
3206
+ sourcePtr->
3207
+ position);
3208
+ newPtr++;
3209
+ }
3210
+ } else if (finish > breakpoint) {
3211
+ newPtr->readID = sourcePtr->readID;
3212
+ setShortReadMarkerPosition(newPtr,
3213
+ sourcePtr->
3214
+ position -
3215
+ breakpoint);
3216
+ setShortReadMarkerOffset(newPtr,
3217
+ sourcePtr->
3218
+ offset);
3219
+ newPtr++;
3220
+ }
3221
+ }
3222
+
3223
+ sourcePtr++;
3224
+ }
3225
+
3226
+ free(sourceArray);
3227
+ graph->nodeReads[sourceID] = newArray;
3228
+ graph->nodeReadCounts[sourceID] = newLength;
3229
+
3230
+ *length = mergeLength;
3231
+ return mergeArray;
3232
+ }
3233
+
3234
+ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
3235
+ Graph * graph)
3236
+ {
3237
+ IDnum targetID, targetLength, targetIndex, targetVal;
3238
+ IDnum sourceLength, sourceIndex, sourceVal;
3239
+ IDnum mergeLength;
3240
+ ShortReadMarker *sourceArray, *targetArray, *mergeArray;
3241
+ ShortReadMarker *sourcePtr, *targetPtr, *mergePtr;
3242
+ Coordinate targetPosition;
3243
+ //ShortLength nodeLength = (ShortLength) getNodeLength(node);
3244
+ ShortLength targetOffset;
3245
+
3246
+ if (graph->nodeReads == NULL || reads == NULL || node == NULL)
3247
+ return;
3248
+
3249
+ targetID = getNodeID(node) + graph->nodeCount;
3250
+ targetArray = graph->nodeReads[targetID];
3251
+ targetLength = graph->nodeReadCounts[targetID];
3252
+ targetPtr = targetArray;
3253
+
3254
+ sourceArray = reads;
3255
+ sourceLength = readCount;
3256
+ sourcePtr = sourceArray;
3257
+
3258
+ if (targetArray == NULL) {
3259
+ mergeArray =
3260
+ mallocOrExit(sourceLength, ShortReadMarker);
3261
+ mergePtr = mergeArray;
3262
+
3263
+ sourceIndex = 0;
3264
+ while (sourceIndex < sourceLength) {
3265
+ mergePtr->readID = sourcePtr->readID;
3266
+ setShortReadMarkerPosition(mergePtr, -1);
3267
+ setShortReadMarkerOffset(mergePtr, -1);
3268
+ mergePtr++;
3269
+ sourcePtr++;
3270
+ sourceIndex++;
3271
+ }
3272
+
3273
+ graph->nodeReads[targetID] = mergeArray;
3274
+ graph->nodeReadCounts[targetID] = sourceLength;
3275
+ return;
3276
+ }
3277
+
3278
+ mergeArray =
3279
+ mallocOrExit(sourceLength +
3280
+ targetLength, ShortReadMarker);
3281
+ mergePtr = mergeArray;
3282
+
3283
+ mergeLength = 0;
3284
+ sourceIndex = 0;
3285
+ targetIndex = 0;
3286
+ sourceVal = sourcePtr->readID;
3287
+ targetVal = targetPtr->readID;
3288
+ targetPosition = targetPtr->position;
3289
+ targetOffset = targetPtr->offset;
3290
+
3291
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3292
+ if (sourceVal < targetVal) {
3293
+ mergePtr->readID = sourceVal;
3294
+ setShortReadMarkerPosition(mergePtr, -1);
3295
+ setShortReadMarkerOffset(mergePtr, -1);
3296
+ sourceIndex++;
3297
+ sourcePtr++;
3298
+ if (sourceIndex < sourceLength)
3299
+ sourceVal = sourcePtr->readID;
3300
+ } else if (sourceVal == targetVal) {
3301
+ mergePtr->readID = sourceVal;
3302
+ setShortReadMarkerPosition(mergePtr, -1);
3303
+ setShortReadMarkerOffset(mergePtr, -1);
3304
+ sourceIndex++;
3305
+ sourcePtr++;
3306
+ if (sourceIndex < sourceLength)
3307
+ sourceVal = sourcePtr->readID;
3308
+ targetIndex++;
3309
+ targetPtr++;
3310
+ if (targetIndex < targetLength) {
3311
+ targetVal = targetPtr->readID;
3312
+ targetPosition = targetPtr->position;
3313
+ targetOffset = targetPtr->offset;
3314
+ }
3315
+ } else {
3316
+ mergePtr->readID = targetVal;
3317
+ setShortReadMarkerPosition(mergePtr,
3318
+ targetPosition);
3319
+ setShortReadMarkerOffset(mergePtr, targetOffset);
3320
+ targetIndex++;
3321
+ targetPtr++;
3322
+ if (targetIndex < targetLength) {
3323
+ targetVal = targetPtr->readID;
3324
+ targetPosition = targetPtr->position;
3325
+ targetOffset = targetPtr->offset;
3326
+ }
3327
+ }
3328
+
3329
+ mergeLength++;
3330
+ mergePtr++;
3331
+ }
3332
+
3333
+ while (sourceIndex < sourceLength) {
3334
+ mergePtr->readID = sourcePtr->readID;
3335
+ setShortReadMarkerPosition(mergePtr, -1);
3336
+ setShortReadMarkerOffset(mergePtr, -1);
3337
+ mergeLength++;
3338
+ mergePtr++;
3339
+ sourceIndex++;
3340
+ sourcePtr++;
3341
+ }
3342
+
3343
+ while (targetIndex < targetLength) {
3344
+ mergePtr->readID = targetPtr->readID;
3345
+ setShortReadMarkerPosition(mergePtr, targetPtr->position);
3346
+ setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3347
+ mergeLength++;
3348
+ mergePtr++;
3349
+ targetIndex++;
3350
+ targetPtr++;
3351
+ }
3352
+
3353
+ free(targetArray);
3354
+ graph->nodeReads[targetID] = mergeArray;
3355
+ graph->nodeReadCounts[targetID] = mergeLength;
3356
+ }
3357
+
3358
+ static inline Coordinate min(Coordinate A, Coordinate B)
3359
+ {
3360
+ return A < B ? A : B;
3361
+ }
3362
+
3363
+ static inline ShortLength min_short(ShortLength A, ShortLength B)
3364
+ {
3365
+ return A < B ? A : B;
3366
+ }
3367
+
3368
+ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
3369
+ Node * target, Graph * graph)
3370
+ {
3371
+ IDnum targetID = getNodeID(target) + graph->nodeCount;
3372
+ ShortReadMarker *targetArray = graph->nodeReads[targetID];
3373
+ IDnum targetLength = graph->nodeReadCounts[targetID];
3374
+ ShortReadMarker *targetPtr = targetArray;
3375
+ ShortReadMarker *sourcePtr = sourceArray;
3376
+ ShortReadMarker *mergeArray, *mergePtr;
3377
+ IDnum mergeLength;
3378
+ Coordinate targetPosition, sourcePosition;
3379
+ ShortLength targetOffset, sourceOffset;
3380
+ IDnum targetIndex, targetVal, sourceIndex, sourceVal;
3381
+
3382
+ if (sourceLength == 0) {
3383
+ free(sourceArray);
3384
+ return;
3385
+ }
3386
+
3387
+ if (targetLength == 0) {
3388
+ free(targetArray);
3389
+ graph->nodeReads[targetID] = sourceArray;
3390
+ graph->nodeReadCounts[targetID] = sourceLength;
3391
+ return;
3392
+ }
3393
+
3394
+ mergeArray =
3395
+ mallocOrExit(sourceLength +
3396
+ targetLength, ShortReadMarker);
3397
+ mergePtr = mergeArray;
3398
+
3399
+ mergeLength = 0;
3400
+ sourceIndex = 0;
3401
+ targetIndex = 0;
3402
+ targetVal = targetPtr->readID;
3403
+ targetPosition = targetPtr->position;
3404
+ targetOffset = targetPtr->offset;
3405
+ sourceVal = sourcePtr->readID;
3406
+ sourcePosition = sourcePtr->position;
3407
+ sourceOffset = sourcePtr->offset;
3408
+
3409
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3410
+ if (sourceVal < targetVal) {
3411
+ mergePtr->readID = sourceVal;
3412
+ setShortReadMarkerPosition(mergePtr,
3413
+ sourcePosition);
3414
+ setShortReadMarkerOffset(mergePtr, sourceOffset);
3415
+ sourceIndex++;
3416
+ if (sourceIndex < sourceLength) {
3417
+ sourcePtr++;
3418
+ sourceVal = sourcePtr->readID;
3419
+ sourcePosition = sourcePtr->position;
3420
+ sourceOffset = sourcePtr->offset;
3421
+ }
3422
+ } else if (sourceVal == targetVal) {
3423
+ mergePtr->readID = sourceVal;
3424
+ if (sourcePosition == -1 && targetPosition == -1) {
3425
+ setShortReadMarkerPosition(mergePtr, -1);
3426
+ setShortReadMarkerOffset(mergePtr, -1);
3427
+ } else if (sourcePosition == -1) {
3428
+ setShortReadMarkerPosition(mergePtr,
3429
+ targetPosition);
3430
+ setShortReadMarkerOffset(mergePtr,
3431
+ targetOffset);
3432
+ } else if (targetPosition == -1) {
3433
+ setShortReadMarkerPosition(mergePtr,
3434
+ sourcePosition);
3435
+ setShortReadMarkerOffset(mergePtr,
3436
+ sourceOffset);
3437
+ } else {
3438
+ setShortReadMarkerPosition(mergePtr,
3439
+ min
3440
+ (sourcePosition,
3441
+ targetPosition));
3442
+ setShortReadMarkerOffset(mergePtr,
3443
+ min_short
3444
+ (sourceOffset,
3445
+ targetOffset));
3446
+ }
3447
+ sourceIndex++;
3448
+ if (sourceIndex < sourceLength) {
3449
+ sourcePtr++;
3450
+ sourceVal = sourcePtr->readID;
3451
+ sourcePosition = sourcePtr->position;
3452
+ sourceOffset = sourcePtr->offset;
3453
+ }
3454
+ targetIndex++;
3455
+ if (targetIndex < targetLength) {
3456
+ targetPtr++;
3457
+ targetVal = targetPtr->readID;
3458
+ targetPosition = targetPtr->position;
3459
+ targetOffset = targetPtr->offset;
3460
+ }
3461
+ } else {
3462
+ mergePtr->readID = targetVal;
3463
+ setShortReadMarkerPosition(mergePtr,
3464
+ targetPosition);
3465
+ setShortReadMarkerOffset(mergePtr, targetOffset);
3466
+ targetIndex++;
3467
+ if (targetIndex < targetLength) {
3468
+ targetPtr++;
3469
+ targetVal = targetPtr->readID;
3470
+ targetPosition = targetPtr->position;
3471
+ targetOffset = targetPtr->offset;
3472
+ }
3473
+ }
3474
+
3475
+ mergeLength++;
3476
+ mergePtr++;
3477
+ }
3478
+
3479
+ while (sourceIndex < sourceLength) {
3480
+ mergePtr->readID = sourcePtr->readID;
3481
+ setShortReadMarkerPosition(mergePtr, sourcePtr->position);
3482
+ setShortReadMarkerOffset(mergePtr, sourcePtr->offset);
3483
+ mergeLength++;
3484
+ mergePtr++;
3485
+ sourceIndex++;
3486
+ sourcePtr++;
3487
+ }
3488
+
3489
+ while (targetIndex < targetLength) {
3490
+ mergePtr->readID = targetPtr->readID;
3491
+ setShortReadMarkerPosition(mergePtr, targetPtr->position);
3492
+ setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3493
+ mergeLength++;
3494
+ mergePtr++;
3495
+ targetIndex++;
3496
+ targetPtr++;
3497
+ }
3498
+
3499
+ free(targetArray);
3500
+ graph->nodeReads[targetID] = mergeArray;
3501
+ graph->nodeReadCounts[targetID] = mergeLength;
3502
+
3503
+ free(sourceArray);
3504
+ }
3505
+
3506
+ void mergeNodeReads(Node * target, Node * source, Graph * graph)
3507
+ {
3508
+ IDnum sourceID, sourceLength;
3509
+ ShortReadMarker *sourceArray;
3510
+
3511
+ if (graph->nodeReads == NULL || source == NULL || target == NULL)
3512
+ return;
3513
+
3514
+ sourceID = getNodeID(source) + graph->nodeCount;
3515
+ sourceArray = graph->nodeReads[sourceID];
3516
+ sourceLength = graph->nodeReadCounts[sourceID];
3517
+
3518
+ if (sourceArray == NULL)
3519
+ return;
3520
+
3521
+ graph->nodeReads[sourceID] = NULL;
3522
+ graph->nodeReadCounts[sourceID] = 0;
3523
+
3524
+ injectShortReads(sourceArray, sourceLength, target, graph);
3525
+ }
3526
+
3527
+ void foldSymmetricalNodeReads(Node * node, Graph * graph)
3528
+ {
3529
+ IDnum targetID, targetLength, targetIndex;
3530
+ IDnum sourceID, sourceLength, sourceIndex;
3531
+ IDnum targetVal = 0;
3532
+ IDnum sourceVal = 0;
3533
+ IDnum mergeLength;
3534
+ ShortReadMarker *sourceArray, *targetArray, *mergeArray,
3535
+ *mergeArray2;
3536
+ ShortReadMarker *sourcePtr, *targetPtr, *mergePtr, *mergePtr2;
3537
+
3538
+ if (graph->nodeReads == NULL || node == NULL)
3539
+ return;
3540
+
3541
+ sourceID = getNodeID(node) + graph->nodeCount;
3542
+ sourceArray = graph->nodeReads[sourceID];
3543
+ sourceLength = graph->nodeReadCounts[sourceID];
3544
+ sourcePtr = sourceArray;
3545
+
3546
+ targetID = -getNodeID(node) + graph->nodeCount;
3547
+ targetArray = graph->nodeReads[targetID];
3548
+ targetLength = graph->nodeReadCounts[targetID];
3549
+ targetPtr = targetArray;
3550
+
3551
+ if (sourceArray == NULL && targetArray == NULL)
3552
+ return;
3553
+
3554
+ mergeArray =
3555
+ mallocOrExit(sourceLength +
3556
+ targetLength, ShortReadMarker);
3557
+ mergeArray2 =
3558
+ mallocOrExit(sourceLength +
3559
+ targetLength, ShortReadMarker);
3560
+ mergePtr = mergeArray;
3561
+ mergePtr2 = mergeArray2;
3562
+
3563
+ mergeLength = 0;
3564
+ sourceIndex = 0;
3565
+ targetIndex = 0;
3566
+ if (targetIndex < targetLength)
3567
+ targetVal = targetPtr->readID;
3568
+ if (sourceIndex < sourceLength)
3569
+ sourceVal = sourcePtr->readID;
3570
+
3571
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3572
+ if (sourceVal < targetVal) {
3573
+ mergePtr->readID = sourceVal;
3574
+ setShortReadMarkerPosition(mergePtr, -1);
3575
+ setShortReadMarkerOffset(mergePtr, -1);
3576
+ mergePtr2->readID = sourceVal;
3577
+ setShortReadMarkerPosition(mergePtr2, -1);
3578
+ setShortReadMarkerOffset(mergePtr2, -1);
3579
+ sourceIndex++;
3580
+ sourcePtr++;
3581
+ if (sourceIndex < sourceLength)
3582
+ sourceVal = sourcePtr->readID;
3583
+ } else if (sourceVal == targetVal) {
3584
+ mergePtr->readID = sourceVal;
3585
+ setShortReadMarkerPosition(mergePtr, -1);
3586
+ setShortReadMarkerOffset(mergePtr, -1);
3587
+ mergePtr2->readID = sourceVal;
3588
+ setShortReadMarkerPosition(mergePtr2, -1);
3589
+ setShortReadMarkerOffset(mergePtr2, -1);
3590
+ sourceIndex++;
3591
+ sourcePtr++;
3592
+ if (sourceIndex < sourceLength)
3593
+ sourceVal = sourcePtr->readID;
3594
+ targetIndex++;
3595
+ targetPtr++;
3596
+ if (targetIndex < targetLength)
3597
+ targetVal = targetPtr->readID;
3598
+ } else {
3599
+ mergePtr->readID = targetVal;
3600
+ setShortReadMarkerPosition(mergePtr, -1);
3601
+ setShortReadMarkerOffset(mergePtr, -1);
3602
+ mergePtr2->readID = targetVal;
3603
+ setShortReadMarkerPosition(mergePtr2, -1);
3604
+ setShortReadMarkerOffset(mergePtr2, -1);
3605
+ targetIndex++;
3606
+ targetPtr++;
3607
+ if (targetIndex < targetLength)
3608
+ targetVal = targetPtr->readID;
3609
+ }
3610
+
3611
+ mergeLength++;
3612
+ mergePtr++;
3613
+ mergePtr2++;
3614
+ }
3615
+
3616
+ while (sourceIndex < sourceLength) {
3617
+ mergePtr->readID = sourcePtr->readID;
3618
+ setShortReadMarkerPosition(mergePtr, -1);
3619
+ setShortReadMarkerOffset(mergePtr, -1);
3620
+ mergePtr2->readID = sourcePtr->readID;
3621
+ setShortReadMarkerPosition(mergePtr2, -1);
3622
+ setShortReadMarkerOffset(mergePtr2, -1);
3623
+ mergeLength++;
3624
+ mergePtr++;
3625
+ mergePtr2++;
3626
+ sourceIndex++;
3627
+ sourcePtr++;
3628
+ }
3629
+
3630
+ while (targetIndex < targetLength) {
3631
+ mergePtr->readID = targetPtr->readID;
3632
+ setShortReadMarkerPosition(mergePtr, -1);
3633
+ setShortReadMarkerOffset(mergePtr, -1);
3634
+ mergePtr2->readID = targetPtr->readID;
3635
+ setShortReadMarkerPosition(mergePtr2, -1);
3636
+ setShortReadMarkerOffset(mergePtr2, -1);
3637
+ mergeLength++;
3638
+ mergePtr++;
3639
+ mergePtr2++;
3640
+ targetIndex++;
3641
+ targetPtr++;
3642
+ }
3643
+
3644
+ free(targetArray);
3645
+ graph->nodeReads[targetID] = mergeArray;
3646
+ graph->nodeReadCounts[targetID] = mergeLength;
3647
+
3648
+ free(sourceArray);
3649
+ graph->nodeReads[sourceID] = mergeArray2;
3650
+ graph->nodeReadCounts[sourceID] = mergeLength;
3651
+ }
3652
+
3653
+ void shareReadStarts(Node * target, Node * source, Graph * graph)
3654
+ {
3655
+ ShortReadMarker *sourceArray;
3656
+ IDnum sourceLength, sourceID;
3657
+
3658
+ if (graph->nodeReads == NULL)
3659
+ return;
3660
+
3661
+ if (target == NULL || source == NULL)
3662
+ return;
3663
+
3664
+ sourceID = source->ID + graph->nodeCount;
3665
+ sourceArray = graph->nodeReads[sourceID];
3666
+ sourceLength = graph->nodeReadCounts[sourceID];
3667
+
3668
+ if (sourceArray == NULL)
3669
+ return;
3670
+
3671
+ spreadReadIDs(sourceArray, sourceLength, target, graph);
3672
+ }
3673
+
3674
+ ShortReadMarker **getNodeToReadMappings(Graph * graph)
3675
+ {
3676
+ return graph->nodeReads;
3677
+ }
3678
+
3679
+ IDnum getShortReadMarkerID(ShortReadMarker * marker)
3680
+ {
3681
+ return marker->readID;
3682
+ }
3683
+
3684
+ inline ShortLength getShortReadMarkerOffset(ShortReadMarker * marker)
3685
+ {
3686
+ return marker->offset;
3687
+ }
3688
+
3689
+ inline void setShortReadMarkerOffset(ShortReadMarker * marker,
3690
+ ShortLength offset)
3691
+ {
3692
+ marker->offset = offset;
3693
+ }
3694
+
3695
+ IDnum *getNodeReadCounts(Graph * graph)
3696
+ {
3697
+ return graph->nodeReadCounts;
3698
+ }
3699
+
3700
+ int getWordLength(Graph * graph)
3701
+ {
3702
+ return graph->wordLength;
3703
+ }
3704
+
3705
+ ShortReadMarker *getNodeReads(Node * node, Graph * graph)
3706
+ {
3707
+ IDnum id = node->ID + graph->nodeCount;
3708
+ return graph->nodeReads[id];
3709
+ }
3710
+
3711
+ IDnum getNodeReadCount(Node * node, Graph * graph)
3712
+ {
3713
+ IDnum id = node->ID + graph->nodeCount;
3714
+ return graph->nodeReadCounts[id];
3715
+ }
3716
+
3717
+ inline Coordinate getShortReadMarkerPosition(ShortReadMarker * marker)
3718
+ {
3719
+ return marker->position;
3720
+ }
3721
+
3722
+ inline void setShortReadMarkerPosition(ShortReadMarker * marker,
3723
+ Coordinate position)
3724
+ {
3725
+ if (position < -100)
3726
+ return;
3727
+
3728
+ marker->position = position;
3729
+ }
3730
+
3731
+ ShortReadMarker *getShortReadMarkerAtIndex(ShortReadMarker * array,
3732
+ IDnum index)
3733
+ {
3734
+ return &(array[index]);
3735
+ }
3736
+
3737
+ void destroyGraph(Graph * graph)
3738
+ {
3739
+ IDnum index;
3740
+ Node *node;
3741
+ for (index = 1; index <= graph->nodeCount; index++) {
3742
+ node = getNodeInGraph(graph, index);
3743
+ if (node != NULL)
3744
+ destroyNode(node, graph);
3745
+ }
3746
+
3747
+ if (graph->gapMarkers)
3748
+ deactivateGapMarkers(graph);
3749
+
3750
+ free(graph->nodes);
3751
+ destroyRecycleBin(nodeMemory);
3752
+ destroyRecycleBin(arcMemory);
3753
+ destroyAllPassageMarkers();
3754
+ free(graph->arcLookupTable);
3755
+ free(graph->nodeReads);
3756
+ free(graph->nodeReadCounts);
3757
+ free(graph);
3758
+ }
3759
+
3760
+ void setInsertLengths(Graph * graph, Category cat, Coordinate insertLength,
3761
+ Coordinate insertLength_std_dev)
3762
+ {
3763
+ graph->insertLengths[cat] = insertLength;
3764
+ graph->insertLengths_var[cat] =
3765
+ insertLength_std_dev * insertLength_std_dev;
3766
+ }
3767
+
3768
+ Coordinate getInsertLength(Graph * graph, Category cat)
3769
+ {
3770
+ return graph->insertLengths[cat / 2];
3771
+ }
3772
+
3773
+ double getInsertLength_var(Graph * graph, Category cat)
3774
+ {
3775
+ return graph->insertLengths_var[cat / 2];
3776
+ }
3777
+
3778
+ void activateGapMarkers(Graph * graph)
3779
+ {
3780
+ graph->gapMarkers =
3781
+ callocOrExit(graph->nodeCount + 1, GapMarker *);
3782
+ gapMarkerMemory = newRecycleBin(sizeof(GapMarker), GAPBLOCKSIZE);
3783
+ }
3784
+
3785
+ void deactivateGapMarkers(Graph * graph)
3786
+ {
3787
+ free(graph->gapMarkers);
3788
+ graph->gapMarkers = NULL;
3789
+ destroyRecycleBin(gapMarkerMemory);
3790
+ gapMarkerMemory = NULL;
3791
+ }
3792
+
3793
+ static GapMarker *allocateGapMarker()
3794
+ {
3795
+ return (GapMarker *) allocatePointer(gapMarkerMemory);
3796
+ }
3797
+
3798
+ void appendGap(Node * node, Coordinate length, Graph * graph)
3799
+ {
3800
+ IDnum nodeID = getNodeID(node);
3801
+ GapMarker *marker = allocateGapMarker();
3802
+ GapMarker *tmp;
3803
+
3804
+ marker->length = length;
3805
+
3806
+ if (nodeID > 0) {
3807
+ marker->position = node->length;
3808
+ marker->next = graph->gapMarkers[nodeID];
3809
+ graph->gapMarkers[nodeID] = marker;
3810
+ } else {
3811
+ for (tmp = graph->gapMarkers[-nodeID]; tmp != NULL;
3812
+ tmp = tmp->next)
3813
+ tmp->position += length;
3814
+
3815
+ marker->position = 0;
3816
+ marker->next = graph->gapMarkers[-nodeID];
3817
+ graph->gapMarkers[-nodeID] = marker;
3818
+ }
3819
+
3820
+ addBufferToDescriptor(node, length);
3821
+ }
3822
+
3823
+ void appendNodeGaps(Node * destination, Node * source, Graph * graph)
3824
+ {
3825
+ IDnum destinationID = getNodeID(destination);
3826
+ IDnum sourceID = getNodeID(source);
3827
+ GapMarker *marker;
3828
+
3829
+ if (graph->gapMarkers == NULL)
3830
+ return;
3831
+
3832
+ if (destinationID > 0 && sourceID > 0) {
3833
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3834
+ marker = marker->next)
3835
+ marker->position += destination->length;
3836
+ } else if (destinationID > 0 && sourceID < 0) {
3837
+ sourceID = -sourceID;
3838
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3839
+ marker = marker->next)
3840
+ marker->position =
3841
+ source->length + destination->length -
3842
+ marker->position - marker->length;
3843
+ } else if (destinationID < 0 && sourceID > 0) {
3844
+ destinationID = -destinationID;
3845
+ for (marker = graph->gapMarkers[destinationID];
3846
+ marker != NULL; marker = marker->next)
3847
+ marker->position += source->length;
3848
+
3849
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3850
+ marker = marker->next)
3851
+ marker->position =
3852
+ source->length - marker->position -
3853
+ marker->length;
3854
+ } else {
3855
+ destinationID = -destinationID;
3856
+ sourceID = -sourceID;
3857
+ for (marker = graph->gapMarkers[destinationID];
3858
+ marker != NULL; marker = marker->next)
3859
+ marker->position += source->length;
3860
+ }
3861
+
3862
+ if (graph->gapMarkers[destinationID] == NULL)
3863
+ graph->gapMarkers[destinationID] =
3864
+ graph->gapMarkers[sourceID];
3865
+ else {
3866
+ marker = graph->gapMarkers[destinationID];
3867
+ while (marker->next != NULL)
3868
+ marker = marker->next;
3869
+ marker->next = graph->gapMarkers[sourceID];
3870
+ }
3871
+
3872
+ graph->gapMarkers[sourceID] = NULL;
3873
+ }
3874
+
3875
+ GapMarker *getGap(Node * node, Graph * graph)
3876
+ {
3877
+ IDnum nodeID = getNodeID(node);
3878
+
3879
+ if (graph->gapMarkers == NULL)
3880
+ return NULL;
3881
+
3882
+ if (nodeID < 0)
3883
+ nodeID = -nodeID;
3884
+
3885
+ return graph->gapMarkers[nodeID];
3886
+ }
3887
+
3888
+ GapMarker *getNextGap(GapMarker * marker)
3889
+ {
3890
+ return marker->next;
3891
+ }
3892
+
3893
+ Coordinate getGapStart(GapMarker * marker)
3894
+ {
3895
+ return marker->position;
3896
+ }
3897
+
3898
+ Coordinate getGapFinish(GapMarker * marker)
3899
+ {
3900
+ return marker->position + marker->length;
3901
+ }
3902
+
3903
+ void reallocateNodeDescriptor(Node * node, Coordinate length) {
3904
+ Coordinate arrayLength, index, shift;
3905
+ Node * twin = node->twinNode;
3906
+ Descriptor * array;
3907
+ Nucleotide nucleotide;
3908
+
3909
+ if (length < node->length)
3910
+ exitErrorf(EXIT_FAILURE, true, "Sum of node lengths smaller than first!");
3911
+
3912
+ shift = length - node->length;
3913
+
3914
+ arrayLength = length / 4;
3915
+ if (length % 4)
3916
+ arrayLength++;
3917
+
3918
+ node->descriptor = reallocOrExit(node->descriptor, arrayLength, Descriptor);
3919
+
3920
+ array = callocOrExit(arrayLength, Descriptor);
3921
+ for (index = node->length - 1; index >= 0; index--) {
3922
+ nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
3923
+ writeNucleotideInDescriptor(nucleotide, array, index + shift);
3924
+ }
3925
+
3926
+ free(twin->descriptor);
3927
+ twin->descriptor = array;
3928
+ }
3929
+
3930
+ boolean doubleStrandedGraph(Graph * graph) {
3931
+ return graph->double_stranded;
3932
+ }