bio-velvet_underground 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (286) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +19 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +53 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/ext/bioruby.patch +60 -0
  11. data/ext/mkrf_conf.rb +50 -0
  12. data/ext/src/Makefile +125 -0
  13. data/ext/src/src/allocArray.c +305 -0
  14. data/ext/src/src/allocArray.h +86 -0
  15. data/ext/src/src/autoOpen.c +107 -0
  16. data/ext/src/src/autoOpen.h +18 -0
  17. data/ext/src/src/binarySequences.c +813 -0
  18. data/ext/src/src/binarySequences.h +125 -0
  19. data/ext/src/src/concatenatedGraph.c +233 -0
  20. data/ext/src/src/concatenatedGraph.h +30 -0
  21. data/ext/src/src/concatenatedPreGraph.c +262 -0
  22. data/ext/src/src/concatenatedPreGraph.h +29 -0
  23. data/ext/src/src/correctedGraph.c +2642 -0
  24. data/ext/src/src/correctedGraph.h +32 -0
  25. data/ext/src/src/dfib.c +509 -0
  26. data/ext/src/src/dfib.h +69 -0
  27. data/ext/src/src/dfibHeap.c +89 -0
  28. data/ext/src/src/dfibHeap.h +39 -0
  29. data/ext/src/src/dfibpriv.h +105 -0
  30. data/ext/src/src/fib.c +628 -0
  31. data/ext/src/src/fib.h +78 -0
  32. data/ext/src/src/fibHeap.c +79 -0
  33. data/ext/src/src/fibHeap.h +41 -0
  34. data/ext/src/src/fibpriv.h +110 -0
  35. data/ext/src/src/globals.h +153 -0
  36. data/ext/src/src/graph.c +3983 -0
  37. data/ext/src/src/graph.h +233 -0
  38. data/ext/src/src/graphReConstruction.c +1472 -0
  39. data/ext/src/src/graphReConstruction.h +30 -0
  40. data/ext/src/src/graphStats.c +2167 -0
  41. data/ext/src/src/graphStats.h +72 -0
  42. data/ext/src/src/kmer.c +652 -0
  43. data/ext/src/src/kmer.h +73 -0
  44. data/ext/src/src/kmerOccurenceTable.c +236 -0
  45. data/ext/src/src/kmerOccurenceTable.h +44 -0
  46. data/ext/src/src/kseq.h +223 -0
  47. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  48. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  49. data/ext/src/src/passageMarker.c +677 -0
  50. data/ext/src/src/passageMarker.h +137 -0
  51. data/ext/src/src/preGraph.c +1717 -0
  52. data/ext/src/src/preGraph.h +106 -0
  53. data/ext/src/src/preGraphConstruction.c +990 -0
  54. data/ext/src/src/preGraphConstruction.h +26 -0
  55. data/ext/src/src/readCoherentGraph.c +557 -0
  56. data/ext/src/src/readCoherentGraph.h +30 -0
  57. data/ext/src/src/readSet.c +1734 -0
  58. data/ext/src/src/readSet.h +67 -0
  59. data/ext/src/src/recycleBin.c +199 -0
  60. data/ext/src/src/recycleBin.h +58 -0
  61. data/ext/src/src/roadMap.c +342 -0
  62. data/ext/src/src/roadMap.h +65 -0
  63. data/ext/src/src/run.c +318 -0
  64. data/ext/src/src/run.h +52 -0
  65. data/ext/src/src/run2.c +712 -0
  66. data/ext/src/src/scaffold.c +1876 -0
  67. data/ext/src/src/scaffold.h +64 -0
  68. data/ext/src/src/shortReadPairs.c +1243 -0
  69. data/ext/src/src/shortReadPairs.h +32 -0
  70. data/ext/src/src/splay.c +259 -0
  71. data/ext/src/src/splay.h +43 -0
  72. data/ext/src/src/splayTable.c +1315 -0
  73. data/ext/src/src/splayTable.h +31 -0
  74. data/ext/src/src/tightString.c +362 -0
  75. data/ext/src/src/tightString.h +82 -0
  76. data/ext/src/src/utility.c +199 -0
  77. data/ext/src/src/utility.h +98 -0
  78. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  79. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  80. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  81. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  82. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  83. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  84. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  85. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  86. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  87. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  88. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  89. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  90. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  91. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  92. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  93. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  94. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  95. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  96. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  97. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  98. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  99. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  100. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  101. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  102. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  103. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  104. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  105. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  106. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  107. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  108. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  109. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  110. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  111. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  112. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  113. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  114. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  115. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  116. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  117. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  118. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  119. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  120. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  218. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  219. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  220. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  221. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  222. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  223. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  224. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  225. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  226. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  227. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  228. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  229. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  230. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  231. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  232. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  233. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  234. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  235. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  236. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  237. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  238. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  239. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  240. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  241. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  242. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  243. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  244. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  245. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  246. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  247. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  248. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  249. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  250. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  251. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  252. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  253. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  254. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  255. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  256. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  257. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  258. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  259. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  260. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  261. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  262. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  263. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  264. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  265. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  266. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  267. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  268. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  269. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  270. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  271. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  272. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  273. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  274. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  275. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  276. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  277. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  278. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  279. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  280. data/lib/bio-velvet_underground.rb +12 -0
  281. data/lib/bio-velvet_underground/external/VERSION +1 -0
  282. data/lib/bio-velvet_underground/velvet_underground.rb +72 -0
  283. data/spec/binary_sequence_store_spec.rb +27 -0
  284. data/spec/data/1/CnyUnifiedSeq +0 -0
  285. data/spec/spec_helper.rb +31 -0
  286. metadata +456 -0
@@ -0,0 +1,3983 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+
25
+ #include "globals.h"
26
+ #include "graph.h"
27
+ #include "recycleBin.h"
28
+ #include "tightString.h"
29
+ #include "passageMarker.h"
30
+ #include "utility.h"
31
+ #include "kmer.h"
32
+
33
+ #define ADENINE 0
34
+ #define CYTOSINE 1
35
+ #define GUANINE 2
36
+ #define THYMINE 3
37
+
38
+ struct arc_st {
39
+ Arc *twinArc; // 64
40
+ Arc *next; // 64
41
+ Arc *previous; // 64
42
+ Arc *nextInLookupTable; // 64
43
+ Node *destination; // 64
44
+ IDnum multiplicity; // 32
45
+ } ATTRIBUTE_PACKED; // 352 Total
46
+
47
+ struct node_st {
48
+ Node *twinNode; // 64
49
+ Arc *arc; // 64
50
+ Descriptor *descriptor; // 64
51
+ PassageMarkerI marker; // 32
52
+ IDnum length; // 32
53
+ #ifndef SINGLE_COV_CAT
54
+ IDnum virtualCoverage[CATEGORIES]; // 32 * 2
55
+ IDnum originalVirtualCoverage[CATEGORIES]; // 32 * 2
56
+ #else
57
+ IDnum virtualCoverage; // 32 * 2
58
+ #endif
59
+ IDnum ID; // 32
60
+ IDnum arcCount; // 32
61
+ boolean status; // 1
62
+ boolean uniqueness; // 1
63
+ } ATTRIBUTE_PACKED; // 418 Total
64
+
65
+ struct shortReadMarker_st {
66
+ IDnum position;
67
+ IDnum readID;
68
+ ShortLength offset;
69
+ } ATTRIBUTE_PACKED;
70
+
71
+ struct gapMarker_st {
72
+ GapMarker *next;
73
+ IDnum position;
74
+ IDnum length;
75
+ } ATTRIBUTE_PACKED;
76
+
77
+ struct graph_st {
78
+ Node **nodes;
79
+ Arc **arcLookupTable;
80
+ ShortReadMarker **nodeReads;
81
+ IDnum *nodeReadCounts;
82
+ GapMarker **gapMarkers;
83
+ Coordinate insertLengths[CATEGORIES + 1];
84
+ double insertLengths_var[CATEGORIES + 1];
85
+ IDnum sequenceCount;
86
+ IDnum nodeCount;
87
+ int wordLength;
88
+ boolean double_stranded;
89
+ };
90
+
91
+ static RecycleBin *arcMemory = NULL;
92
+ static RecycleBin *nodeMemory = NULL;
93
+ static RecycleBin *gapMarkerMemory = NULL;
94
+
95
+ #define BLOCKSIZE 50
96
+ #define GAPBLOCKSIZE 10000
97
+
98
+ Arc *allocateArc()
99
+ {
100
+ if (arcMemory == NULL)
101
+ arcMemory = newRecycleBin(sizeof(Arc), BLOCKSIZE);
102
+
103
+ return allocatePointer(arcMemory);
104
+ }
105
+
106
+ void deallocateArc(Arc * arc)
107
+ {
108
+ deallocatePointer(arcMemory, arc);
109
+ }
110
+
111
+ Node *allocateNode()
112
+ {
113
+ if (nodeMemory == NULL)
114
+ nodeMemory = newRecycleBin(sizeof(Node), BLOCKSIZE);
115
+
116
+ return (Node *) allocatePointer(nodeMemory);
117
+ }
118
+
119
+ void deallocateNode(Node * node)
120
+ {
121
+ deallocatePointer(nodeMemory, node);
122
+ }
123
+
124
+ // Returns the twin node of a given node
125
+ Node *getTwinNode(Node * node)
126
+ {
127
+ return node->twinNode;
128
+ }
129
+
130
+ // Inserts new passage marker in the marker list of destination node
131
+ void insertPassageMarker(PassageMarkerI marker, Node * destination)
132
+ {
133
+ setTopOfTheNode(marker);
134
+ setNextInNode(marker, destination->marker);
135
+ destination->marker = marker;
136
+ }
137
+
138
+ // Returns the length of the node's descriptor list
139
+ Coordinate getNodeLength(Node * node)
140
+ {
141
+ return node->length;
142
+ }
143
+
144
+ // Returns the number of nodes in the graph
145
+ IDnum nodeCount(Graph * graph)
146
+ {
147
+ return graph->nodeCount;
148
+ }
149
+
150
+ // returns the number of sequences used to buid the graph
151
+ IDnum sequenceCount(Graph * graph)
152
+ {
153
+ return graph->sequenceCount;
154
+ }
155
+
156
+ // Creates an arc from node origin to node destination.
157
+ // If this arc already exists, increments its multiplicity by 1.
158
+ Arc *createArc(Node * originNode, Node * destinationNode, Graph * graph)
159
+ {
160
+ Arc *arc, *twinArc;
161
+ Node *destinationTwin;
162
+ IDnum lookupIndex;
163
+
164
+ if (originNode == NULL || destinationNode == NULL)
165
+ return NULL;
166
+
167
+ // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
168
+
169
+ arc = getArcBetweenNodes(originNode, destinationNode, graph);
170
+
171
+ if (arc != NULL) {
172
+ arc->multiplicity++;
173
+ arc->twinArc->multiplicity++;
174
+ return arc;
175
+ }
176
+ // If not found
177
+ #ifdef _OPENMP
178
+ #pragma omp critical
179
+ #endif
180
+ arc = allocateArc();
181
+ arc->destination = destinationNode;
182
+ arc->multiplicity = 1;
183
+ arc->previous = NULL;
184
+ arc->next = originNode->arc;
185
+ if (originNode->arc != NULL)
186
+ originNode->arc->previous = arc;
187
+ originNode->arc = arc;
188
+ originNode->arcCount++;
189
+
190
+ destinationTwin = destinationNode->twinNode;
191
+
192
+ // Hairpin case
193
+ if (destinationTwin == originNode) {
194
+ arc->multiplicity++;
195
+ arc->twinArc = arc;
196
+ if (graph->arcLookupTable != NULL) {
197
+ lookupIndex =
198
+ 2 * originNode->ID + destinationNode->ID +
199
+ 3 * graph->nodeCount;
200
+ arc->nextInLookupTable =
201
+ graph->arcLookupTable[lookupIndex];
202
+ graph->arcLookupTable[lookupIndex] = arc;
203
+ }
204
+ return arc;
205
+ }
206
+
207
+ #ifdef _OPENMP
208
+ #pragma omp critical
209
+ #endif
210
+ twinArc = allocateArc();
211
+ twinArc->destination = originNode->twinNode;
212
+ twinArc->multiplicity = 1;
213
+ twinArc->previous = NULL;
214
+ twinArc->next = destinationTwin->arc;
215
+ if (destinationTwin->arc != NULL)
216
+ destinationTwin->arc->previous = twinArc;
217
+ destinationTwin->arc = twinArc;
218
+ destinationTwin->arcCount++;
219
+
220
+ arc->twinArc = twinArc;
221
+ twinArc->twinArc = arc;
222
+
223
+ if (graph->arcLookupTable != NULL) {
224
+ lookupIndex =
225
+ 2 * originNode->ID + destinationNode->ID +
226
+ 3 * graph->nodeCount;
227
+ arc->nextInLookupTable =
228
+ graph->arcLookupTable[lookupIndex];
229
+ graph->arcLookupTable[lookupIndex] = arc;
230
+
231
+ lookupIndex =
232
+ -2 * destinationNode->ID - originNode->ID +
233
+ 3 * graph->nodeCount;
234
+ twinArc->nextInLookupTable =
235
+ graph->arcLookupTable[lookupIndex];
236
+ graph->arcLookupTable[lookupIndex] = twinArc;
237
+ }
238
+ return arc;
239
+ }
240
+
241
+ void createAnalogousArc(Node * originNode, Node * destinationNode,
242
+ Arc * refArc, Graph * graph)
243
+ {
244
+ Arc *arc, *twinArc;
245
+ Node *destinationTwin;
246
+ IDnum lookupIndex;
247
+
248
+ if (originNode == NULL || destinationNode == NULL)
249
+ return;
250
+
251
+ // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
252
+
253
+ arc = getArcBetweenNodes(originNode, destinationNode, graph);
254
+
255
+ if (arc != NULL) {
256
+ if (refArc->twinArc != refArc) {
257
+ arc->multiplicity += getMultiplicity(refArc);
258
+ arc->twinArc->multiplicity +=
259
+ getMultiplicity(refArc);
260
+ } else {
261
+ arc->multiplicity += getMultiplicity(refArc) / 2;
262
+ arc->twinArc->multiplicity +=
263
+ getMultiplicity(refArc) / 2;
264
+ }
265
+ return;
266
+ }
267
+ // If not found
268
+ arc = allocateArc();
269
+ arc->destination = destinationNode;
270
+ arc->multiplicity = getMultiplicity(refArc);
271
+ arc->previous = NULL;
272
+ arc->next = originNode->arc;
273
+ if (originNode->arc != NULL)
274
+ originNode->arc->previous = arc;
275
+ originNode->arc = arc;
276
+ originNode->arcCount++;
277
+
278
+ destinationTwin = destinationNode->twinNode;
279
+
280
+ // Hairpin case
281
+ if (destinationTwin == originNode) {
282
+ arc->twinArc = arc;
283
+ if (refArc->twinArc != refArc)
284
+ arc->multiplicity *= 2;
285
+
286
+ if (graph->arcLookupTable != NULL) {
287
+ lookupIndex =
288
+ 2 * originNode->ID + destinationNode->ID
289
+ + 3 * graph->nodeCount;
290
+ arc->nextInLookupTable =
291
+ graph->arcLookupTable[lookupIndex];
292
+ graph->arcLookupTable[lookupIndex] = arc;
293
+ }
294
+ return;
295
+ }
296
+
297
+ twinArc = allocateArc();
298
+ twinArc->destination = originNode->twinNode;
299
+ twinArc->multiplicity = getMultiplicity(refArc);
300
+ twinArc->previous = NULL;
301
+ twinArc->next = destinationTwin->arc;
302
+ if (destinationTwin->arc != NULL)
303
+ destinationTwin->arc->previous = twinArc;
304
+ destinationTwin->arc = twinArc;
305
+ destinationTwin->arcCount++;
306
+
307
+ arc->twinArc = twinArc;
308
+ twinArc->twinArc = arc;
309
+
310
+ if (graph->arcLookupTable != NULL) {
311
+ lookupIndex =
312
+ 2 * originNode->ID + destinationNode->ID +
313
+ 3 * graph->nodeCount;
314
+ arc->nextInLookupTable =
315
+ graph->arcLookupTable[lookupIndex];
316
+ graph->arcLookupTable[lookupIndex] = arc;
317
+
318
+ lookupIndex =
319
+ -2 * destinationNode->ID - originNode->ID +
320
+ 3 * graph->nodeCount;
321
+ twinArc->nextInLookupTable =
322
+ graph->arcLookupTable[lookupIndex];
323
+ graph->arcLookupTable[lookupIndex] = twinArc;
324
+ }
325
+ }
326
+
327
+ Arc *getArcBetweenNodes(Node * originNode, Node * destinationNode,
328
+ Graph * graph)
329
+ {
330
+ Arc *arc;
331
+ Node *twinDestination, *twinOrigin;
332
+
333
+ if (originNode == NULL || destinationNode == NULL)
334
+ return NULL;
335
+
336
+ if (graph->arcLookupTable != NULL) {
337
+ for (arc =
338
+ graph->arcLookupTable[2 * originNode->ID +
339
+ destinationNode->ID +
340
+ 3 * graph->nodeCount];
341
+ arc != NULL; arc = arc->nextInLookupTable) {
342
+ if (arc->destination == destinationNode) {
343
+ return arc;
344
+ }
345
+ }
346
+ return NULL;
347
+ }
348
+
349
+ twinDestination = destinationNode->twinNode;
350
+ if (originNode->arcCount <= twinDestination->arcCount) {
351
+ for (arc = originNode->arc; arc != NULL; arc = arc->next)
352
+ if (arc->destination == destinationNode)
353
+ return arc;
354
+ return NULL;
355
+ }
356
+
357
+ twinOrigin = originNode->twinNode;
358
+ for (arc = twinDestination->arc; arc != NULL; arc = arc->next)
359
+ if (arc->destination == twinOrigin)
360
+ return arc->twinArc;
361
+ return NULL;
362
+ }
363
+
364
+ void destroyArc(Arc * arc, Graph * graph)
365
+ {
366
+ Node *origin, *destination;
367
+ Arc *twinArc;
368
+ Arc *currentArc;
369
+ IDnum lookupIndex;
370
+
371
+ if (arc == NULL)
372
+ return;
373
+
374
+ twinArc = arc->twinArc;
375
+ origin = twinArc->destination->twinNode;
376
+ destination = arc->destination->twinNode;
377
+
378
+ //velvetLog("Destroying arc %p\n", arc);
379
+
380
+ // Removing arc from list
381
+ if (origin->arc == arc) {
382
+ origin->arc = arc->next;
383
+ if (origin->arc != NULL)
384
+ origin->arc->previous = NULL;
385
+ } else {
386
+ arc->previous->next = arc->next;
387
+ if (arc->next != NULL)
388
+ arc->next->previous = arc->previous;
389
+ }
390
+
391
+ origin->arcCount--;
392
+
393
+ if (destination == origin) {
394
+ if (graph->arcLookupTable != NULL) {
395
+ lookupIndex =
396
+ 2 * origin->ID - destination->ID +
397
+ 3 * graph->nodeCount;
398
+ currentArc = graph->arcLookupTable[lookupIndex];
399
+ if (currentArc == arc)
400
+ graph->arcLookupTable[lookupIndex] =
401
+ arc->nextInLookupTable;
402
+ else {
403
+ while (currentArc->nextInLookupTable !=
404
+ arc)
405
+ currentArc =
406
+ currentArc->nextInLookupTable;
407
+
408
+ currentArc->nextInLookupTable =
409
+ twinArc->nextInLookupTable;
410
+ }
411
+ }
412
+
413
+ deallocateArc(arc);
414
+ return;
415
+ }
416
+ // Removing arc's twin from list
417
+ if (destination->arc == twinArc) {
418
+ destination->arc = twinArc->next;
419
+ if (destination->arc != NULL)
420
+ destination->arc->previous = NULL;
421
+ } else {
422
+ twinArc->previous->next = twinArc->next;
423
+ if (twinArc->next != NULL)
424
+ twinArc->next->previous = twinArc->previous;
425
+ }
426
+
427
+ destination->arcCount--;
428
+
429
+ if (graph->arcLookupTable != NULL) {
430
+ lookupIndex =
431
+ 2 * origin->ID - destination->ID +
432
+ 3 * graph->nodeCount;
433
+ currentArc = graph->arcLookupTable[lookupIndex];
434
+ if (currentArc == arc)
435
+ graph->arcLookupTable[lookupIndex] =
436
+ arc->nextInLookupTable;
437
+ else {
438
+ while (currentArc->nextInLookupTable != arc)
439
+ currentArc = currentArc->nextInLookupTable;
440
+
441
+ currentArc->nextInLookupTable =
442
+ arc->nextInLookupTable;
443
+ }
444
+
445
+ lookupIndex =
446
+ 2 * destination->ID - origin->ID +
447
+ 3 * graph->nodeCount;
448
+ currentArc = graph->arcLookupTable[lookupIndex];
449
+ if (currentArc == twinArc)
450
+ graph->arcLookupTable[lookupIndex] =
451
+ twinArc->nextInLookupTable;
452
+ else {
453
+ while (currentArc->nextInLookupTable != twinArc)
454
+ currentArc = currentArc->nextInLookupTable;
455
+
456
+ currentArc->nextInLookupTable =
457
+ twinArc->nextInLookupTable;
458
+ }
459
+ }
460
+ // Freeing memory
461
+ deallocateArc(arc);
462
+ deallocateArc(twinArc);
463
+ }
464
+
465
+ void destroyNode(Node * node, Graph * graph)
466
+ {
467
+ Node *twin = node->twinNode;
468
+ IDnum ID = node->ID;
469
+ IDnum index;
470
+
471
+ //velvetLog("Destroying %d\n and twin %d\n", getNodeID(node), getNodeID(twin));
472
+
473
+ if (ID < 0)
474
+ ID = -ID;
475
+
476
+ // Node arcs:
477
+ while (node->arc != NULL)
478
+ destroyArc(node->arc, graph);
479
+ while (twin->arc != NULL)
480
+ destroyArc(twin->arc, graph);
481
+
482
+ // Descriptors
483
+ free(node->descriptor);
484
+ free(twin->descriptor);
485
+
486
+ // Passage markers
487
+ while (node->marker != NULL_IDX)
488
+ destroyPassageMarker(node->marker);
489
+
490
+ // Reads starts
491
+ if (graph->nodeReads != NULL) {
492
+ index = ID + graph->nodeCount;
493
+ free(graph->nodeReads[index]);
494
+ graph->nodeReads[index] = NULL;
495
+ graph->nodeReadCounts[index] = 0;
496
+
497
+ index = -ID + graph->nodeCount;
498
+ free(graph->nodeReads[index]);
499
+ graph->nodeReads[index] = NULL;
500
+ graph->nodeReadCounts[index] = 0;
501
+ }
502
+
503
+ graph->nodes[ID] = NULL;
504
+ deallocateNode(node);
505
+ deallocateNode(twin);
506
+ }
507
+
508
+ int outDegree(Node * node)
509
+ {
510
+ int result = 0;
511
+ Arc *arc = node->arc;
512
+ while (arc != NULL) {
513
+ result += arc->multiplicity;
514
+ arc = arc->next;
515
+ }
516
+
517
+ return result;
518
+ }
519
+
520
+ int simpleArcCount(Node * node)
521
+ {
522
+ return node->arcCount;
523
+ }
524
+
525
+ int arcCount(Node * node)
526
+ {
527
+ int result = 0;
528
+ Arc *arc;
529
+
530
+ if (node == NULL)
531
+ return result;
532
+
533
+ arc = node->arc;
534
+ while (arc != NULL) {
535
+ result++;
536
+ if (arc->destination == node->twinNode)
537
+ result++;
538
+ arc = arc->next;
539
+ }
540
+
541
+ return result;
542
+
543
+ }
544
+
545
+ static Nucleotide getNucleotideInDescriptor(Descriptor * descriptor,
546
+ Coordinate i)
547
+ {
548
+ Descriptor *fourMer = descriptor + i / 4;
549
+
550
+ switch (i % 4) {
551
+ case 0:
552
+ return (*fourMer & 3);
553
+ case 1:
554
+ return (*fourMer & 12) >> 2;
555
+ case 2:
556
+ return (*fourMer & 48) >> 4;
557
+ case 3:
558
+ return (*fourMer & 192) >> 6;
559
+ }
560
+ return 0;
561
+ }
562
+
563
+ Nucleotide getNucleotideInNode(Node * node, Coordinate index) {
564
+ return getNucleotideInDescriptor(node->descriptor, index);
565
+ }
566
+
567
+ PassageMarkerI getMarker(Node * node)
568
+ {
569
+ return node->marker;
570
+ }
571
+
572
+ void setMarker(Node * node, PassageMarkerI marker)
573
+ {
574
+ if (node == NULL)
575
+ return;
576
+
577
+ if (marker == NULL_IDX) {
578
+ node->marker = NULL_IDX;
579
+ node->twinNode->marker = NULL_IDX;
580
+ return;
581
+ }
582
+
583
+ node->marker = marker;
584
+ setTopOfTheNode(marker);
585
+ node->twinNode->marker = getTwinMarker(marker);
586
+ setTopOfTheNode(getTwinMarker(marker));
587
+ }
588
+
589
+ void setNodeStatus(Node * node, boolean status)
590
+ {
591
+ node->status = status;
592
+ node->twinNode->status = status;
593
+ }
594
+
595
+ void setSingleNodeStatus(Node * node, boolean status)
596
+ {
597
+ node->status = status;
598
+ }
599
+
600
+ boolean getNodeStatus(Node * node)
601
+ {
602
+ if (node == NULL)
603
+ return false;
604
+ return node->status;
605
+ }
606
+
607
+ IDnum getNodeID(Node * node)
608
+ {
609
+ if (node == NULL)
610
+ return 0;
611
+
612
+ return node->ID;
613
+ }
614
+
615
+ void resetNodeStatus(Graph * graph)
616
+ {
617
+ IDnum nodeIndex;
618
+ Node *node;
619
+
620
+ for (nodeIndex = 1; nodeIndex <= graph->nodeCount; nodeIndex++) {
621
+ node = graph->nodes[nodeIndex];
622
+ if (node == NULL)
623
+ continue;
624
+
625
+ node->status = false;
626
+ node->twinNode->status = false;
627
+ }
628
+ }
629
+
630
+ Node *getNodeInGraph(Graph * graph, IDnum nodeID)
631
+ {
632
+ if (nodeID == 0)
633
+ return NULL;
634
+ else if (nodeID > 0)
635
+ return graph->nodes[nodeID];
636
+ else if (graph->nodes[-nodeID] == NULL)
637
+ return NULL;
638
+ else
639
+ return graph->nodes[-nodeID]->twinNode;
640
+ }
641
+
642
+ Arc *getArc(Node * node)
643
+ {
644
+ return node->arc;
645
+ }
646
+
647
+ Arc *getNextArc(Arc * arc)
648
+ {
649
+ return arc->next;
650
+ }
651
+
652
+ IDnum getMultiplicity(Arc * arc)
653
+ {
654
+ if (arc == NULL)
655
+ return 0;
656
+
657
+ return arc->multiplicity;
658
+ }
659
+
660
+ Node *getOrigin(Arc * arc)
661
+ {
662
+ if (arc == NULL)
663
+ return NULL;
664
+
665
+ return arc->twinArc->destination->twinNode;
666
+ }
667
+
668
+ Node *getDestination(Arc * arc)
669
+ {
670
+ if (arc == NULL)
671
+ return NULL;
672
+
673
+ return arc->destination;
674
+ }
675
+
676
+ IDnum markerCount(Node * node)
677
+ {
678
+ IDnum count = 0;
679
+ PassageMarkerI marker;
680
+
681
+ for (marker = getMarker(node); marker != NULL_IDX;
682
+ marker = getNextInNode(marker))
683
+ count++;
684
+
685
+ return count;
686
+ }
687
+
688
+ void appendNodeSequence(Node * node, TightString * sequence,
689
+ Coordinate writeIndex)
690
+ {
691
+ Coordinate i;
692
+ Nucleotide nucleotide;
693
+
694
+ //velvetLog("Getting sequence from node %d of length %d (%d)\n", getNodeID(node), getNodeLength(node), getLength(nodeLabel));
695
+
696
+ for (i = 0; i < getNodeLength(node); i++) {
697
+ nucleotide =
698
+ getNucleotideInDescriptor(node->descriptor, i);
699
+ writeNucleotideAtPosition(nucleotide, i + writeIndex,
700
+ sequence);
701
+ }
702
+ }
703
+
704
+ static void writeNucleotideInDescriptor(Nucleotide nucleotide,
705
+ Descriptor * descriptor,
706
+ Coordinate i)
707
+ {
708
+ Descriptor *fourMer = descriptor + i / 4;
709
+ switch (i % 4) {
710
+ case 3:
711
+ *fourMer &= 63;
712
+ *fourMer += nucleotide << 6;
713
+ return;
714
+ case 2:
715
+ *fourMer &= 207;
716
+ *fourMer += nucleotide << 4;
717
+ return;
718
+ case 1:
719
+ *fourMer &= 243;
720
+ *fourMer += nucleotide << 2;
721
+ return;
722
+ case 0:
723
+ *fourMer &= 252;
724
+ *fourMer += nucleotide;
725
+ }
726
+ }
727
+
728
+ static inline Descriptor *mergeDescriptors(Descriptor * descr,
729
+ Coordinate destinationLength,
730
+ Descriptor * copy,
731
+ Coordinate sourceLength,
732
+ size_t arrayLength)
733
+ {
734
+ Descriptor *readPtr, *writePtr;
735
+ Descriptor readCopy;
736
+ int readOffset, writeOffset;
737
+ Descriptor *new = callocOrExit(arrayLength, Descriptor);
738
+ Coordinate index;
739
+
740
+ readPtr = descr;
741
+ readCopy = *readPtr;
742
+ writePtr = new;
743
+ writeOffset = 0;
744
+ for (index = 0; index < destinationLength; index++) {
745
+ (*writePtr) >>= 2;
746
+ (*writePtr) += (readCopy & 3) << 6;
747
+ readCopy >>= 2;
748
+
749
+ writeOffset++;
750
+ if (writeOffset == 4) {
751
+ writePtr++;
752
+ readPtr++;
753
+ if (index < destinationLength - 1)
754
+ readCopy = *readPtr;
755
+ writeOffset = 0;
756
+ }
757
+ }
758
+
759
+ readPtr = copy;
760
+ readCopy = *readPtr;
761
+ readOffset = 0;
762
+ for (index = 0; index < sourceLength; index++) {
763
+ (*writePtr) >>= 2;
764
+ (*writePtr) += (readCopy & 3) << 6;
765
+ readCopy >>= 2;
766
+
767
+ writeOffset++;
768
+ if (writeOffset == 4) {
769
+ writePtr++;
770
+ writeOffset = 0;
771
+ }
772
+
773
+ readOffset++;
774
+ if (readOffset == 4) {
775
+ readPtr++;
776
+ if (index < sourceLength - 1)
777
+ readCopy = *readPtr;
778
+ readOffset = 0;
779
+ }
780
+ }
781
+
782
+ if (writeOffset != 0) {
783
+ while (writeOffset != 4) {
784
+ (*writePtr) >>= 2;
785
+ writeOffset++;
786
+ }
787
+ }
788
+
789
+ return new;
790
+ }
791
+
792
+ static void addBufferToDescriptor(Node * node, Coordinate length)
793
+ {
794
+ Descriptor *descr;
795
+ Coordinate newLength;
796
+ size_t arrayLength;
797
+ Node *twinNode;
798
+ Coordinate index;
799
+ Descriptor *old_descriptor;
800
+
801
+ if (node == NULL)
802
+ return;
803
+
804
+ twinNode = node->twinNode;
805
+ descr = node->descriptor;
806
+
807
+ // Amendments for empty descriptors
808
+ if (descr == NULL) {
809
+ arrayLength = length / 4;
810
+ if (length % 4 != 0)
811
+ arrayLength++;
812
+
813
+ node->descriptor = callocOrExit(arrayLength, Descriptor);
814
+ node->length = length;
815
+ twinNode->descriptor =
816
+ callocOrExit(arrayLength, Descriptor);
817
+ twinNode->length = length;
818
+ return;
819
+ }
820
+
821
+ newLength = node->length + length;
822
+ arrayLength = newLength / 4;
823
+ if (newLength % 4 != 0)
824
+ arrayLength++;
825
+
826
+ // Merging forward descriptors
827
+ node->descriptor =
828
+ reallocOrExit(node->descriptor, arrayLength, Descriptor);
829
+
830
+ for (index = node->length; index < newLength; index++)
831
+ writeNucleotideInDescriptor(ADENINE, node->descriptor,
832
+ index);
833
+ node->length = newLength;
834
+
835
+ // Merging reverse descriptors
836
+ old_descriptor = twinNode->descriptor;
837
+ twinNode->descriptor = callocOrExit(arrayLength, Descriptor);
838
+ for (index = 0; index < twinNode->length; index++)
839
+ writeNucleotideInDescriptor(getNucleotideInDescriptor
840
+ (old_descriptor, index),
841
+ twinNode->descriptor,
842
+ index + length);
843
+ for (index = 0; index < length; index++)
844
+ writeNucleotideInDescriptor(THYMINE, twinNode->descriptor,
845
+ index);
846
+ free(old_descriptor);
847
+ twinNode->length = newLength;
848
+ }
849
+
850
+ void appendDescriptors(Node * destination, Node * source)
851
+ {
852
+ Descriptor *copy;
853
+ Descriptor *twinCopy;
854
+ Descriptor *descr;
855
+ Descriptor *twinDescr;
856
+ Coordinate newLength, destinationLength, sourceLength;
857
+ size_t arrayLength;
858
+ Descriptor *new;
859
+ Node *twinDestination;
860
+
861
+ if (source == NULL || destination == NULL)
862
+ return;
863
+
864
+ twinDestination = destination->twinNode;
865
+ descr = destination->descriptor;
866
+ twinDescr = twinDestination->descriptor;
867
+ copy = source->descriptor;
868
+ twinCopy = source->twinNode->descriptor;
869
+
870
+ // Amendments for empty descriptors
871
+ if (getNodeLength(source) == 0)
872
+ return;
873
+ if (getNodeLength(destination) == 0) {
874
+ destination->descriptor = copy;
875
+ twinDestination->descriptor = twinCopy;
876
+ source->descriptor = NULL;
877
+ source->twinNode->descriptor = NULL;
878
+ destination->length = source->length;
879
+ destination->twinNode->length = source->length;
880
+ source->length = 0;
881
+ source->twinNode->length = 0;
882
+ return;
883
+ }
884
+
885
+ destinationLength = destination->length;
886
+ sourceLength = source->length;
887
+ newLength = destinationLength + sourceLength;
888
+ arrayLength = newLength / 4;
889
+ if (newLength % 4 != 0)
890
+ arrayLength++;
891
+
892
+ // Merging forward descriptors
893
+ new =
894
+ mergeDescriptors(descr, destinationLength, copy, sourceLength,
895
+ arrayLength);
896
+ free(descr);
897
+ destination->descriptor = new;
898
+ destination->length = newLength;
899
+
900
+ // Merging reverse descriptors
901
+ new =
902
+ mergeDescriptors(twinCopy, sourceLength, twinDescr,
903
+ destinationLength, arrayLength);
904
+ free(twinDescr);
905
+ twinDestination->descriptor = new;
906
+ twinDestination->length = newLength;
907
+ }
908
+
909
+ static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
910
+ {
911
+ Coordinate index;
912
+ Nucleotide nucleotide;
913
+
914
+ for (index = 0; index < sourceLength; index++) {
915
+ nucleotide = getNucleotideInDescriptor(copy, index);
916
+ writeNucleotideInDescriptor(nucleotide, descr, index + destinationLength);
917
+ }
918
+ }
919
+
920
+ static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
921
+ {
922
+ Coordinate shift = totalLength - destinationLength - sourceLength;
923
+ Coordinate index;
924
+ Nucleotide nucleotide;
925
+
926
+ for (index = 0; index < sourceLength; index++) {
927
+ nucleotide = getNucleotideInDescriptor(copy, index);
928
+ writeNucleotideInDescriptor(nucleotide, descr, index + shift);
929
+ }
930
+ }
931
+
932
+ void directlyAppendDescriptors(Node * destination, Node * source, Coordinate totalLength)
933
+ {
934
+ Descriptor *copy;
935
+ Descriptor *twinCopy;
936
+ Descriptor *descr;
937
+ Descriptor *twinDescr;
938
+ Coordinate destinationLength, sourceLength;
939
+
940
+ if (source == NULL || destination == NULL)
941
+ return;
942
+
943
+ descr = destination->descriptor;
944
+ twinDescr = destination->twinNode->descriptor;
945
+ copy = source->descriptor;
946
+ twinCopy = source->twinNode->descriptor;
947
+
948
+ // Amendments for empty descriptors
949
+ if (getNodeLength(source) == 0)
950
+ return;
951
+
952
+ destinationLength = destination->length;
953
+ sourceLength = source->length;
954
+
955
+ // Merging forward descriptors
956
+ catDescriptors(descr, destinationLength, copy, sourceLength);
957
+
958
+ // Merging reverse descriptors
959
+ reverseCatDescriptors(twinDescr, destinationLength, twinCopy, sourceLength, totalLength);
960
+
961
+ destination->length += source->length;
962
+ destination->twinNode->length += source->length;
963
+ }
964
+
965
+ static void copyDownDescriptor(Descriptor ** writePtr, int *writeOffset,
966
+ Descriptor * source, Coordinate length)
967
+ {
968
+ Descriptor *readPtr = source;
969
+ Descriptor readCopy = *readPtr;
970
+ int readOffset = 0;
971
+ Coordinate index;
972
+
973
+ for (index = 0; index < length; index++) {
974
+ (**writePtr) >>= 2;
975
+ (**writePtr) += (readCopy & 3) << 6;
976
+ readCopy >>= 2;
977
+
978
+ (*writeOffset)++;
979
+ if (*writeOffset == 4) {
980
+ (*writePtr)++;
981
+ *writeOffset = 0;
982
+ }
983
+
984
+ readOffset++;
985
+ if (readOffset == 4) {
986
+ readPtr++;
987
+ if (index < length - 1)
988
+ readCopy = *readPtr;
989
+ readOffset = 0;
990
+ }
991
+ }
992
+ }
993
+
994
+ static void copyDownSequence(Descriptor ** writePtr, int *writeOffset,
995
+ TightString * sequence, Coordinate start,
996
+ Coordinate finish, int WORDLENGTH)
997
+ {
998
+ boolean forward = (start < finish);
999
+ Coordinate sourceLength = finish - start;
1000
+ Coordinate index;
1001
+ Nucleotide nucleotide;
1002
+
1003
+ if (!forward)
1004
+ sourceLength *= -1;
1005
+
1006
+ for (index = 0; index < sourceLength; index++) {
1007
+ if (forward)
1008
+ nucleotide =
1009
+ getNucleotide(start + WORDLENGTH - 1 + index,
1010
+ sequence);
1011
+ else
1012
+ nucleotide =
1013
+ #ifndef COLOR
1014
+ 3 - getNucleotide(start - index - 1, sequence);
1015
+ #else
1016
+ getNucleotide(start - index - 1, sequence);
1017
+ #endif
1018
+
1019
+ (**writePtr) >>= 2;
1020
+ (**writePtr) += nucleotide << 6;
1021
+
1022
+ (*writeOffset)++;
1023
+ if (*writeOffset == 4) {
1024
+ (*writePtr)++;
1025
+ *writeOffset = 0;
1026
+ }
1027
+ }
1028
+ }
1029
+
1030
+ static Descriptor *appendSequenceToDescriptor(Descriptor * descr,
1031
+ Coordinate nodeLength,
1032
+ PassageMarkerI marker,
1033
+ TightString *sequences,
1034
+ int WORDLENGTH,
1035
+ size_t arrayLength,
1036
+ boolean downStream)
1037
+ {
1038
+ int writeOffset = 0;
1039
+ Descriptor *new = callocOrExit(arrayLength, Descriptor);
1040
+ Descriptor *writePtr = new;
1041
+ TightString *sequence;
1042
+ IDnum sequenceID = getPassageMarkerSequenceID(marker);
1043
+ Coordinate start = getPassageMarkerStart(marker);
1044
+ Coordinate finish = getPassageMarkerFinish(marker);
1045
+
1046
+ if (sequenceID > 0)
1047
+ sequence = getTightStringInArray(sequences, sequenceID - 1);
1048
+ else
1049
+ sequence = getTightStringInArray(sequences, -sequenceID - 1);
1050
+
1051
+ if (downStream)
1052
+ copyDownDescriptor(&writePtr, &writeOffset, descr,
1053
+ nodeLength);
1054
+
1055
+ copyDownSequence(&writePtr, &writeOffset, sequence, start, finish,
1056
+ WORDLENGTH);
1057
+
1058
+ if (!downStream)
1059
+ copyDownDescriptor(&writePtr, &writeOffset, descr,
1060
+ nodeLength);
1061
+
1062
+ if (writeOffset != 0) {
1063
+ while (writeOffset != 4) {
1064
+ (*writePtr) >>= 2;
1065
+ writeOffset++;
1066
+ }
1067
+ }
1068
+
1069
+ return new;
1070
+ }
1071
+
1072
+ void appendSequence(Node * node, TightString * reads,
1073
+ PassageMarkerI guide, Graph * graph)
1074
+ {
1075
+ Descriptor *descr;
1076
+ Descriptor *twinDescr;
1077
+ Coordinate newLength, nodeLength, sourceLength;
1078
+ size_t arrayLength;
1079
+ Descriptor *new;
1080
+ Node *twinNode;
1081
+
1082
+ if (node == NULL)
1083
+ return;
1084
+
1085
+ twinNode = node->twinNode;
1086
+ descr = node->descriptor;
1087
+ twinDescr = twinNode->descriptor;
1088
+ nodeLength = node->length;
1089
+ sourceLength = getPassageMarkerLength(guide);
1090
+
1091
+ // Amendments for empty descriptors
1092
+ if (sourceLength == 0)
1093
+ return;
1094
+
1095
+ newLength = nodeLength + sourceLength;
1096
+ arrayLength = newLength / 4;
1097
+ if (newLength % 4 != 0)
1098
+ arrayLength++;
1099
+
1100
+ // Merging forward descriptors
1101
+ new =
1102
+ appendSequenceToDescriptor(descr, nodeLength, guide, reads,
1103
+ getWordLength(graph), arrayLength,
1104
+ true);
1105
+ free(descr);
1106
+ node->descriptor = new;
1107
+ node->length = newLength;
1108
+
1109
+ // Merging reverse descriptors
1110
+ new =
1111
+ appendSequenceToDescriptor(twinDescr, nodeLength,
1112
+ getTwinMarker(guide), reads,
1113
+ getWordLength(graph), arrayLength,
1114
+ false);
1115
+ free(twinDescr);
1116
+ twinNode->descriptor = new;
1117
+ twinNode->length = newLength;
1118
+ }
1119
+
1120
+ void setMultiplicity(Arc * arc, IDnum mult)
1121
+ {
1122
+ arc->multiplicity = mult;
1123
+ arc->twinArc->multiplicity = mult;
1124
+ }
1125
+
1126
+ // Reshuffles the graph->nodes array to remove NULL pointers
1127
+ // Beware that node IDs are accordingly reshuffled (all pointers remain valid though)
1128
+ void renumberNodes(Graph * graph)
1129
+ {
1130
+ IDnum nodeIndex;
1131
+ Node *currentNode;
1132
+ IDnum counter = 0;
1133
+ IDnum nodes = graph->nodeCount;
1134
+ IDnum newIndex;
1135
+
1136
+ velvetLog("Renumbering nodes\n");
1137
+ velvetLog("Initial node count %li\n", (long) graph->nodeCount);
1138
+
1139
+ for (nodeIndex = 1; nodeIndex <= nodes; nodeIndex++) {
1140
+ currentNode = getNodeInGraph(graph, nodeIndex);
1141
+
1142
+ if (currentNode == NULL)
1143
+ counter++;
1144
+ else if (counter != 0) {
1145
+ newIndex = nodeIndex - counter;
1146
+ currentNode->ID = newIndex;
1147
+ currentNode->twinNode->ID = -newIndex;
1148
+ graph->nodes[newIndex] = currentNode;
1149
+
1150
+ if (graph->nodeReads != NULL) {
1151
+ graph->nodeReads[newIndex + nodes] =
1152
+ graph->nodeReads[nodeIndex + nodes];
1153
+ graph->nodeReadCounts[newIndex + nodes] =
1154
+ graph->nodeReadCounts[nodeIndex +
1155
+ nodes];
1156
+
1157
+ graph->nodeReads[nodeIndex + nodes] = NULL;
1158
+ graph->nodeReadCounts[nodeIndex + nodes] =
1159
+ 0;
1160
+
1161
+ graph->nodeReads[-newIndex + nodes] =
1162
+ graph->nodeReads[-nodeIndex + nodes];
1163
+ graph->nodeReadCounts[-newIndex + nodes] =
1164
+ graph->nodeReadCounts[-nodeIndex +
1165
+ nodes];
1166
+
1167
+ graph->nodeReads[-nodeIndex + nodes] =
1168
+ NULL;
1169
+ graph->nodeReadCounts[-nodeIndex + nodes] =
1170
+ 0;
1171
+ }
1172
+
1173
+ if (graph->gapMarkers != NULL) {
1174
+ graph->gapMarkers[newIndex] =
1175
+ graph->gapMarkers[nodeIndex];
1176
+ graph->gapMarkers[nodeIndex] = NULL;
1177
+ }
1178
+ }
1179
+ }
1180
+
1181
+ // Shitfting array to the left
1182
+ if (graph->nodeReads != NULL && counter != 0) {
1183
+ for (nodeIndex = counter; nodeIndex <= 2 * nodes - counter;
1184
+ nodeIndex++) {
1185
+ graph->nodeReads[nodeIndex - counter] =
1186
+ graph->nodeReads[nodeIndex];
1187
+ graph->nodeReadCounts[nodeIndex - counter] =
1188
+ graph->nodeReadCounts[nodeIndex];
1189
+ }
1190
+ }
1191
+
1192
+ // Rellocating node space
1193
+ graph->nodeCount -= counter;
1194
+ graph->nodes =
1195
+ reallocOrExit(graph->nodes, graph->nodeCount + 1, Node *);
1196
+
1197
+ // Reallocating short read marker arrays
1198
+ if (graph->nodeReads != NULL) {
1199
+ graph->nodeReads =
1200
+ reallocOrExit(graph->nodeReads,
1201
+ 2 * graph->nodeCount +
1202
+ 1, ShortReadMarker *);
1203
+ graph->nodeReadCounts =
1204
+ reallocOrExit(graph->nodeReadCounts,
1205
+ 2 * graph->nodeCount + 1, IDnum);
1206
+ }
1207
+
1208
+ // Reallocating gap marker table
1209
+ if (graph->gapMarkers != NULL)
1210
+ graph->gapMarkers = reallocOrExit(graph->gapMarkers,
1211
+ graph->nodeCount +
1212
+ 1, GapMarker *);
1213
+
1214
+ velvetLog("Removed %li null nodes\n", (long) counter);
1215
+ }
1216
+
1217
+ void splitNodeDescriptor(Node * source, Node * target, Coordinate offset)
1218
+ {
1219
+ Coordinate originalLength = source->length;
1220
+ Coordinate backLength = originalLength - offset;
1221
+ Coordinate index;
1222
+ Descriptor *descriptor, *new;
1223
+ size_t arrayLength;
1224
+ Nucleotide nucleotide;
1225
+
1226
+ source->length = offset;
1227
+ source->twinNode->length = offset;
1228
+
1229
+ if (target != NULL) {
1230
+ target->length = backLength;
1231
+ target->twinNode->length = backLength;
1232
+ free(target->descriptor);
1233
+ free(target->twinNode->descriptor);
1234
+ target->descriptor = NULL;
1235
+ target->twinNode->descriptor = NULL;
1236
+ }
1237
+
1238
+ if (backLength == 0)
1239
+ return;
1240
+
1241
+ descriptor = source->descriptor;
1242
+
1243
+ arrayLength = backLength / 4;
1244
+ if (backLength % 4 > 0)
1245
+ arrayLength++;
1246
+
1247
+ if (target != NULL) {
1248
+ // Target node .. forwards
1249
+ new = mallocOrExit(arrayLength, Descriptor);
1250
+ target->descriptor = new;
1251
+ for (index = 0; index < backLength; index++) {
1252
+ nucleotide =
1253
+ getNucleotideInDescriptor(descriptor, index);
1254
+ writeNucleotideInDescriptor(nucleotide, new,
1255
+ index);
1256
+ }
1257
+ }
1258
+ // Source node
1259
+ for (index = backLength; index < originalLength; index++) {
1260
+ nucleotide = getNucleotideInDescriptor(descriptor, index);
1261
+ writeNucleotideInDescriptor(nucleotide, descriptor,
1262
+ index - backLength);
1263
+ }
1264
+
1265
+ if (target == NULL)
1266
+ return;
1267
+
1268
+ // target node other way
1269
+ descriptor = source->twinNode->descriptor;
1270
+ new = mallocOrExit(arrayLength, Descriptor);
1271
+ target->twinNode->descriptor = new;
1272
+
1273
+ for (index = offset; index < originalLength; index++) {
1274
+ nucleotide = getNucleotideInDescriptor(descriptor, index);
1275
+ writeNucleotideInDescriptor(nucleotide, new,
1276
+ index - offset);
1277
+ }
1278
+ }
1279
+
1280
+ void reduceNode(Node * node)
1281
+ {
1282
+ free(node->descriptor);
1283
+ node->descriptor = NULL;
1284
+ node->length = 0;
1285
+
1286
+ free(node->twinNode->descriptor);
1287
+ node->twinNode->descriptor = NULL;
1288
+ node->twinNode->length = 0;
1289
+ }
1290
+
1291
+ // Allocate memory for an empty graph created with sequenceCount different sequences
1292
+ Graph *emptyGraph(IDnum sequenceCount, int wordLength)
1293
+ {
1294
+ Graph *newGraph = mallocOrExit(1, Graph);
1295
+ newGraph->sequenceCount = sequenceCount;
1296
+ newGraph->arcLookupTable = NULL;
1297
+ newGraph->nodeReads = NULL;
1298
+ newGraph->nodeReadCounts = NULL;
1299
+ newGraph->wordLength = wordLength;
1300
+ newGraph->gapMarkers = NULL;
1301
+ return newGraph;
1302
+ }
1303
+
1304
+ static Descriptor *newPositiveDescriptor(IDnum sequenceID,
1305
+ Coordinate start,
1306
+ Coordinate finish,
1307
+ TightString *sequences,
1308
+ int WORDLENGTH)
1309
+ {
1310
+ Coordinate index;
1311
+ Nucleotide nucleotide;
1312
+ TightString *tString = getTightStringInArray (sequences, sequenceID - 1);
1313
+ Coordinate length = finish - start;
1314
+ Descriptor *res;
1315
+ size_t arrayLength = length / 4;
1316
+
1317
+ if (length % 4 > 0)
1318
+ arrayLength++;
1319
+
1320
+ res = mallocOrExit(arrayLength, Descriptor);
1321
+
1322
+ for (index = 0; index < length; index++) {
1323
+ nucleotide =
1324
+ getNucleotide(start + index + WORDLENGTH - 1, tString);
1325
+ writeNucleotideInDescriptor(nucleotide, res, index);
1326
+ }
1327
+
1328
+ return res;
1329
+
1330
+ }
1331
+
1332
+ static Descriptor *newNegativeDescriptor(IDnum sequenceID,
1333
+ Coordinate start,
1334
+ Coordinate finish,
1335
+ TightString *sequences,
1336
+ int WORDLENGTH)
1337
+ {
1338
+ Coordinate index;
1339
+ Nucleotide nucleotide;
1340
+ TightString *tString = getTightStringInArray (sequences, -sequenceID - 1);
1341
+ Coordinate length = start - finish;
1342
+ Descriptor *res;
1343
+ size_t arrayLength = length / 4;
1344
+
1345
+ if (length % 4 > 0)
1346
+ arrayLength++;
1347
+
1348
+ res = mallocOrExit(arrayLength, Descriptor);
1349
+
1350
+ for (index = 0; index < length; index++) {
1351
+ nucleotide = getNucleotide(start - index, tString);
1352
+ #ifndef COLOR
1353
+ writeNucleotideInDescriptor(3 - nucleotide, res, index);
1354
+ #else
1355
+ writeNucleotideInDescriptor(nucleotide, res, index);
1356
+ #endif
1357
+ }
1358
+
1359
+ return res;
1360
+
1361
+ }
1362
+
1363
+ static Descriptor *newDescriptor(IDnum sequenceID, Coordinate start,
1364
+ Coordinate finish,
1365
+ TightString * sequences, int WORDLENGTH)
1366
+ {
1367
+ if (sequenceID > 0)
1368
+ return newPositiveDescriptor(sequenceID, start, finish,
1369
+ sequences, WORDLENGTH);
1370
+ else
1371
+ return newNegativeDescriptor(sequenceID, start, finish,
1372
+ sequences, WORDLENGTH);
1373
+ }
1374
+
1375
+ // Constructor
1376
+ // Memory allocated
1377
+ Node *newNode(IDnum sequenceID, Coordinate start, Coordinate finish,
1378
+ Coordinate offset, IDnum ID, TightString * sequences,
1379
+ int WORDLENGTH)
1380
+ {
1381
+ Node *newnd = allocateNode();
1382
+ Node *antiNode = allocateNode();
1383
+
1384
+ newnd->ID = ID;
1385
+ newnd->descriptor =
1386
+ newDescriptor(sequenceID, start + offset, finish + offset,
1387
+ sequences, WORDLENGTH);
1388
+ newnd->arc = NULL;
1389
+ newnd->arcCount = 0;
1390
+ newnd->marker = NULL_IDX;
1391
+ newnd->status = false;
1392
+
1393
+ #ifndef SINGLE_COV_CAT
1394
+ Category cat;
1395
+ for (cat = 0; cat < CATEGORIES; cat++) {
1396
+ newnd->virtualCoverage[cat] = 0;
1397
+ newnd->originalVirtualCoverage[cat] = 0;
1398
+ }
1399
+ #else
1400
+ newnd->virtualCoverage = 0;
1401
+ #endif
1402
+
1403
+ antiNode->ID = -ID;
1404
+ antiNode->descriptor =
1405
+ newDescriptor(-sequenceID, finish + offset - 1,
1406
+ start + offset - 1, sequences, WORDLENGTH);
1407
+ antiNode->arc = NULL;
1408
+ antiNode->arcCount = 0;
1409
+ antiNode->marker = NULL_IDX;
1410
+ antiNode->status = false;
1411
+
1412
+ #ifndef SINGLE_COV_CAT
1413
+ for (cat = 0; cat < CATEGORIES; cat++) {
1414
+ antiNode->virtualCoverage[cat] = 0;
1415
+ antiNode->originalVirtualCoverage[cat] = 0;
1416
+ }
1417
+ #else
1418
+ antiNode->virtualCoverage = 0;
1419
+ #endif
1420
+
1421
+ newnd->twinNode = antiNode;
1422
+ antiNode->twinNode = newnd;
1423
+
1424
+ if (sequenceID > 0) {
1425
+ newnd->length = finish - start;
1426
+ antiNode->length = finish - start;
1427
+ } else {
1428
+ newnd->length = start - finish;
1429
+ antiNode->length = start - finish;
1430
+ }
1431
+
1432
+ return newnd;
1433
+ }
1434
+
1435
+ void allocateNodeSpace(Graph * graph, IDnum nodeCount)
1436
+ {
1437
+ graph->nodes = callocOrExit(nodeCount + 1, Node *);
1438
+ graph->nodeCount = nodeCount;
1439
+ }
1440
+
1441
+ boolean getUniqueness(Node * node)
1442
+ {
1443
+ return node->uniqueness;
1444
+ }
1445
+
1446
+ void setUniqueness(Node * node, boolean value)
1447
+ {
1448
+ node->uniqueness = value;
1449
+ node->twinNode->uniqueness = value;
1450
+ }
1451
+
1452
+ Node *emptyNode()
1453
+ {
1454
+ Node *newnd = allocateNode();
1455
+ Node *antiNode = allocateNode();
1456
+
1457
+ newnd->ID = 0;
1458
+ newnd->descriptor = NULL;
1459
+ newnd->arc = NULL;
1460
+ newnd->arcCount = 0;
1461
+ newnd->marker = NULL_IDX;
1462
+ newnd->length = 0;
1463
+ newnd->uniqueness = false;
1464
+
1465
+ #ifndef SINGLE_COV_CAT
1466
+ Category cat;
1467
+ for (cat = 0; cat < CATEGORIES; cat++) {
1468
+ newnd->virtualCoverage[cat] = 0;
1469
+ newnd->originalVirtualCoverage[cat] = 0;
1470
+ }
1471
+ #else
1472
+ newnd->virtualCoverage = 0;
1473
+ #endif
1474
+
1475
+ antiNode->ID = 0;
1476
+ antiNode->descriptor = NULL;
1477
+ antiNode->arc = NULL;
1478
+ antiNode->arcCount = 0;
1479
+ antiNode->marker = NULL_IDX;
1480
+ antiNode->length = 0;
1481
+ antiNode->uniqueness = false;
1482
+
1483
+ #ifndef SINGLE_COV_CAT
1484
+ for (cat = 0; cat < CATEGORIES; cat++) {
1485
+ antiNode->virtualCoverage[cat] = 0;
1486
+ antiNode->originalVirtualCoverage[cat] = 0;
1487
+ }
1488
+ #else
1489
+ antiNode->virtualCoverage = 0;
1490
+ #endif
1491
+
1492
+ newnd->twinNode = antiNode;
1493
+ antiNode->twinNode = newnd;
1494
+
1495
+ return newnd;
1496
+
1497
+ }
1498
+
1499
+ Node *addEmptyNodeToGraph(Graph * graph, IDnum ID)
1500
+ {
1501
+ Node *newnd = emptyNode();
1502
+
1503
+ newnd->ID = ID;
1504
+ newnd->twinNode->ID = -ID;
1505
+
1506
+ graph->nodes[ID] = newnd;
1507
+
1508
+ return newnd;
1509
+
1510
+ }
1511
+
1512
+ #ifndef SINGLE_COV_CAT
1513
+
1514
+ void setVirtualCoverage(Node * node, Category category,
1515
+ Coordinate coverage)
1516
+ {
1517
+ node->virtualCoverage[category] = coverage;
1518
+ node->twinNode->virtualCoverage[category] =
1519
+ node->virtualCoverage[category];
1520
+ }
1521
+
1522
+ void incrementVirtualCoverage(Node * node, Category category,
1523
+ Coordinate coverage)
1524
+ {
1525
+ node->virtualCoverage[category] += coverage;
1526
+ node->twinNode->virtualCoverage[category] =
1527
+ node->virtualCoverage[category];
1528
+ }
1529
+
1530
+ Coordinate getVirtualCoverage(Node * node, Category category)
1531
+ {
1532
+ return node->virtualCoverage[category];
1533
+ }
1534
+
1535
+ Coordinate getTotalCoverage(Node * node)
1536
+ {
1537
+ Category cat;
1538
+ Coordinate coverage = 0;
1539
+
1540
+ for (cat = 0; cat < CATEGORIES; cat++)
1541
+ coverage += node->virtualCoverage[cat];
1542
+
1543
+ return coverage;
1544
+ }
1545
+
1546
+ void setOriginalVirtualCoverage(Node * node, Category category,
1547
+ Coordinate coverage)
1548
+ {
1549
+ node->originalVirtualCoverage[category] = coverage;
1550
+ node->twinNode->originalVirtualCoverage[category] =
1551
+ node->originalVirtualCoverage[category];
1552
+ }
1553
+
1554
+ void incrementOriginalVirtualCoverage(Node * node, Category category,
1555
+ Coordinate coverage)
1556
+ {
1557
+ node->originalVirtualCoverage[category] += coverage;
1558
+ node->twinNode->originalVirtualCoverage[category] =
1559
+ node->originalVirtualCoverage[category];
1560
+ }
1561
+
1562
+ Coordinate getOriginalVirtualCoverage(Node * node, Category category)
1563
+ {
1564
+ return node->originalVirtualCoverage[category];
1565
+ }
1566
+
1567
+ #else
1568
+
1569
+ void setVirtualCoverage(Node * node,
1570
+ Coordinate coverage)
1571
+ {
1572
+ node->virtualCoverage = coverage;
1573
+ node->twinNode->virtualCoverage = coverage;
1574
+ }
1575
+
1576
+ void incrementVirtualCoverage(Node * node,
1577
+ Coordinate coverage)
1578
+ {
1579
+ node->virtualCoverage += coverage;
1580
+ node->twinNode->virtualCoverage += coverage;
1581
+ }
1582
+
1583
+ Coordinate getVirtualCoverage(Node * node)
1584
+ {
1585
+ return node->virtualCoverage;
1586
+ }
1587
+
1588
+ Coordinate getTotalCoverage(Node * node)
1589
+ {
1590
+ return node->virtualCoverage;
1591
+ }
1592
+
1593
+ #endif
1594
+
1595
+ boolean hasSingleArc(Node * node)
1596
+ {
1597
+ return node->arcCount == 1;
1598
+ }
1599
+
1600
+ void activateArcLookupTable(Graph * graph)
1601
+ {
1602
+ IDnum index;
1603
+ Node *node;
1604
+ Arc *arc;
1605
+ IDnum nodes = graph->nodeCount;
1606
+ IDnum twinOriginID, destinationID, hash;
1607
+ Arc **table;
1608
+
1609
+ velvetLog("Activating arc lookup table\n");
1610
+
1611
+ graph->arcLookupTable = callocOrExit(6 * nodes + 1, Arc *);
1612
+
1613
+ table = graph->arcLookupTable;
1614
+
1615
+ for (index = -nodes; index <= nodes; index++) {
1616
+ if (index == 0)
1617
+ continue;
1618
+
1619
+ node = getNodeInGraph(graph, index);
1620
+ if (node == 0)
1621
+ continue;
1622
+
1623
+ for (arc = getArc(node); arc != NULL;
1624
+ arc = getNextArc(arc)) {
1625
+ twinOriginID = arc->twinArc->destination->ID;
1626
+ destinationID = arc->destination->ID;
1627
+ hash =
1628
+ 3 * nodes - 2 * twinOriginID + destinationID;
1629
+ arc->nextInLookupTable = table[hash];
1630
+ table[hash] = arc;
1631
+ }
1632
+ }
1633
+
1634
+ velvetLog("Done activating arc lookup table\n");
1635
+ }
1636
+
1637
+ void deactivateArcLookupTable(Graph * graph)
1638
+ {
1639
+ free(graph->arcLookupTable);
1640
+ graph->arcLookupTable = NULL;
1641
+ }
1642
+
1643
+ static void exportNode(FILE * outfile, Node * node, void *withSequence)
1644
+ {
1645
+ Coordinate index;
1646
+ Nucleotide nucleotide;
1647
+
1648
+ if (node == NULL)
1649
+ return;
1650
+
1651
+ velvetFprintf(outfile, "NODE\t%ld\t%lld", (long) node->ID, (long long) node->length);
1652
+
1653
+ #ifndef SINGLE_COV_CAT
1654
+ Category cat;
1655
+ for (cat = 0; cat < CATEGORIES; cat++)
1656
+ velvetFprintf(outfile, "\t%lld\t%lld", (long long) node->virtualCoverage[cat],
1657
+ (long long) node->originalVirtualCoverage[cat]);
1658
+ velvetFprintf(outfile, "\n");
1659
+ #else
1660
+ velvetFprintf(outfile, "\t%lld\n", (long long) node->virtualCoverage);
1661
+ #endif
1662
+
1663
+ if (withSequence == NULL)
1664
+ return;
1665
+
1666
+ for (index = 0; index < node->length; index++) {
1667
+ nucleotide =
1668
+ getNucleotideInDescriptor(node->descriptor, index);
1669
+ switch (nucleotide) {
1670
+ case ADENINE:
1671
+ velvetFprintf(outfile, "A");
1672
+ break;
1673
+ case CYTOSINE:
1674
+ velvetFprintf(outfile, "C");
1675
+ break;
1676
+ case GUANINE:
1677
+ velvetFprintf(outfile, "G");
1678
+ break;
1679
+ case THYMINE:
1680
+ velvetFprintf(outfile, "T");
1681
+ break;
1682
+ }
1683
+ }
1684
+ velvetFprintf(outfile, "\n");
1685
+
1686
+ for (index = 0; index < node->length; index++) {
1687
+ nucleotide =
1688
+ getNucleotideInDescriptor(node->twinNode->descriptor,
1689
+ index);
1690
+ switch (nucleotide) {
1691
+ case ADENINE:
1692
+ velvetFprintf(outfile, "A");
1693
+ break;
1694
+ case CYTOSINE:
1695
+ velvetFprintf(outfile, "C");
1696
+ break;
1697
+ case GUANINE:
1698
+ velvetFprintf(outfile, "G");
1699
+ break;
1700
+ case THYMINE:
1701
+ velvetFprintf(outfile, "T");
1702
+ break;
1703
+ }
1704
+ }
1705
+ velvetFprintf(outfile, "\n");
1706
+ }
1707
+
1708
+ static void exportArc(FILE * outfile, Arc * arc)
1709
+ {
1710
+ IDnum originID, destinationID;
1711
+ IDnum absOriginID, absDestinationID;
1712
+
1713
+ if (arc == NULL)
1714
+ return;
1715
+
1716
+ absOriginID = originID = -arc->twinArc->destination->ID;
1717
+ absDestinationID = destinationID = arc->destination->ID;
1718
+
1719
+ if (absOriginID < 0)
1720
+ absOriginID = -absOriginID;
1721
+ if (absDestinationID < 0)
1722
+ absDestinationID = -absDestinationID;
1723
+
1724
+ if (absDestinationID < absOriginID)
1725
+ return;
1726
+
1727
+ if (originID == destinationID && originID < 0)
1728
+ return;
1729
+
1730
+ velvetFprintf(outfile, "ARC\t%li\t%li\t%li\n", (long) originID, (long) destinationID,
1731
+ (long) arc->multiplicity);
1732
+ }
1733
+
1734
+ // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
1735
+ static Arc *mergeArcLists(Arc * left, Arc * right)
1736
+ {
1737
+ Arc *mergedList = NULL;
1738
+ Arc *tail = NULL;
1739
+
1740
+ // Choose first element:
1741
+ if (left->destination->ID <= right->destination->ID) {
1742
+ mergedList = left;
1743
+ tail = left;
1744
+ left = left->next;
1745
+ } else {
1746
+ mergedList = right;
1747
+ tail = right;
1748
+ right = right->next;
1749
+ }
1750
+
1751
+ // Iterate while both lists are still non empty
1752
+ while (left != NULL && right != NULL) {
1753
+ if (left->destination->ID <= right->destination->ID) {
1754
+ tail->next = left;
1755
+ left->previous = tail;
1756
+ left = left->next;
1757
+ } else {
1758
+ tail->next = right;
1759
+ right->previous = tail;
1760
+ right = right->next;
1761
+ }
1762
+
1763
+ tail = tail->next;
1764
+ }
1765
+
1766
+ // Concatenate the remaining list at the end of the merged list
1767
+ if (left != NULL) {
1768
+ tail->next = left;
1769
+ left->previous = tail;
1770
+ }
1771
+
1772
+ if (right != NULL) {
1773
+ tail->next = right;
1774
+ right->previous = tail;
1775
+ }
1776
+
1777
+ return mergedList;
1778
+ }
1779
+
1780
+ static void arcMergeSort(Arc ** arcPtr, IDnum count)
1781
+ {
1782
+
1783
+ IDnum half = count / 2;
1784
+ Arc *left = *arcPtr;
1785
+ Arc *ptr = left;
1786
+ Arc *right;
1787
+ IDnum index;
1788
+
1789
+ if (count == 0 || count == 1)
1790
+ return;
1791
+
1792
+ if (count == 2) {
1793
+ if ((*arcPtr)->destination->ID >
1794
+ (*arcPtr)->next->destination->ID) {
1795
+ (*arcPtr)->next->next = *arcPtr;
1796
+ (*arcPtr)->previous = (*arcPtr)->next;
1797
+ *arcPtr = (*arcPtr)->next;
1798
+ (*arcPtr)->next->next = NULL;
1799
+ (*arcPtr)->previous = NULL;
1800
+ }
1801
+ return;
1802
+ }
1803
+
1804
+ for (index = 0; index < half - 1; index++) {
1805
+ ptr = ptr->next;
1806
+ if (ptr == NULL)
1807
+ return;
1808
+ }
1809
+
1810
+ right = ptr->next;
1811
+ ptr->next = NULL;
1812
+ right->previous = NULL;
1813
+
1814
+ arcMergeSort(&left, half);
1815
+ arcMergeSort(&right, count - half);
1816
+ *arcPtr = mergeArcLists(left, right);
1817
+ }
1818
+
1819
+ static void sortNodeArcs(Node * node)
1820
+ {
1821
+ Arc *arc;
1822
+ IDnum count = 0;
1823
+
1824
+ for (arc = getArc(node); arc != NULL; arc = getNextArc(arc))
1825
+ count++;
1826
+
1827
+ if (count == 0)
1828
+ return;
1829
+
1830
+ arc = getArc(node);
1831
+ arcMergeSort(&arc, count);
1832
+
1833
+ node->arc = arc;
1834
+ }
1835
+
1836
+ // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
1837
+ static GapMarker *mergeGapMarkerLists(GapMarker * left, GapMarker * right)
1838
+ {
1839
+ GapMarker *mergedList = NULL;
1840
+ GapMarker *tail = NULL;
1841
+
1842
+ // Choose first element:
1843
+ if (left->position <= right->position) {
1844
+ mergedList = left;
1845
+ tail = left;
1846
+ left = left->next;
1847
+ } else {
1848
+ mergedList = right;
1849
+ tail = right;
1850
+ right = right->next;
1851
+ }
1852
+
1853
+ // Iterate while both lists are still non empty
1854
+ while (left != NULL && right != NULL) {
1855
+ if (left->position <= right->position) {
1856
+ tail->next = left;
1857
+ left = left->next;
1858
+ } else {
1859
+ tail->next = right;
1860
+ right = right->next;
1861
+ }
1862
+
1863
+ tail = tail->next;
1864
+ }
1865
+
1866
+ // Concatenate the remaining list at the end of the merged list
1867
+ if (left != NULL)
1868
+ tail->next = left;
1869
+
1870
+ if (right != NULL)
1871
+ tail->next = right;
1872
+
1873
+ return mergedList;
1874
+ }
1875
+
1876
+ static void gapMergeSort(GapMarker ** gapPtr, IDnum count)
1877
+ {
1878
+
1879
+ IDnum half = count / 2;
1880
+ GapMarker *left = *gapPtr;
1881
+ GapMarker *ptr = left;
1882
+ GapMarker *right;
1883
+ IDnum index;
1884
+
1885
+ if (count == 0 || count == 1)
1886
+ return;
1887
+
1888
+ if (count == 2) {
1889
+ if ((*gapPtr)->position > (*gapPtr)->next->position) {
1890
+ (*gapPtr)->next->next = *gapPtr;
1891
+ *gapPtr = (*gapPtr)->next;
1892
+ (*gapPtr)->next->next = NULL;
1893
+ }
1894
+ return;
1895
+ }
1896
+
1897
+ for (index = 0; index < half - 1; index++) {
1898
+ ptr = ptr->next;
1899
+ if (ptr == NULL)
1900
+ return;
1901
+ }
1902
+
1903
+ right = ptr->next;
1904
+ ptr->next = NULL;
1905
+
1906
+ gapMergeSort(&left, half);
1907
+ gapMergeSort(&right, count - half);
1908
+ *gapPtr = mergeGapMarkerLists(left, right);
1909
+ }
1910
+
1911
+ static void sortNodeGapMarkers(Node * node, Graph * graph)
1912
+ {
1913
+ GapMarker *gap;
1914
+ IDnum count = 0;
1915
+ IDnum ID = getNodeID(node);
1916
+
1917
+ if (ID < 0)
1918
+ ID = -ID;
1919
+
1920
+ for (gap = graph->gapMarkers[ID]; gap != NULL; gap = gap->next)
1921
+ count++;
1922
+
1923
+ if (count == 0)
1924
+ return;
1925
+
1926
+ gap = graph->gapMarkers[ID];
1927
+ gapMergeSort(&gap, count);
1928
+
1929
+ graph->gapMarkers[ID] = gap;
1930
+ }
1931
+
1932
+ void sortGapMarkers(Graph * graph)
1933
+ {
1934
+ IDnum index;
1935
+ Node *node;
1936
+
1937
+ if (graph->gapMarkers == NULL)
1938
+ return;
1939
+
1940
+ for (index = 1; index <= nodeCount(graph); index++) {
1941
+ node = getNodeInGraph(graph, index);
1942
+ if (node)
1943
+ sortNodeGapMarkers(node, graph);
1944
+ }
1945
+ }
1946
+
1947
+ void exportGraph(char *filename, Graph * graph, TightString * sequences)
1948
+ {
1949
+ IDnum index;
1950
+ FILE *outfile;
1951
+ Node *node;
1952
+ Arc *arc;
1953
+ PassageMarkerI marker;
1954
+ ShortReadMarker *reads;
1955
+ IDnum readCount, readIndex;
1956
+
1957
+ if (graph == NULL) {
1958
+ return;
1959
+ }
1960
+
1961
+ outfile = fopen(filename, "w");
1962
+ if (outfile == NULL) {
1963
+ velvetLog("Couldn't open file, sorry\n");
1964
+ return;
1965
+ } else
1966
+ velvetLog("Writing into graph file %s...\n", filename);
1967
+
1968
+ // General data
1969
+ velvetFprintf(outfile, "%li\t%li\t%i\t%i\n", (long) graph->nodeCount,
1970
+ (long) graph->sequenceCount, graph->wordLength, (int) graph->double_stranded);
1971
+
1972
+ // Node info
1973
+ for (index = 1; index <= graph->nodeCount; index++) {
1974
+ node = getNodeInGraph(graph, index);
1975
+ exportNode(outfile, node, (void *) sequences);
1976
+ }
1977
+
1978
+ // Arc info
1979
+ for (index = 1; index <= graph->nodeCount; index++) {
1980
+ node = getNodeInGraph(graph, index);
1981
+ if (node == NULL)
1982
+ continue;
1983
+
1984
+ sortNodeArcs(node);
1985
+ sortNodeArcs(getTwinNode(node));
1986
+
1987
+ for (arc = node->arc; arc != NULL; arc = arc->next)
1988
+ exportArc(outfile, arc);
1989
+ for (arc = node->twinNode->arc; arc != NULL;
1990
+ arc = arc->next)
1991
+ exportArc(outfile, arc);
1992
+ }
1993
+
1994
+ // Sequence info
1995
+ for (index = 1; index <= graph->nodeCount; index++) {
1996
+ node = getNodeInGraph(graph, index);
1997
+ if (node == NULL)
1998
+ continue;
1999
+ for (marker = node->marker; marker != NULL_IDX;
2000
+ marker = getNextInNode(marker))
2001
+ exportMarker(outfile, marker, sequences,
2002
+ graph->wordLength);
2003
+ }
2004
+
2005
+ // Node reads
2006
+ if (readStartsAreActivated(graph)) {
2007
+ for (index = 0; index <= graph->nodeCount * 2; index++) {
2008
+ readCount = graph->nodeReadCounts[index];
2009
+ if (readCount == 0)
2010
+ continue;
2011
+
2012
+ velvetFprintf(outfile, "NR\t%li\t%li\n",
2013
+ (long) (index - graph->nodeCount), (long) readCount);
2014
+
2015
+ reads = graph->nodeReads[index];
2016
+ for (readIndex = 0; readIndex < readCount;
2017
+ readIndex++)
2018
+ velvetFprintf(outfile, "%ld\t%lld\t%d\n",
2019
+ (long) reads[readIndex].readID,
2020
+ (long long) reads[readIndex].position,
2021
+ (int) reads[readIndex].offset);
2022
+ }
2023
+ }
2024
+
2025
+ fclose(outfile);
2026
+ }
2027
+
2028
+ Graph *importGraph(char *filename)
2029
+ {
2030
+ FILE *file = fopen(filename, "r");
2031
+ const int maxline = MAXLINE;
2032
+ char line[MAXLINE];
2033
+ Graph *graph;
2034
+ Coordinate coverage;
2035
+ IDnum nodeCounter, sequenceCount;
2036
+ Node *node, *twin;
2037
+ Arc *arc;
2038
+ IDnum originID, destinationID, multiplicity;
2039
+ PassageMarkerI newMarker, marker;
2040
+ IDnum nodeID, seqID;
2041
+ Coordinate index;
2042
+ Coordinate start, finish;
2043
+ Coordinate startOffset, finishOffset;
2044
+ boolean finished = false;
2045
+ size_t arrayLength;
2046
+ IDnum readCount;
2047
+ ShortReadMarker *array;
2048
+ int wordLength, sCount;
2049
+ ShortLength length;
2050
+ long long_var, long_var2, long_var3;
2051
+ long long longlong_var, longlong_var2, longlong_var3, longlong_var4;
2052
+ short short_var;
2053
+ char c;
2054
+
2055
+ if (file == NULL)
2056
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
2057
+
2058
+ velvetLog("Reading graph file %s\n", filename);
2059
+
2060
+ // First line
2061
+ if (!fgets(line, maxline, file))
2062
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2063
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2064
+ &wordLength, &short_var);
2065
+ nodeCounter = (IDnum) long_var;
2066
+ sequenceCount = (IDnum) long_var2;
2067
+ graph = emptyGraph(sequenceCount, wordLength);
2068
+ graph->double_stranded = (boolean) short_var;
2069
+ resetWordFilter(wordLength);
2070
+ allocateNodeSpace(graph, nodeCounter);
2071
+
2072
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2073
+ (long) sequenceCount);
2074
+
2075
+ if (nodeCounter == 0)
2076
+ return graph;
2077
+
2078
+ // Read nodes
2079
+ if (!fgets(line, maxline, file))
2080
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2081
+ while (!finished && strncmp(line, "NODE", 4) == 0) {
2082
+ strtok(line, "\t\n");
2083
+ sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2084
+ nodeID = (IDnum) long_var;
2085
+ node = addEmptyNodeToGraph(graph, nodeID);
2086
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2087
+ node->length = (Coordinate) longlong_var;
2088
+
2089
+ #ifndef SINGLE_COV_CAT
2090
+ Category cat;
2091
+ Coordinate originalCoverage;
2092
+ for (cat = 0; cat < CATEGORIES; cat++) {
2093
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2094
+ coverage = (Coordinate) longlong_var;
2095
+ setVirtualCoverage(node, cat, coverage);
2096
+ sscanf(strtok(NULL, "\t\n"), "%lld",
2097
+ &longlong_var);
2098
+ originalCoverage = (Coordinate) longlong_var;
2099
+ setOriginalVirtualCoverage(node, cat,
2100
+ originalCoverage);
2101
+ }
2102
+ #else
2103
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2104
+ coverage = (Coordinate) longlong_var;
2105
+ setVirtualCoverage(node, coverage);
2106
+ #endif
2107
+
2108
+ arrayLength = node->length / 4;
2109
+ if (node->length % 4 > 0)
2110
+ arrayLength++;
2111
+ node->descriptor =
2112
+ callocOrExit(arrayLength, Descriptor);
2113
+
2114
+ index = 0;
2115
+ while ((c = fgetc(file)) != '\n' && c != EOF) {
2116
+ if (c == 'A')
2117
+ writeNucleotideInDescriptor(ADENINE,
2118
+ node->
2119
+ descriptor,
2120
+ index++);
2121
+ else if (c == 'C')
2122
+ writeNucleotideInDescriptor(CYTOSINE,
2123
+ node->
2124
+ descriptor,
2125
+ index++);
2126
+ else if (c == 'G')
2127
+ writeNucleotideInDescriptor(GUANINE,
2128
+ node->
2129
+ descriptor,
2130
+ index++);
2131
+ else if (c == 'T')
2132
+ writeNucleotideInDescriptor(THYMINE,
2133
+ node->
2134
+ descriptor,
2135
+ index++);
2136
+ }
2137
+
2138
+ twin = node->twinNode;
2139
+ twin->length = node->length;
2140
+ twin->descriptor =
2141
+ callocOrExit(arrayLength, Descriptor);
2142
+ index = 0;
2143
+ while ((c = fgetc(file)) != '\n' && c != EOF) {
2144
+ if (c == 'A')
2145
+ writeNucleotideInDescriptor(ADENINE,
2146
+ twin->
2147
+ descriptor,
2148
+ index++);
2149
+ else if (c == 'C')
2150
+ writeNucleotideInDescriptor(CYTOSINE,
2151
+ twin->
2152
+ descriptor,
2153
+ index++);
2154
+ else if (c == 'G')
2155
+ writeNucleotideInDescriptor(GUANINE,
2156
+ twin->
2157
+ descriptor,
2158
+ index++);
2159
+ else if (c == 'T')
2160
+ writeNucleotideInDescriptor(THYMINE,
2161
+ twin->
2162
+ descriptor,
2163
+ index++);
2164
+ }
2165
+
2166
+ if (fgets(line, maxline, file) == NULL)
2167
+ finished = true;
2168
+ }
2169
+
2170
+ // Read arcs
2171
+ while (!finished && line[0] == 'A') {
2172
+ sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2173
+ &long_var2, &long_var3);
2174
+ originID = (IDnum) long_var;
2175
+ destinationID = (IDnum) long_var2;
2176
+ multiplicity = (IDnum) long_var3;
2177
+ arc =
2178
+ createArc(getNodeInGraph(graph, originID),
2179
+ getNodeInGraph(graph, destinationID), graph);
2180
+ setMultiplicity(arc, multiplicity);
2181
+ if (fgets(line, maxline, file) == NULL)
2182
+ finished = true;
2183
+ }
2184
+
2185
+ // Read sequences
2186
+ while (!finished && line[0] != 'N') {
2187
+ sscanf(line, "SEQ\t%ld\n", &long_var);
2188
+ seqID = (IDnum) long_var;
2189
+ marker = NULL_IDX;
2190
+ if (!fgets(line, maxline, file))
2191
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2192
+
2193
+ while (!finished && line[0] != 'N' && line[0] != 'S') {
2194
+ sCount =
2195
+ sscanf(line, "%ld\t%lld\t%lld\t%lld\t%lld\n",
2196
+ &long_var, &longlong_var, &longlong_var2, &longlong_var3,
2197
+ &longlong_var4);
2198
+ nodeID = (IDnum) long_var;
2199
+ startOffset = (Coordinate) longlong_var;
2200
+ start = (Coordinate) longlong_var2;
2201
+ finish = (Coordinate) longlong_var3;
2202
+ finishOffset = (Coordinate) longlong_var4;
2203
+ if (sCount != 5) {
2204
+ velvetLog
2205
+ ("ERROR: reading in graph - only %d items read for line '%s'",
2206
+ sCount, line);
2207
+ #ifdef DEBUG
2208
+ abort();
2209
+ #endif
2210
+ exit(1);
2211
+ }
2212
+ newMarker =
2213
+ newPassageMarker(seqID, start, finish,
2214
+ startOffset, finishOffset);
2215
+ transposePassageMarker(newMarker,
2216
+ getNodeInGraph(graph,
2217
+ nodeID));
2218
+ connectPassageMarkers(marker, newMarker, graph);
2219
+ marker = newMarker;
2220
+ if (fgets(line, maxline, file) == NULL)
2221
+ finished = true;
2222
+ }
2223
+ }
2224
+
2225
+ // Node reads
2226
+ while (!finished) {
2227
+ sscanf(line, "NR\t%ld\t%ld\n", &long_var, &long_var2);
2228
+ nodeID = (IDnum) long_var;
2229
+ readCount = (IDnum) long_var2;
2230
+ if (!readStartsAreActivated(graph))
2231
+ activateReadStarts(graph);
2232
+
2233
+ graph->nodeReadCounts[nodeID + graph->nodeCount] =
2234
+ readCount;
2235
+ array = mallocOrExit(readCount, ShortReadMarker);
2236
+ graph->nodeReads[nodeID + graph->nodeCount] = array;
2237
+
2238
+ readCount = 0;
2239
+ if (!fgets(line, maxline, file))
2240
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2241
+ while (!finished && line[0] != 'N') {
2242
+ sscanf(line, "%ld\t%lld\t%hd\n", &long_var,
2243
+ &longlong_var, &short_var);
2244
+ seqID = (IDnum) long_var;
2245
+ startOffset = (Coordinate) longlong_var;
2246
+ length = (ShortLength) short_var;
2247
+ array[readCount].readID = seqID;
2248
+ array[readCount].position = startOffset;
2249
+ array[readCount].offset = length;
2250
+ readCount++;
2251
+ if (fgets(line, maxline, file) == NULL)
2252
+ finished = true;
2253
+ }
2254
+ }
2255
+
2256
+ //velvetLog("New graph has %d nodes\n", graph->nodeCount);
2257
+
2258
+ fclose(file);
2259
+ //velvetLog("Done, exiting\n");
2260
+ return graph;
2261
+ }
2262
+
2263
+ Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
2264
+ {
2265
+ FILE *file = fopen(preGraphFilename, "r");
2266
+ const int maxline = MAXLINE;
2267
+ char line[MAXLINE];
2268
+
2269
+ Graph *graph;
2270
+ IDnum nodeCounter, sequenceCount;
2271
+
2272
+ Node *node, *twin;
2273
+ IDnum nodeID = 0;
2274
+ Coordinate index, nodeLength;
2275
+ char c;
2276
+ int wordLength, wordShift;
2277
+ size_t arrayLength;
2278
+ short short_var;
2279
+ long long_var, long_var2;
2280
+ long long longlong_var;
2281
+
2282
+ if (file == NULL)
2283
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
2284
+
2285
+ velvetLog("Reading pre-graph file %s\n", preGraphFilename);
2286
+
2287
+ // First line
2288
+ if (!fgets(line, maxline, file))
2289
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2290
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2291
+ &wordLength, &short_var);
2292
+ nodeCounter = (IDnum) long_var;
2293
+ sequenceCount = (IDnum) long_var2;
2294
+ *double_strand = (boolean) short_var;
2295
+ wordShift = wordLength - 1;
2296
+ graph = emptyGraph(sequenceCount, wordLength);
2297
+ graph->double_stranded = *double_strand;
2298
+ resetWordFilter(wordLength);
2299
+ allocateNodeSpace(graph, nodeCounter);
2300
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2301
+ (long) sequenceCount);
2302
+
2303
+ // Read nodes
2304
+ if (nodeCounter == 0)
2305
+ return graph;
2306
+
2307
+ if (!fgets(line, maxline, file))
2308
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2309
+ while (line[0] == 'N') {
2310
+ nodeID++;
2311
+ node = addEmptyNodeToGraph(graph, nodeID);
2312
+
2313
+ sscanf(line, "%*s\t%*i\t%lli\n", &longlong_var);
2314
+ node->length = (Coordinate) longlong_var;
2315
+ nodeLength = node->length;
2316
+ arrayLength = node->length / 4;
2317
+ if (node->length % 4 > 0)
2318
+ arrayLength++;
2319
+ node->descriptor =
2320
+ callocOrExit(arrayLength, Descriptor);
2321
+
2322
+ twin = node->twinNode;
2323
+ twin->length = nodeLength;
2324
+ twin->descriptor =
2325
+ callocOrExit(arrayLength, Descriptor);
2326
+
2327
+
2328
+ index = 0;
2329
+ while ((c = getc(file)) != '\n') {
2330
+ if (c == 'A') {
2331
+ if (index - wordShift >= 0)
2332
+ writeNucleotideInDescriptor(ADENINE,
2333
+ node->
2334
+ descriptor,
2335
+ index - wordShift);
2336
+ if (nodeLength - index - 1 >= 0) {
2337
+ #ifndef COLOR
2338
+ writeNucleotideInDescriptor(THYMINE,
2339
+ twin->
2340
+ descriptor,
2341
+ nodeLength - index - 1);
2342
+ #else
2343
+ writeNucleotideInDescriptor(ADENINE,
2344
+ twin->
2345
+ descriptor,
2346
+ nodeLength - index - 1);
2347
+ #endif
2348
+ }
2349
+ } else if (c == 'C') {
2350
+ if (index - wordShift >= 0)
2351
+ writeNucleotideInDescriptor(CYTOSINE,
2352
+ node->
2353
+ descriptor,
2354
+ index - wordShift);
2355
+ if (nodeLength - index - 1 >= 0) {
2356
+ #ifndef COLOR
2357
+ writeNucleotideInDescriptor(GUANINE,
2358
+ twin->
2359
+ descriptor,
2360
+ nodeLength - index - 1);
2361
+ #else
2362
+ writeNucleotideInDescriptor(CYTOSINE,
2363
+ twin->
2364
+ descriptor,
2365
+ nodeLength - index - 1);
2366
+ #endif
2367
+ }
2368
+ } else if (c == 'G') {
2369
+ if (index - wordShift >= 0)
2370
+ writeNucleotideInDescriptor(GUANINE,
2371
+ node->
2372
+ descriptor,
2373
+ index - wordShift);
2374
+ if (nodeLength - index - 1 >= 0) {
2375
+ #ifndef COLOR
2376
+ writeNucleotideInDescriptor(CYTOSINE,
2377
+ twin->
2378
+ descriptor,
2379
+ nodeLength - index - 1);
2380
+ #else
2381
+ writeNucleotideInDescriptor(GUANINE,
2382
+ twin->
2383
+ descriptor,
2384
+ nodeLength - index - 1);
2385
+ #endif
2386
+ }
2387
+ } else if (c == 'T') {
2388
+ if (index - wordShift >= 0)
2389
+ writeNucleotideInDescriptor(THYMINE,
2390
+ node->
2391
+ descriptor,
2392
+ index - wordShift);
2393
+ if (nodeLength - index - 1 >= 0) {
2394
+ #ifndef COLOR
2395
+ writeNucleotideInDescriptor(ADENINE,
2396
+ twin->
2397
+ descriptor,
2398
+ nodeLength - index - 1);
2399
+ #else
2400
+ writeNucleotideInDescriptor(THYMINE,
2401
+ twin->
2402
+ descriptor,
2403
+ nodeLength - index - 1);
2404
+ #endif
2405
+ }
2406
+ }
2407
+
2408
+ index++;
2409
+ }
2410
+
2411
+ if (fgets(line, maxline, file) == NULL) {
2412
+ fclose(file);
2413
+ return graph;
2414
+ }
2415
+ }
2416
+
2417
+ fclose(file);
2418
+ return graph;
2419
+ }
2420
+
2421
+ Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_strand)
2422
+ {
2423
+ FILE *file = fopen(connectedGraphFilename, "r");
2424
+ const int maxline = MAXLINE;
2425
+ char line[MAXLINE];
2426
+ Coordinate coverage;
2427
+ Arc *arc;
2428
+ IDnum originID, destinationID, multiplicity;
2429
+ boolean finished = false;
2430
+ long long_var3;
2431
+
2432
+ Graph *graph;
2433
+ IDnum nodeCounter, sequenceCount;
2434
+
2435
+ Node *node, *twin;
2436
+ IDnum nodeID = 0;
2437
+ Coordinate index, nodeLength;
2438
+ char c;
2439
+ int wordLength, wordShift;
2440
+ size_t arrayLength;
2441
+ short short_var;
2442
+ long long_var, long_var2;
2443
+ long long longlong_var;
2444
+
2445
+ if (file == NULL)
2446
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", connectedGraphFilename);
2447
+
2448
+ velvetLog("Reading connected graph file %s\n", connectedGraphFilename);
2449
+
2450
+ // First line
2451
+ if (!fgets(line, maxline, file))
2452
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2453
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2454
+ &wordLength, &short_var);
2455
+ nodeCounter = (IDnum) long_var;
2456
+ sequenceCount = (IDnum) long_var2;
2457
+ *double_strand = (boolean) short_var;
2458
+ wordShift = wordLength - 1;
2459
+ graph = emptyGraph(sequenceCount, wordLength);
2460
+ graph->double_stranded = *double_strand;
2461
+ resetWordFilter(wordLength);
2462
+ allocateNodeSpace(graph, nodeCounter);
2463
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2464
+ (long) sequenceCount);
2465
+
2466
+ // Read nodes
2467
+ if (nodeCounter == 0)
2468
+ return graph;
2469
+
2470
+ if (!fgets(line, maxline, file))
2471
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2472
+ while (!finished && strncmp(line, "NODE", 4) == 0) {
2473
+ strtok(line, "\t\n");
2474
+ sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2475
+ nodeID = (IDnum) long_var;
2476
+ node = addEmptyNodeToGraph(graph, nodeID);
2477
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2478
+ node->length = (Coordinate) longlong_var;
2479
+ nodeLength = node->length;
2480
+
2481
+ #ifndef SINGLE_COV_CAT
2482
+ Category cat;
2483
+ Coordinate originalCoverage;
2484
+ for (cat = 0; cat < CATEGORIES; cat++) {
2485
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2486
+ coverage = (Coordinate) longlong_var;
2487
+ setVirtualCoverage(node, cat, coverage);
2488
+ sscanf(strtok(NULL, "\t\n"), "%lld",
2489
+ &longlong_var);
2490
+ originalCoverage = (Coordinate) longlong_var;
2491
+ setOriginalVirtualCoverage(node, cat,
2492
+ originalCoverage);
2493
+ }
2494
+ #else
2495
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2496
+ coverage = (Coordinate) longlong_var;
2497
+ setVirtualCoverage(node, coverage);
2498
+ #endif
2499
+
2500
+ arrayLength = node->length / 4;
2501
+ if (node->length % 4 > 0)
2502
+ arrayLength++;
2503
+ node->descriptor =
2504
+ callocOrExit(arrayLength, Descriptor);
2505
+
2506
+ twin = node->twinNode;
2507
+ twin->length = node->length;
2508
+ twin->descriptor =
2509
+ callocOrExit(arrayLength, Descriptor);
2510
+
2511
+ index = 0;
2512
+ while ((c = getc(file)) != '\n') {
2513
+ if (c == 'A') {
2514
+ if (index - wordShift >= 0)
2515
+ writeNucleotideInDescriptor(ADENINE,
2516
+ node->
2517
+ descriptor,
2518
+ index - wordShift);
2519
+ if (nodeLength - index - 1 >= 0) {
2520
+ #ifndef COLOR
2521
+ writeNucleotideInDescriptor(THYMINE,
2522
+ twin->
2523
+ descriptor,
2524
+ nodeLength - index - 1);
2525
+ #else
2526
+ writeNucleotideInDescriptor(ADENINE,
2527
+ twin->
2528
+ descriptor,
2529
+ nodeLength - index - 1);
2530
+ #endif
2531
+ }
2532
+ } else if (c == 'C') {
2533
+ if (index - wordShift >= 0)
2534
+ writeNucleotideInDescriptor(CYTOSINE,
2535
+ node->
2536
+ descriptor,
2537
+ index - wordShift);
2538
+ if (nodeLength - index - 1 >= 0) {
2539
+ #ifndef COLOR
2540
+ writeNucleotideInDescriptor(GUANINE,
2541
+ twin->
2542
+ descriptor,
2543
+ nodeLength - index - 1);
2544
+ #else
2545
+ writeNucleotideInDescriptor(CYTOSINE,
2546
+ twin->
2547
+ descriptor,
2548
+ nodeLength - index - 1);
2549
+ #endif
2550
+ }
2551
+ } else if (c == 'G') {
2552
+ if (index - wordShift >= 0)
2553
+ writeNucleotideInDescriptor(GUANINE,
2554
+ node->
2555
+ descriptor,
2556
+ index - wordShift);
2557
+ if (nodeLength - index - 1 >= 0) {
2558
+ #ifndef COLOR
2559
+ writeNucleotideInDescriptor(CYTOSINE,
2560
+ twin->
2561
+ descriptor,
2562
+ nodeLength - index - 1);
2563
+ #else
2564
+ writeNucleotideInDescriptor(GUANINE,
2565
+ twin->
2566
+ descriptor,
2567
+ nodeLength - index - 1);
2568
+ #endif
2569
+ }
2570
+ } else if (c == 'T') {
2571
+ if (index - wordShift >= 0)
2572
+ writeNucleotideInDescriptor(THYMINE,
2573
+ node->
2574
+ descriptor,
2575
+ index - wordShift);
2576
+ if (nodeLength - index - 1 >= 0) {
2577
+ #ifndef COLOR
2578
+ writeNucleotideInDescriptor(ADENINE,
2579
+ twin->
2580
+ descriptor,
2581
+ nodeLength - index - 1);
2582
+ #else
2583
+ writeNucleotideInDescriptor(THYMINE,
2584
+ twin->
2585
+ descriptor,
2586
+ nodeLength - index - 1);
2587
+ #endif
2588
+ }
2589
+ }
2590
+
2591
+ index++;
2592
+ }
2593
+
2594
+ if (fgets(line, maxline, file) == NULL) {
2595
+ finished = true;
2596
+ }
2597
+ }
2598
+
2599
+ // Read arcs
2600
+ while (!finished && line[0] == 'A') {
2601
+ sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2602
+ &long_var2, &long_var3);
2603
+ originID = (IDnum) long_var;
2604
+ destinationID = (IDnum) long_var2;
2605
+ multiplicity = (IDnum) long_var3;
2606
+ arc =
2607
+ createArc(getNodeInGraph(graph, originID),
2608
+ getNodeInGraph(graph, destinationID), graph);
2609
+ setMultiplicity(arc, multiplicity);
2610
+ if (fgets(line, maxline, file) == NULL)
2611
+ finished = true;
2612
+ }
2613
+
2614
+ fclose(file);
2615
+ return graph;
2616
+ }
2617
+
2618
+ // Prints out the information relative to the topology of a node into a new file
2619
+ // Internal to exportDOTGraph()
2620
+ void DOTNode(Node * node, FILE * outfile)
2621
+ {
2622
+ IDnum ID;
2623
+ Arc *arc;
2624
+ Node *otherNode;
2625
+
2626
+ ID = node->ID;
2627
+ if (ID < 0)
2628
+ return;
2629
+
2630
+ velvetFprintf(outfile, "\t%li [label=\"<left>|%li|<right>\"]\n", (long) ID, (long) ID);
2631
+
2632
+ for (arc = node->arc; arc != NULL; arc = arc->next) {
2633
+ otherNode = arc->destination;
2634
+ if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2635
+ continue;
2636
+ }
2637
+
2638
+ if (otherNode->ID > 0)
2639
+ velvetFprintf(outfile, "\t%li:right -> %li:left\n", (long) ID,
2640
+ (long) otherNode->ID);
2641
+ else
2642
+ velvetFprintf(outfile, "\t%li:right -> %li:right\n", (long) ID,
2643
+ (long) -otherNode->ID);
2644
+ }
2645
+
2646
+ for (arc = node->twinNode->arc; arc != NULL; arc = arc->next) {
2647
+ otherNode = arc->destination;
2648
+ if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2649
+ continue;
2650
+ }
2651
+
2652
+ if (otherNode->ID > 0)
2653
+ velvetFprintf(outfile, "\t%li:left -> %li:left\n", (long) ID,
2654
+ (long) otherNode->ID);
2655
+ else
2656
+ velvetFprintf(outfile, "\t%li:left -> %li:right\n", (long) ID,
2657
+ (long) -otherNode->ID);
2658
+ }
2659
+ }
2660
+
2661
+ TightString *expandNode(Node * node, int WORDLENGTH)
2662
+ {
2663
+ Nucleotide nucleotide;
2664
+ Coordinate index;
2665
+ TightString *tString =
2666
+ newTightString(node->length + WORDLENGTH - 1);
2667
+ Node *twin = node->twinNode;
2668
+ Coordinate length = node->length;
2669
+
2670
+ for (index = 0; index < WORDLENGTH; index++) {
2671
+ nucleotide =
2672
+ getNucleotideInDescriptor(twin->descriptor,
2673
+ length - index - 1);
2674
+ #ifndef COLOR
2675
+ writeNucleotideAtPosition(3 - nucleotide, index, tString);
2676
+ #else
2677
+ writeNucleotideAtPosition(nucleotide, index, tString);
2678
+ #endif
2679
+ }
2680
+
2681
+ for (index = 1; index < node->length; index++) {
2682
+ nucleotide =
2683
+ getNucleotideInDescriptor(node->descriptor, index);
2684
+ writeNucleotideAtPosition(nucleotide,
2685
+ index + WORDLENGTH - 1, tString);
2686
+ }
2687
+
2688
+ return tString;
2689
+ }
2690
+
2691
+ char *expandNodeFragment(Node * node, Coordinate contigStart,
2692
+ Coordinate contigFinish, int wordLength)
2693
+ {
2694
+ Nucleotide nucleotide;
2695
+ Coordinate index;
2696
+ Node *twin = node->twinNode;
2697
+ Coordinate length = contigFinish - contigStart;
2698
+ int wordShift = wordLength - 1;
2699
+ char *string;
2700
+
2701
+ if (length >= wordShift) {
2702
+ string = callocOrExit(length + wordLength, char);
2703
+
2704
+ for (index = 0; index < wordShift; index++) {
2705
+ nucleotide =
2706
+ getNucleotideInDescriptor(twin->descriptor,
2707
+ twin->length - contigStart -
2708
+ index - 1);
2709
+ #ifndef COLOR
2710
+ nucleotide = 3 - nucleotide;
2711
+ #endif
2712
+
2713
+ switch (nucleotide) {
2714
+ case ADENINE:
2715
+ string[index] = 'A';
2716
+ break;
2717
+ case CYTOSINE:
2718
+ string[index] = 'C';
2719
+ break;
2720
+ case GUANINE:
2721
+ string[index] = 'G';
2722
+ break;
2723
+ case THYMINE:
2724
+ string[index] = 'T';
2725
+ break;
2726
+ }
2727
+
2728
+ }
2729
+
2730
+ for (index = 0; index < length; index++) {
2731
+ nucleotide =
2732
+ getNucleotideInDescriptor(node->descriptor,
2733
+ contigStart + index);
2734
+ switch (nucleotide) {
2735
+ case ADENINE:
2736
+ string[index + wordShift] = 'A';
2737
+ break;
2738
+ case CYTOSINE:
2739
+ string[index + wordShift] = 'C';
2740
+ break;
2741
+ case GUANINE:
2742
+ string[index + wordShift] = 'G';
2743
+ break;
2744
+ case THYMINE:
2745
+ string[index + wordShift] = 'T';
2746
+ break;
2747
+ }
2748
+ }
2749
+
2750
+ string[length + wordShift] = '\0';
2751
+ } else {
2752
+ string = callocOrExit(length + 1, char);
2753
+
2754
+ for (index = 0; index < length; index++) {
2755
+ nucleotide =
2756
+ getNucleotideInDescriptor(node->descriptor, contigStart + index);
2757
+ switch (nucleotide) {
2758
+ case ADENINE:
2759
+ string[index] = 'A';
2760
+ break;
2761
+ case CYTOSINE:
2762
+ string[index] = 'C';
2763
+ break;
2764
+ case GUANINE:
2765
+ string[index] = 'G';
2766
+ break;
2767
+ case THYMINE:
2768
+ string[index] = 'T';
2769
+ break;
2770
+ }
2771
+ }
2772
+
2773
+ string[length] = '\0';
2774
+ }
2775
+
2776
+ return string;
2777
+ }
2778
+
2779
+ boolean readStartsAreActivated(Graph * graph)
2780
+ {
2781
+ return graph->nodeReads != NULL;
2782
+ }
2783
+
2784
+ void activateReadStarts(Graph * graph)
2785
+ {
2786
+ graph->nodeReads =
2787
+ callocOrExit(2 * graph->nodeCount + 1, ShortReadMarker *);
2788
+ graph->nodeReadCounts =
2789
+ callocOrExit(2 * graph->nodeCount + 1, IDnum);
2790
+ }
2791
+
2792
+ void deactivateReadStarts(Graph * graph)
2793
+ {
2794
+ free(graph->nodeReads);
2795
+ free(graph->nodeReadCounts);
2796
+
2797
+ graph->nodeReads = NULL;
2798
+ graph->nodeReadCounts = NULL;
2799
+ }
2800
+
2801
+ boolean findIDnumInArray(IDnum query, IDnum * array, IDnum arrayLength)
2802
+ {
2803
+ IDnum leftIndex = 0;
2804
+ IDnum rightIndex = arrayLength;
2805
+ IDnum middleIndex;
2806
+
2807
+ if (arrayLength == 0)
2808
+ return false;
2809
+
2810
+ while (true) {
2811
+ middleIndex = leftIndex + (rightIndex - leftIndex) / 2;
2812
+
2813
+ if (array[middleIndex] == query)
2814
+ return true;
2815
+ else if (leftIndex >= rightIndex)
2816
+ return false;
2817
+ else if (array[middleIndex] > query)
2818
+ rightIndex = middleIndex;
2819
+ else if (leftIndex == middleIndex)
2820
+ leftIndex++;
2821
+ else
2822
+ leftIndex = middleIndex;
2823
+ }
2824
+ }
2825
+
2826
+ static inline int compareShortReadMarkers(const void *A, const void *B)
2827
+ {
2828
+ IDnum a = ((ShortReadMarker *) A)->readID;
2829
+ IDnum b = ((ShortReadMarker *) B)->readID;
2830
+
2831
+ if (a > b)
2832
+ return 1;
2833
+ if (a == b)
2834
+ return 0;
2835
+ return -1;
2836
+ }
2837
+
2838
+ static inline int compareIDnums(const void *A, const void *B)
2839
+ {
2840
+ IDnum a = *((IDnum *) A);
2841
+ IDnum b = *((IDnum *) B);
2842
+
2843
+ if (a > b)
2844
+ return 1;
2845
+ if (a == b)
2846
+ return 0;
2847
+ return -1;
2848
+ }
2849
+
2850
+ void incrementReadStartCount(Node * node, Graph * graph)
2851
+ {
2852
+ graph->nodeReadCounts[node->ID + graph->nodeCount]++;
2853
+ }
2854
+
2855
+ void createNodeReadStartArrays(Graph * graph)
2856
+ {
2857
+ IDnum index;
2858
+
2859
+ if (graph->nodeReads == NULL)
2860
+ return;
2861
+
2862
+ for (index = 0; index <= 2 * (graph->nodeCount); index++) {
2863
+ if (graph->nodeReadCounts[index] != 0) {
2864
+ graph->nodeReads[index] =
2865
+ mallocOrExit(graph->nodeReadCounts[index],
2866
+ ShortReadMarker);
2867
+ graph->nodeReadCounts[index] = 0;
2868
+ } else {
2869
+ graph->nodeReads[index] = NULL;
2870
+ }
2871
+ }
2872
+ }
2873
+
2874
+ void orderNodeReadStartArrays(Graph * graph)
2875
+ {
2876
+ IDnum index;
2877
+
2878
+ if (graph->nodeReads == NULL)
2879
+ return;
2880
+
2881
+ for (index = 0; index <= 2 * (graph->nodeCount); index++)
2882
+ if (graph->nodeReadCounts[index] != 0)
2883
+ qsort(graph->nodeReads[index],
2884
+ graph->nodeReadCounts[index],
2885
+ sizeof(ShortReadMarker),
2886
+ compareShortReadMarkers);
2887
+ }
2888
+
2889
+ void addReadStart(Node * node, IDnum seqID, Coordinate position,
2890
+ Graph * graph, Coordinate offset)
2891
+ {
2892
+ IDnum nodeIndex = getNodeID(node) + graph->nodeCount;
2893
+
2894
+ ShortReadMarker *array = graph->nodeReads[nodeIndex];
2895
+ IDnum arrayLength = graph->nodeReadCounts[nodeIndex];
2896
+
2897
+ array[arrayLength].readID = seqID;
2898
+ array[arrayLength].position = position;
2899
+ array[arrayLength].offset = (ShortLength) offset;
2900
+ // printf("node %d, seq %d, pos %ld, offset %ld\n", getNodeID(node), seqID, position, offset);
2901
+ graph->nodeReadCounts[nodeIndex]++;
2902
+ }
2903
+
2904
+ void blurLastShortReadMarker(Node * node, Graph * graph)
2905
+ {
2906
+ IDnum nodeIndex = getNodeID(node) + nodeCount(graph);
2907
+ IDnum index = graph->nodeReadCounts[nodeIndex] - 1;
2908
+ ShortReadMarker *marker;
2909
+
2910
+ if (index >= 0)
2911
+ marker = &(graph->nodeReads[nodeIndex][index]);
2912
+ else
2913
+ abort();
2914
+
2915
+ setShortReadMarkerPosition(marker, -1);
2916
+ }
2917
+
2918
+ ShortReadMarker *commonNodeReads(Node * nodeA, Node * nodeB, Graph * graph,
2919
+ IDnum * length)
2920
+ {
2921
+ IDnum targetID, targetLength, targetIndex, targetVal;
2922
+ IDnum sourceID, sourceLength, sourceIndex, sourceVal;
2923
+ IDnum mergeLength;
2924
+ ShortReadMarker *mergeArray, *targetArray, *sourceArray;
2925
+
2926
+ if (graph->nodeReads == NULL) {
2927
+ *length = 0;
2928
+ return NULL;
2929
+ }
2930
+
2931
+ if (nodeA == NULL || nodeB == NULL) {
2932
+ *length = 0;
2933
+ return NULL;
2934
+ }
2935
+
2936
+ targetID = getNodeID(nodeA) + graph->nodeCount;
2937
+ targetArray = graph->nodeReads[targetID];
2938
+ targetLength = graph->nodeReadCounts[targetID];
2939
+
2940
+ sourceID = getNodeID(nodeB) + graph->nodeCount;
2941
+ sourceArray = graph->nodeReads[sourceID];
2942
+ sourceLength = graph->nodeReadCounts[sourceID];
2943
+
2944
+ if (sourceArray == NULL || targetArray == NULL) {
2945
+ *length = 0;
2946
+ return NULL;
2947
+ }
2948
+
2949
+ mergeArray =
2950
+ mallocOrExit(sourceLength +
2951
+ targetLength, ShortReadMarker);
2952
+
2953
+ mergeLength = 0;
2954
+ sourceIndex = 0;
2955
+ targetIndex = 0;
2956
+ sourceVal = sourceArray[0].readID;
2957
+ targetVal = targetArray[0].readID;
2958
+
2959
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
2960
+ switch (compareIDnums(&sourceVal, &targetVal)) {
2961
+ case -1:
2962
+ mergeArray[mergeLength].readID = sourceVal;
2963
+ mergeArray[mergeLength].position = -1;
2964
+ mergeArray[mergeLength].offset = -1;
2965
+ mergeLength++;
2966
+ sourceIndex++;
2967
+ if (sourceIndex < sourceLength)
2968
+ sourceVal =
2969
+ sourceArray[sourceIndex].readID;
2970
+ break;
2971
+ case 0:
2972
+ mergeArray[mergeLength].readID = sourceVal;
2973
+ mergeArray[mergeLength].position = -1;
2974
+ mergeArray[mergeLength].offset = -1;
2975
+ mergeLength++;
2976
+ sourceIndex++;
2977
+ if (sourceIndex < sourceLength)
2978
+ sourceVal =
2979
+ sourceArray[sourceIndex].readID;
2980
+ targetIndex++;
2981
+ if (targetIndex < targetLength)
2982
+ targetVal =
2983
+ targetArray[targetIndex].readID;
2984
+ break;
2985
+ case 1:
2986
+ mergeArray[mergeLength].readID = targetVal;
2987
+ mergeArray[mergeLength].position = -1;
2988
+ mergeArray[mergeLength].offset = -1;
2989
+ mergeLength++;
2990
+ targetIndex++;
2991
+ if (targetIndex < targetLength)
2992
+ targetVal =
2993
+ targetArray[targetIndex].readID;
2994
+ }
2995
+ }
2996
+
2997
+ while (sourceIndex < sourceLength) {
2998
+ mergeArray[mergeLength].readID =
2999
+ sourceArray[sourceIndex].readID;
3000
+ mergeArray[mergeLength].position = -1;
3001
+ mergeArray[mergeLength].offset = -1;
3002
+ mergeLength++;
3003
+ sourceIndex++;
3004
+ }
3005
+
3006
+ while (targetIndex < targetLength) {
3007
+ mergeArray[mergeLength].readID =
3008
+ targetArray[targetIndex].readID;
3009
+ mergeArray[mergeLength].position = -1;
3010
+ mergeArray[mergeLength].offset = -1;
3011
+ mergeLength++;
3012
+ targetIndex++;
3013
+ }
3014
+
3015
+ *length = mergeLength;
3016
+ return mergeArray;
3017
+ }
3018
+
3019
+ ShortReadMarker *extractFrontOfNodeReads(Node * node,
3020
+ Coordinate breakpoint,
3021
+ Graph * graph, IDnum * length,
3022
+ PassageMarkerI sourceMarker,
3023
+ ShortLength * lengths)
3024
+ {
3025
+ IDnum sourceID;
3026
+ IDnum mergeLength, newLength, sourceLength;
3027
+ IDnum sourceIndex;
3028
+ ShortReadMarker *mergeArray, *sourceArray, *newArray;
3029
+ ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3030
+ Coordinate finish;
3031
+ Coordinate revBreakpoint;
3032
+
3033
+ if (graph->nodeReads == NULL) {
3034
+ *length = 0;
3035
+ return NULL;
3036
+ }
3037
+
3038
+ if (node == NULL) {
3039
+ *length = 0;
3040
+ return NULL;
3041
+ }
3042
+
3043
+ if (breakpoint == 0) {
3044
+ return commonNodeReads(node,
3045
+ getTwinNode(getNode
3046
+ (getPreviousInSequence
3047
+ (sourceMarker))),
3048
+ graph, length);
3049
+ }
3050
+
3051
+ sourceID = getNodeID(node) + graph->nodeCount;
3052
+ sourceArray = graph->nodeReads[sourceID];
3053
+ sourceLength = graph->nodeReadCounts[sourceID];
3054
+
3055
+ if (sourceArray == NULL) {
3056
+ *length = 0;
3057
+ return NULL;
3058
+ }
3059
+
3060
+ revBreakpoint = node->length - breakpoint;
3061
+
3062
+ mergeLength = 0;
3063
+ newLength = 0;
3064
+ sourcePtr = sourceArray;
3065
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3066
+ if (sourcePtr->position == -1) {
3067
+ newLength++;
3068
+ mergeLength++;
3069
+ } else {
3070
+ finish =
3071
+ sourcePtr->position - sourcePtr->offset +
3072
+ lengths[sourcePtr->readID - 1];
3073
+ if (sourcePtr->position < revBreakpoint)
3074
+ newLength++;
3075
+ if (finish > revBreakpoint)
3076
+ mergeLength++;
3077
+ }
3078
+ sourcePtr++;
3079
+ }
3080
+
3081
+ newArray = mallocOrExit(newLength, ShortReadMarker);
3082
+ mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3083
+
3084
+ mergePtr = mergeArray;
3085
+ newPtr = newArray;
3086
+ sourcePtr = sourceArray;
3087
+ mergeLength = 0;
3088
+ newLength = 0;
3089
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3090
+ if (sourcePtr->position == -1) {
3091
+ mergePtr->readID = sourcePtr->readID;
3092
+ setShortReadMarkerPosition(mergePtr, -1);
3093
+ setShortReadMarkerOffset(mergePtr, -1);
3094
+ mergePtr++;
3095
+ mergeLength++;
3096
+ newPtr->readID = sourcePtr->readID;
3097
+ setShortReadMarkerPosition(newPtr, -1);
3098
+ setShortReadMarkerOffset(newPtr, -1);
3099
+ newPtr++;
3100
+ newLength++;
3101
+ } else {
3102
+ finish =
3103
+ sourcePtr->position - sourcePtr->offset +
3104
+ lengths[sourcePtr->readID - 1];
3105
+ if (sourcePtr->position < revBreakpoint) {
3106
+ newPtr->readID = sourcePtr->readID;
3107
+ setShortReadMarkerPosition(newPtr,
3108
+ sourcePtr->
3109
+ position);
3110
+ setShortReadMarkerOffset(newPtr,
3111
+ sourcePtr->
3112
+ offset);
3113
+ newPtr++;
3114
+ newLength++;
3115
+
3116
+ // Saddle back reads:
3117
+ if (finish > revBreakpoint) {
3118
+ mergePtr->readID =
3119
+ sourcePtr->readID;
3120
+ setShortReadMarkerPosition
3121
+ (mergePtr, 0);
3122
+ setShortReadMarkerOffset(mergePtr,
3123
+ sourcePtr->
3124
+ offset +
3125
+ revBreakpoint
3126
+ -
3127
+ sourcePtr->
3128
+ position);
3129
+ mergePtr++;
3130
+ }
3131
+ } else if (finish > revBreakpoint) {
3132
+ mergePtr->readID = sourcePtr->readID;
3133
+ setShortReadMarkerPosition(mergePtr,
3134
+ sourcePtr->
3135
+ position - revBreakpoint);
3136
+ setShortReadMarkerOffset(mergePtr,
3137
+ sourcePtr->
3138
+ offset);
3139
+ mergePtr++;
3140
+ mergeLength++;
3141
+ }
3142
+ }
3143
+
3144
+ sourcePtr++;
3145
+ }
3146
+
3147
+ free(sourceArray);
3148
+ graph->nodeReads[sourceID] = newArray;
3149
+ graph->nodeReadCounts[sourceID] = newLength;
3150
+
3151
+ *length = mergeLength;
3152
+ return mergeArray;
3153
+ }
3154
+
3155
+ ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
3156
+ Graph * graph, IDnum * length,
3157
+ PassageMarkerI sourceMarker,
3158
+ ShortLength * lengths)
3159
+ {
3160
+ IDnum sourceID;
3161
+ IDnum mergeLength, newLength, sourceLength;
3162
+ IDnum sourceIndex;
3163
+ ShortReadMarker *mergeArray, *sourceArray, *newArray;
3164
+ ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3165
+ Coordinate finish;
3166
+
3167
+ if (graph->nodeReads == NULL) {
3168
+ *length = 0;
3169
+ return NULL;
3170
+ }
3171
+
3172
+ if (node == NULL) {
3173
+ *length = 0;
3174
+ return NULL;
3175
+ }
3176
+
3177
+ if (breakpoint == 0) {
3178
+ return
3179
+ commonNodeReads(getNode
3180
+ (getPreviousInSequence(sourceMarker)),
3181
+ node, graph, length);
3182
+ }
3183
+
3184
+ sourceID = getNodeID(node) + graph->nodeCount;
3185
+ sourceArray = graph->nodeReads[sourceID];
3186
+ sourceLength = graph->nodeReadCounts[sourceID];
3187
+
3188
+ if (sourceArray == NULL) {
3189
+ *length = 0;
3190
+ return NULL;
3191
+ }
3192
+
3193
+ mergeLength = 0;
3194
+ newLength = 0;
3195
+ sourcePtr = sourceArray;
3196
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3197
+ if (sourcePtr->position == -1) {
3198
+ mergeLength++;
3199
+ newLength++;
3200
+ } else {
3201
+ finish =
3202
+ sourcePtr->position - sourcePtr->offset +
3203
+ lengths[sourcePtr->readID - 1];
3204
+ if (sourcePtr->position < breakpoint)
3205
+ mergeLength++;
3206
+ if (finish > breakpoint)
3207
+ newLength++;
3208
+ }
3209
+ sourcePtr++;
3210
+ }
3211
+
3212
+ newArray = mallocOrExit(newLength, ShortReadMarker);
3213
+ mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3214
+
3215
+ mergePtr = mergeArray;
3216
+ newPtr = newArray;
3217
+ sourcePtr = sourceArray;
3218
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3219
+ if (sourcePtr->position == -1) {
3220
+ mergePtr->readID = sourcePtr->readID;
3221
+ setShortReadMarkerPosition(mergePtr, -1);
3222
+ setShortReadMarkerOffset(mergePtr, -1);
3223
+ mergePtr++;
3224
+
3225
+ newPtr->readID = sourcePtr->readID;
3226
+ setShortReadMarkerPosition(newPtr, -1);
3227
+ setShortReadMarkerOffset(newPtr, -1);
3228
+ newPtr++;
3229
+
3230
+ sourcePtr++;
3231
+ continue;
3232
+ } else {
3233
+ finish =
3234
+ sourcePtr->position - sourcePtr->offset +
3235
+ lengths[sourcePtr->readID - 1];
3236
+
3237
+ if (sourcePtr->position < breakpoint) {
3238
+ mergePtr->readID = sourcePtr->readID;
3239
+ setShortReadMarkerPosition(mergePtr,
3240
+ sourcePtr->
3241
+ position);
3242
+ setShortReadMarkerOffset(mergePtr,
3243
+ sourcePtr->
3244
+ offset);
3245
+ mergePtr++;
3246
+
3247
+ // Saddle back reads:
3248
+ if (finish > breakpoint) {
3249
+ newPtr->readID = sourcePtr->readID;
3250
+ setShortReadMarkerPosition(newPtr,
3251
+ 0);
3252
+ setShortReadMarkerOffset(newPtr,
3253
+ sourcePtr->
3254
+ offset +
3255
+ breakpoint
3256
+ -
3257
+ sourcePtr->
3258
+ position);
3259
+ newPtr++;
3260
+ }
3261
+ } else if (finish > breakpoint) {
3262
+ newPtr->readID = sourcePtr->readID;
3263
+ setShortReadMarkerPosition(newPtr,
3264
+ sourcePtr->
3265
+ position -
3266
+ breakpoint);
3267
+ setShortReadMarkerOffset(newPtr,
3268
+ sourcePtr->
3269
+ offset);
3270
+ newPtr++;
3271
+ }
3272
+ }
3273
+
3274
+ sourcePtr++;
3275
+ }
3276
+
3277
+ free(sourceArray);
3278
+ graph->nodeReads[sourceID] = newArray;
3279
+ graph->nodeReadCounts[sourceID] = newLength;
3280
+
3281
+ *length = mergeLength;
3282
+ return mergeArray;
3283
+ }
3284
+
3285
+ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
3286
+ Graph * graph)
3287
+ {
3288
+ IDnum targetID, targetLength, targetIndex, targetVal;
3289
+ IDnum sourceLength, sourceIndex, sourceVal;
3290
+ IDnum mergeLength;
3291
+ ShortReadMarker *sourceArray, *targetArray, *mergeArray;
3292
+ ShortReadMarker *sourcePtr, *targetPtr, *mergePtr;
3293
+ Coordinate targetPosition;
3294
+ //ShortLength nodeLength = (ShortLength) getNodeLength(node);
3295
+ ShortLength targetOffset;
3296
+
3297
+ if (graph->nodeReads == NULL || reads == NULL || node == NULL)
3298
+ return;
3299
+
3300
+ targetID = getNodeID(node) + graph->nodeCount;
3301
+ targetArray = graph->nodeReads[targetID];
3302
+ targetLength = graph->nodeReadCounts[targetID];
3303
+ targetPtr = targetArray;
3304
+
3305
+ sourceArray = reads;
3306
+ sourceLength = readCount;
3307
+ sourcePtr = sourceArray;
3308
+
3309
+ if (targetArray == NULL) {
3310
+ mergeArray =
3311
+ mallocOrExit(sourceLength, ShortReadMarker);
3312
+ mergePtr = mergeArray;
3313
+
3314
+ sourceIndex = 0;
3315
+ while (sourceIndex < sourceLength) {
3316
+ mergePtr->readID = sourcePtr->readID;
3317
+ setShortReadMarkerPosition(mergePtr, -1);
3318
+ setShortReadMarkerOffset(mergePtr, -1);
3319
+ mergePtr++;
3320
+ sourcePtr++;
3321
+ sourceIndex++;
3322
+ }
3323
+
3324
+ graph->nodeReads[targetID] = mergeArray;
3325
+ graph->nodeReadCounts[targetID] = sourceLength;
3326
+ return;
3327
+ }
3328
+
3329
+ mergeArray =
3330
+ mallocOrExit(sourceLength +
3331
+ targetLength, ShortReadMarker);
3332
+ mergePtr = mergeArray;
3333
+
3334
+ mergeLength = 0;
3335
+ sourceIndex = 0;
3336
+ targetIndex = 0;
3337
+ sourceVal = sourcePtr->readID;
3338
+ targetVal = targetPtr->readID;
3339
+ targetPosition = targetPtr->position;
3340
+ targetOffset = targetPtr->offset;
3341
+
3342
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3343
+ if (sourceVal < targetVal) {
3344
+ mergePtr->readID = sourceVal;
3345
+ setShortReadMarkerPosition(mergePtr, -1);
3346
+ setShortReadMarkerOffset(mergePtr, -1);
3347
+ sourceIndex++;
3348
+ sourcePtr++;
3349
+ if (sourceIndex < sourceLength)
3350
+ sourceVal = sourcePtr->readID;
3351
+ } else if (sourceVal == targetVal) {
3352
+ mergePtr->readID = sourceVal;
3353
+ setShortReadMarkerPosition(mergePtr, -1);
3354
+ setShortReadMarkerOffset(mergePtr, -1);
3355
+ sourceIndex++;
3356
+ sourcePtr++;
3357
+ if (sourceIndex < sourceLength)
3358
+ sourceVal = sourcePtr->readID;
3359
+ targetIndex++;
3360
+ targetPtr++;
3361
+ if (targetIndex < targetLength) {
3362
+ targetVal = targetPtr->readID;
3363
+ targetPosition = targetPtr->position;
3364
+ targetOffset = targetPtr->offset;
3365
+ }
3366
+ } else {
3367
+ mergePtr->readID = targetVal;
3368
+ setShortReadMarkerPosition(mergePtr,
3369
+ targetPosition);
3370
+ setShortReadMarkerOffset(mergePtr, targetOffset);
3371
+ targetIndex++;
3372
+ targetPtr++;
3373
+ if (targetIndex < targetLength) {
3374
+ targetVal = targetPtr->readID;
3375
+ targetPosition = targetPtr->position;
3376
+ targetOffset = targetPtr->offset;
3377
+ }
3378
+ }
3379
+
3380
+ mergeLength++;
3381
+ mergePtr++;
3382
+ }
3383
+
3384
+ while (sourceIndex < sourceLength) {
3385
+ mergePtr->readID = sourcePtr->readID;
3386
+ setShortReadMarkerPosition(mergePtr, -1);
3387
+ setShortReadMarkerOffset(mergePtr, -1);
3388
+ mergeLength++;
3389
+ mergePtr++;
3390
+ sourceIndex++;
3391
+ sourcePtr++;
3392
+ }
3393
+
3394
+ while (targetIndex < targetLength) {
3395
+ mergePtr->readID = targetPtr->readID;
3396
+ setShortReadMarkerPosition(mergePtr, targetPtr->position);
3397
+ setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3398
+ mergeLength++;
3399
+ mergePtr++;
3400
+ targetIndex++;
3401
+ targetPtr++;
3402
+ }
3403
+
3404
+ free(targetArray);
3405
+ graph->nodeReads[targetID] = mergeArray;
3406
+ graph->nodeReadCounts[targetID] = mergeLength;
3407
+ }
3408
+
3409
+ static inline Coordinate min(Coordinate A, Coordinate B)
3410
+ {
3411
+ return A < B ? A : B;
3412
+ }
3413
+
3414
+ static inline ShortLength min_short(ShortLength A, ShortLength B)
3415
+ {
3416
+ return A < B ? A : B;
3417
+ }
3418
+
3419
+ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
3420
+ Node * target, Graph * graph)
3421
+ {
3422
+ IDnum targetID = getNodeID(target) + graph->nodeCount;
3423
+ ShortReadMarker *targetArray = graph->nodeReads[targetID];
3424
+ IDnum targetLength = graph->nodeReadCounts[targetID];
3425
+ ShortReadMarker *targetPtr = targetArray;
3426
+ ShortReadMarker *sourcePtr = sourceArray;
3427
+ ShortReadMarker *mergeArray, *mergePtr;
3428
+ IDnum mergeLength;
3429
+ Coordinate targetPosition, sourcePosition;
3430
+ ShortLength targetOffset, sourceOffset;
3431
+ IDnum targetIndex, targetVal, sourceIndex, sourceVal;
3432
+
3433
+ if (sourceLength == 0) {
3434
+ free(sourceArray);
3435
+ return;
3436
+ }
3437
+
3438
+ if (targetLength == 0) {
3439
+ free(targetArray);
3440
+ graph->nodeReads[targetID] = sourceArray;
3441
+ graph->nodeReadCounts[targetID] = sourceLength;
3442
+ return;
3443
+ }
3444
+
3445
+ mergeArray =
3446
+ mallocOrExit(sourceLength +
3447
+ targetLength, ShortReadMarker);
3448
+ mergePtr = mergeArray;
3449
+
3450
+ mergeLength = 0;
3451
+ sourceIndex = 0;
3452
+ targetIndex = 0;
3453
+ targetVal = targetPtr->readID;
3454
+ targetPosition = targetPtr->position;
3455
+ targetOffset = targetPtr->offset;
3456
+ sourceVal = sourcePtr->readID;
3457
+ sourcePosition = sourcePtr->position;
3458
+ sourceOffset = sourcePtr->offset;
3459
+
3460
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3461
+ if (sourceVal < targetVal) {
3462
+ mergePtr->readID = sourceVal;
3463
+ setShortReadMarkerPosition(mergePtr,
3464
+ sourcePosition);
3465
+ setShortReadMarkerOffset(mergePtr, sourceOffset);
3466
+ sourceIndex++;
3467
+ if (sourceIndex < sourceLength) {
3468
+ sourcePtr++;
3469
+ sourceVal = sourcePtr->readID;
3470
+ sourcePosition = sourcePtr->position;
3471
+ sourceOffset = sourcePtr->offset;
3472
+ }
3473
+ } else if (sourceVal == targetVal) {
3474
+ mergePtr->readID = sourceVal;
3475
+ if (sourcePosition == -1 && targetPosition == -1) {
3476
+ setShortReadMarkerPosition(mergePtr, -1);
3477
+ setShortReadMarkerOffset(mergePtr, -1);
3478
+ } else if (sourcePosition == -1) {
3479
+ setShortReadMarkerPosition(mergePtr,
3480
+ targetPosition);
3481
+ setShortReadMarkerOffset(mergePtr,
3482
+ targetOffset);
3483
+ } else if (targetPosition == -1) {
3484
+ setShortReadMarkerPosition(mergePtr,
3485
+ sourcePosition);
3486
+ setShortReadMarkerOffset(mergePtr,
3487
+ sourceOffset);
3488
+ } else {
3489
+ setShortReadMarkerPosition(mergePtr,
3490
+ min
3491
+ (sourcePosition,
3492
+ targetPosition));
3493
+ setShortReadMarkerOffset(mergePtr,
3494
+ min_short
3495
+ (sourceOffset,
3496
+ targetOffset));
3497
+ }
3498
+ sourceIndex++;
3499
+ if (sourceIndex < sourceLength) {
3500
+ sourcePtr++;
3501
+ sourceVal = sourcePtr->readID;
3502
+ sourcePosition = sourcePtr->position;
3503
+ sourceOffset = sourcePtr->offset;
3504
+ }
3505
+ targetIndex++;
3506
+ if (targetIndex < targetLength) {
3507
+ targetPtr++;
3508
+ targetVal = targetPtr->readID;
3509
+ targetPosition = targetPtr->position;
3510
+ targetOffset = targetPtr->offset;
3511
+ }
3512
+ } else {
3513
+ mergePtr->readID = targetVal;
3514
+ setShortReadMarkerPosition(mergePtr,
3515
+ targetPosition);
3516
+ setShortReadMarkerOffset(mergePtr, targetOffset);
3517
+ targetIndex++;
3518
+ if (targetIndex < targetLength) {
3519
+ targetPtr++;
3520
+ targetVal = targetPtr->readID;
3521
+ targetPosition = targetPtr->position;
3522
+ targetOffset = targetPtr->offset;
3523
+ }
3524
+ }
3525
+
3526
+ mergeLength++;
3527
+ mergePtr++;
3528
+ }
3529
+
3530
+ while (sourceIndex < sourceLength) {
3531
+ mergePtr->readID = sourcePtr->readID;
3532
+ setShortReadMarkerPosition(mergePtr, sourcePtr->position);
3533
+ setShortReadMarkerOffset(mergePtr, sourcePtr->offset);
3534
+ mergeLength++;
3535
+ mergePtr++;
3536
+ sourceIndex++;
3537
+ sourcePtr++;
3538
+ }
3539
+
3540
+ while (targetIndex < targetLength) {
3541
+ mergePtr->readID = targetPtr->readID;
3542
+ setShortReadMarkerPosition(mergePtr, targetPtr->position);
3543
+ setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3544
+ mergeLength++;
3545
+ mergePtr++;
3546
+ targetIndex++;
3547
+ targetPtr++;
3548
+ }
3549
+
3550
+ free(targetArray);
3551
+ graph->nodeReads[targetID] = mergeArray;
3552
+ graph->nodeReadCounts[targetID] = mergeLength;
3553
+
3554
+ free(sourceArray);
3555
+ }
3556
+
3557
+ void mergeNodeReads(Node * target, Node * source, Graph * graph)
3558
+ {
3559
+ IDnum sourceID, sourceLength;
3560
+ ShortReadMarker *sourceArray;
3561
+
3562
+ if (graph->nodeReads == NULL || source == NULL || target == NULL)
3563
+ return;
3564
+
3565
+ sourceID = getNodeID(source) + graph->nodeCount;
3566
+ sourceArray = graph->nodeReads[sourceID];
3567
+ sourceLength = graph->nodeReadCounts[sourceID];
3568
+
3569
+ if (sourceArray == NULL)
3570
+ return;
3571
+
3572
+ graph->nodeReads[sourceID] = NULL;
3573
+ graph->nodeReadCounts[sourceID] = 0;
3574
+
3575
+ injectShortReads(sourceArray, sourceLength, target, graph);
3576
+ }
3577
+
3578
+ void foldSymmetricalNodeReads(Node * node, Graph * graph)
3579
+ {
3580
+ IDnum targetID, targetLength, targetIndex;
3581
+ IDnum sourceID, sourceLength, sourceIndex;
3582
+ IDnum targetVal = 0;
3583
+ IDnum sourceVal = 0;
3584
+ IDnum mergeLength;
3585
+ ShortReadMarker *sourceArray, *targetArray, *mergeArray,
3586
+ *mergeArray2;
3587
+ ShortReadMarker *sourcePtr, *targetPtr, *mergePtr, *mergePtr2;
3588
+
3589
+ if (graph->nodeReads == NULL || node == NULL)
3590
+ return;
3591
+
3592
+ sourceID = getNodeID(node) + graph->nodeCount;
3593
+ sourceArray = graph->nodeReads[sourceID];
3594
+ sourceLength = graph->nodeReadCounts[sourceID];
3595
+ sourcePtr = sourceArray;
3596
+
3597
+ targetID = -getNodeID(node) + graph->nodeCount;
3598
+ targetArray = graph->nodeReads[targetID];
3599
+ targetLength = graph->nodeReadCounts[targetID];
3600
+ targetPtr = targetArray;
3601
+
3602
+ if (sourceArray == NULL && targetArray == NULL)
3603
+ return;
3604
+
3605
+ mergeArray =
3606
+ mallocOrExit(sourceLength +
3607
+ targetLength, ShortReadMarker);
3608
+ mergeArray2 =
3609
+ mallocOrExit(sourceLength +
3610
+ targetLength, ShortReadMarker);
3611
+ mergePtr = mergeArray;
3612
+ mergePtr2 = mergeArray2;
3613
+
3614
+ mergeLength = 0;
3615
+ sourceIndex = 0;
3616
+ targetIndex = 0;
3617
+ if (targetIndex < targetLength)
3618
+ targetVal = targetPtr->readID;
3619
+ if (sourceIndex < sourceLength)
3620
+ sourceVal = sourcePtr->readID;
3621
+
3622
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3623
+ if (sourceVal < targetVal) {
3624
+ mergePtr->readID = sourceVal;
3625
+ setShortReadMarkerPosition(mergePtr, -1);
3626
+ setShortReadMarkerOffset(mergePtr, -1);
3627
+ mergePtr2->readID = sourceVal;
3628
+ setShortReadMarkerPosition(mergePtr2, -1);
3629
+ setShortReadMarkerOffset(mergePtr2, -1);
3630
+ sourceIndex++;
3631
+ sourcePtr++;
3632
+ if (sourceIndex < sourceLength)
3633
+ sourceVal = sourcePtr->readID;
3634
+ } else if (sourceVal == targetVal) {
3635
+ mergePtr->readID = sourceVal;
3636
+ setShortReadMarkerPosition(mergePtr, -1);
3637
+ setShortReadMarkerOffset(mergePtr, -1);
3638
+ mergePtr2->readID = sourceVal;
3639
+ setShortReadMarkerPosition(mergePtr2, -1);
3640
+ setShortReadMarkerOffset(mergePtr2, -1);
3641
+ sourceIndex++;
3642
+ sourcePtr++;
3643
+ if (sourceIndex < sourceLength)
3644
+ sourceVal = sourcePtr->readID;
3645
+ targetIndex++;
3646
+ targetPtr++;
3647
+ if (targetIndex < targetLength)
3648
+ targetVal = targetPtr->readID;
3649
+ } else {
3650
+ mergePtr->readID = targetVal;
3651
+ setShortReadMarkerPosition(mergePtr, -1);
3652
+ setShortReadMarkerOffset(mergePtr, -1);
3653
+ mergePtr2->readID = targetVal;
3654
+ setShortReadMarkerPosition(mergePtr2, -1);
3655
+ setShortReadMarkerOffset(mergePtr2, -1);
3656
+ targetIndex++;
3657
+ targetPtr++;
3658
+ if (targetIndex < targetLength)
3659
+ targetVal = targetPtr->readID;
3660
+ }
3661
+
3662
+ mergeLength++;
3663
+ mergePtr++;
3664
+ mergePtr2++;
3665
+ }
3666
+
3667
+ while (sourceIndex < sourceLength) {
3668
+ mergePtr->readID = sourcePtr->readID;
3669
+ setShortReadMarkerPosition(mergePtr, -1);
3670
+ setShortReadMarkerOffset(mergePtr, -1);
3671
+ mergePtr2->readID = sourcePtr->readID;
3672
+ setShortReadMarkerPosition(mergePtr2, -1);
3673
+ setShortReadMarkerOffset(mergePtr2, -1);
3674
+ mergeLength++;
3675
+ mergePtr++;
3676
+ mergePtr2++;
3677
+ sourceIndex++;
3678
+ sourcePtr++;
3679
+ }
3680
+
3681
+ while (targetIndex < targetLength) {
3682
+ mergePtr->readID = targetPtr->readID;
3683
+ setShortReadMarkerPosition(mergePtr, -1);
3684
+ setShortReadMarkerOffset(mergePtr, -1);
3685
+ mergePtr2->readID = targetPtr->readID;
3686
+ setShortReadMarkerPosition(mergePtr2, -1);
3687
+ setShortReadMarkerOffset(mergePtr2, -1);
3688
+ mergeLength++;
3689
+ mergePtr++;
3690
+ mergePtr2++;
3691
+ targetIndex++;
3692
+ targetPtr++;
3693
+ }
3694
+
3695
+ free(targetArray);
3696
+ graph->nodeReads[targetID] = mergeArray;
3697
+ graph->nodeReadCounts[targetID] = mergeLength;
3698
+
3699
+ free(sourceArray);
3700
+ graph->nodeReads[sourceID] = mergeArray2;
3701
+ graph->nodeReadCounts[sourceID] = mergeLength;
3702
+ }
3703
+
3704
+ void shareReadStarts(Node * target, Node * source, Graph * graph)
3705
+ {
3706
+ ShortReadMarker *sourceArray;
3707
+ IDnum sourceLength, sourceID;
3708
+
3709
+ if (graph->nodeReads == NULL)
3710
+ return;
3711
+
3712
+ if (target == NULL || source == NULL)
3713
+ return;
3714
+
3715
+ sourceID = source->ID + graph->nodeCount;
3716
+ sourceArray = graph->nodeReads[sourceID];
3717
+ sourceLength = graph->nodeReadCounts[sourceID];
3718
+
3719
+ if (sourceArray == NULL)
3720
+ return;
3721
+
3722
+ spreadReadIDs(sourceArray, sourceLength, target, graph);
3723
+ }
3724
+
3725
+ ShortReadMarker **getNodeToReadMappings(Graph * graph)
3726
+ {
3727
+ return graph->nodeReads;
3728
+ }
3729
+
3730
+ IDnum getShortReadMarkerID(ShortReadMarker * marker)
3731
+ {
3732
+ return marker->readID;
3733
+ }
3734
+
3735
+ inline ShortLength getShortReadMarkerOffset(ShortReadMarker * marker)
3736
+ {
3737
+ return marker->offset;
3738
+ }
3739
+
3740
+ inline void setShortReadMarkerOffset(ShortReadMarker * marker,
3741
+ ShortLength offset)
3742
+ {
3743
+ marker->offset = offset;
3744
+ }
3745
+
3746
+ IDnum *getNodeReadCounts(Graph * graph)
3747
+ {
3748
+ return graph->nodeReadCounts;
3749
+ }
3750
+
3751
+ int getWordLength(Graph * graph)
3752
+ {
3753
+ return graph->wordLength;
3754
+ }
3755
+
3756
+ ShortReadMarker *getNodeReads(Node * node, Graph * graph)
3757
+ {
3758
+ IDnum id = node->ID + graph->nodeCount;
3759
+ return graph->nodeReads[id];
3760
+ }
3761
+
3762
+ IDnum getNodeReadCount(Node * node, Graph * graph)
3763
+ {
3764
+ IDnum id = node->ID + graph->nodeCount;
3765
+ return graph->nodeReadCounts[id];
3766
+ }
3767
+
3768
+ inline Coordinate getShortReadMarkerPosition(ShortReadMarker * marker)
3769
+ {
3770
+ return marker->position;
3771
+ }
3772
+
3773
+ inline void setShortReadMarkerPosition(ShortReadMarker * marker,
3774
+ Coordinate position)
3775
+ {
3776
+ if (position < -100)
3777
+ return;
3778
+
3779
+ marker->position = position;
3780
+ }
3781
+
3782
+ ShortReadMarker *getShortReadMarkerAtIndex(ShortReadMarker * array,
3783
+ IDnum index)
3784
+ {
3785
+ return &(array[index]);
3786
+ }
3787
+
3788
+ void destroyGraph(Graph * graph)
3789
+ {
3790
+ IDnum index;
3791
+ Node *node;
3792
+ for (index = 1; index <= graph->nodeCount; index++) {
3793
+ node = getNodeInGraph(graph, index);
3794
+ if (node != NULL)
3795
+ destroyNode(node, graph);
3796
+ }
3797
+
3798
+ if (graph->gapMarkers)
3799
+ deactivateGapMarkers(graph);
3800
+
3801
+ free(graph->nodes);
3802
+ destroyRecycleBin(nodeMemory);
3803
+ destroyRecycleBin(arcMemory);
3804
+ destroyAllPassageMarkers();
3805
+ free(graph->arcLookupTable);
3806
+ free(graph->nodeReads);
3807
+ free(graph->nodeReadCounts);
3808
+ free(graph);
3809
+ }
3810
+
3811
+ void setInsertLengths(Graph * graph, Category cat, Coordinate insertLength,
3812
+ Coordinate insertLength_std_dev)
3813
+ {
3814
+ graph->insertLengths[cat] = insertLength;
3815
+ graph->insertLengths_var[cat] =
3816
+ insertLength_std_dev * insertLength_std_dev;
3817
+ }
3818
+
3819
+ Coordinate getInsertLength(Graph * graph, Category cat)
3820
+ {
3821
+ return graph->insertLengths[cat / 2];
3822
+ }
3823
+
3824
+ double getInsertLength_var(Graph * graph, Category cat)
3825
+ {
3826
+ return graph->insertLengths_var[cat / 2];
3827
+ }
3828
+
3829
+ void activateGapMarkers(Graph * graph)
3830
+ {
3831
+ graph->gapMarkers =
3832
+ callocOrExit(graph->nodeCount + 1, GapMarker *);
3833
+ gapMarkerMemory = newRecycleBin(sizeof(GapMarker), GAPBLOCKSIZE);
3834
+ }
3835
+
3836
+ void deactivateGapMarkers(Graph * graph)
3837
+ {
3838
+ free(graph->gapMarkers);
3839
+ graph->gapMarkers = NULL;
3840
+ destroyRecycleBin(gapMarkerMemory);
3841
+ gapMarkerMemory = NULL;
3842
+ }
3843
+
3844
+ static GapMarker *allocateGapMarker()
3845
+ {
3846
+ return (GapMarker *) allocatePointer(gapMarkerMemory);
3847
+ }
3848
+
3849
+ void appendGap(Node * node, Coordinate length, Graph * graph)
3850
+ {
3851
+ IDnum nodeID = getNodeID(node);
3852
+ GapMarker *marker = allocateGapMarker();
3853
+ GapMarker *tmp;
3854
+
3855
+ marker->length = length;
3856
+
3857
+ if (nodeID > 0) {
3858
+ marker->position = node->length;
3859
+ marker->next = graph->gapMarkers[nodeID];
3860
+ graph->gapMarkers[nodeID] = marker;
3861
+ } else {
3862
+ for (tmp = graph->gapMarkers[-nodeID]; tmp != NULL;
3863
+ tmp = tmp->next)
3864
+ tmp->position += length;
3865
+
3866
+ marker->position = 0;
3867
+ marker->next = graph->gapMarkers[-nodeID];
3868
+ graph->gapMarkers[-nodeID] = marker;
3869
+ }
3870
+
3871
+ addBufferToDescriptor(node, length);
3872
+ }
3873
+
3874
+ void appendNodeGaps(Node * destination, Node * source, Graph * graph)
3875
+ {
3876
+ IDnum destinationID = getNodeID(destination);
3877
+ IDnum sourceID = getNodeID(source);
3878
+ GapMarker *marker;
3879
+
3880
+ if (graph->gapMarkers == NULL)
3881
+ return;
3882
+
3883
+ if (destinationID > 0 && sourceID > 0) {
3884
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3885
+ marker = marker->next)
3886
+ marker->position += destination->length;
3887
+ } else if (destinationID > 0 && sourceID < 0) {
3888
+ sourceID = -sourceID;
3889
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3890
+ marker = marker->next)
3891
+ marker->position =
3892
+ source->length + destination->length -
3893
+ marker->position - marker->length;
3894
+ } else if (destinationID < 0 && sourceID > 0) {
3895
+ destinationID = -destinationID;
3896
+ for (marker = graph->gapMarkers[destinationID];
3897
+ marker != NULL; marker = marker->next)
3898
+ marker->position += source->length;
3899
+
3900
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3901
+ marker = marker->next)
3902
+ marker->position =
3903
+ source->length - marker->position -
3904
+ marker->length;
3905
+ } else {
3906
+ destinationID = -destinationID;
3907
+ sourceID = -sourceID;
3908
+ for (marker = graph->gapMarkers[destinationID];
3909
+ marker != NULL; marker = marker->next)
3910
+ marker->position += source->length;
3911
+ }
3912
+
3913
+ if (graph->gapMarkers[destinationID] == NULL)
3914
+ graph->gapMarkers[destinationID] =
3915
+ graph->gapMarkers[sourceID];
3916
+ else {
3917
+ marker = graph->gapMarkers[destinationID];
3918
+ while (marker->next != NULL)
3919
+ marker = marker->next;
3920
+ marker->next = graph->gapMarkers[sourceID];
3921
+ }
3922
+
3923
+ graph->gapMarkers[sourceID] = NULL;
3924
+ }
3925
+
3926
+ GapMarker *getGap(Node * node, Graph * graph)
3927
+ {
3928
+ IDnum nodeID = getNodeID(node);
3929
+
3930
+ if (graph->gapMarkers == NULL)
3931
+ return NULL;
3932
+
3933
+ if (nodeID < 0)
3934
+ nodeID = -nodeID;
3935
+
3936
+ return graph->gapMarkers[nodeID];
3937
+ }
3938
+
3939
+ GapMarker *getNextGap(GapMarker * marker)
3940
+ {
3941
+ return marker->next;
3942
+ }
3943
+
3944
+ Coordinate getGapStart(GapMarker * marker)
3945
+ {
3946
+ return marker->position;
3947
+ }
3948
+
3949
+ Coordinate getGapFinish(GapMarker * marker)
3950
+ {
3951
+ return marker->position + marker->length;
3952
+ }
3953
+
3954
+ void reallocateNodeDescriptor(Node * node, Coordinate length) {
3955
+ Coordinate arrayLength, index, shift;
3956
+ Node * twin = node->twinNode;
3957
+ Descriptor * array;
3958
+ Nucleotide nucleotide;
3959
+
3960
+ if (length < node->length)
3961
+ exitErrorf(EXIT_FAILURE, true, "Sum of node lengths smaller than first!");
3962
+
3963
+ shift = length - node->length;
3964
+
3965
+ arrayLength = length / 4;
3966
+ if (length % 4)
3967
+ arrayLength++;
3968
+
3969
+ node->descriptor = reallocOrExit(node->descriptor, arrayLength, Descriptor);
3970
+
3971
+ array = callocOrExit(arrayLength, Descriptor);
3972
+ for (index = node->length - 1; index >= 0; index--) {
3973
+ nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
3974
+ writeNucleotideInDescriptor(nucleotide, array, index + shift);
3975
+ }
3976
+
3977
+ free(twin->descriptor);
3978
+ twin->descriptor = array;
3979
+ }
3980
+
3981
+ boolean doubleStrandedGraph(Graph * graph) {
3982
+ return graph->double_stranded;
3983
+ }