finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,3932 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+
25
+ #include "globals.h"
26
+ #include "graph.h"
27
+ #include "recycleBin.h"
28
+ #include "tightString.h"
29
+ #include "passageMarker.h"
30
+ #include "utility.h"
31
+ #include "kmer.h"
32
+
33
+ #include "graphStructures.h"
34
+
35
+ #define ADENINE 0
36
+ #define CYTOSINE 1
37
+ #define GUANINE 2
38
+ #define THYMINE 3
39
+
40
+ static RecycleBin *arcMemory = NULL;
41
+ static RecycleBin *nodeMemory = NULL;
42
+ static RecycleBin *gapMarkerMemory = NULL;
43
+
44
+ #define BLOCKSIZE 50
45
+ #define GAPBLOCKSIZE 10000
46
+
47
+ Arc *allocateArc()
48
+ {
49
+ if (arcMemory == NULL)
50
+ arcMemory = newRecycleBin(sizeof(Arc), BLOCKSIZE);
51
+
52
+ return allocatePointer(arcMemory);
53
+ }
54
+
55
+ void deallocateArc(Arc * arc)
56
+ {
57
+ deallocatePointer(arcMemory, arc);
58
+ }
59
+
60
+ Node *allocateNode()
61
+ {
62
+ if (nodeMemory == NULL)
63
+ nodeMemory = newRecycleBin(sizeof(Node), BLOCKSIZE);
64
+
65
+ return (Node *) allocatePointer(nodeMemory);
66
+ }
67
+
68
+ void deallocateNode(Node * node)
69
+ {
70
+ deallocatePointer(nodeMemory, node);
71
+ }
72
+
73
+ // Returns the twin node of a given node
74
+ Node *getTwinNode(Node * node)
75
+ {
76
+ return node->twinNode;
77
+ }
78
+
79
+ // Inserts new passage marker in the marker list of destination node
80
+ void insertPassageMarker(PassageMarkerI marker, Node * destination)
81
+ {
82
+ setTopOfTheNode(marker);
83
+ setNextInNode(marker, destination->marker);
84
+ destination->marker = marker;
85
+ }
86
+
87
+ // Returns the length of the node's descriptor list
88
+ Coordinate getNodeLength(Node * node)
89
+ {
90
+ return node->length;
91
+ }
92
+
93
+ // Returns the number of nodes in the graph
94
+ IDnum nodeCount(Graph * graph)
95
+ {
96
+ return graph->nodeCount;
97
+ }
98
+
99
+ // returns the number of sequences used to buid the graph
100
+ IDnum sequenceCount(Graph * graph)
101
+ {
102
+ return graph->sequenceCount;
103
+ }
104
+
105
+ // Creates an arc from node origin to node destination.
106
+ // If this arc already exists, increments its multiplicity by 1.
107
+ Arc *createArc(Node * originNode, Node * destinationNode, Graph * graph)
108
+ {
109
+ Arc *arc, *twinArc;
110
+ Node *destinationTwin;
111
+ IDnum lookupIndex;
112
+
113
+ if (originNode == NULL || destinationNode == NULL)
114
+ return NULL;
115
+
116
+ // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
117
+
118
+ arc = getArcBetweenNodes(originNode, destinationNode, graph);
119
+
120
+ if (arc != NULL) {
121
+ arc->multiplicity++;
122
+ arc->twinArc->multiplicity++;
123
+ return arc;
124
+ }
125
+ // If not found
126
+ #ifdef _OPENMP
127
+ #pragma omp critical
128
+ #endif
129
+ arc = allocateArc();
130
+ arc->destination = destinationNode;
131
+ arc->multiplicity = 1;
132
+ arc->previous = NULL;
133
+ arc->next = originNode->arc;
134
+ if (originNode->arc != NULL)
135
+ originNode->arc->previous = arc;
136
+ originNode->arc = arc;
137
+ originNode->arcCount++;
138
+
139
+ destinationTwin = destinationNode->twinNode;
140
+
141
+ // Hairpin case
142
+ if (destinationTwin == originNode) {
143
+ arc->multiplicity++;
144
+ arc->twinArc = arc;
145
+ if (graph->arcLookupTable != NULL) {
146
+ lookupIndex =
147
+ 2 * originNode->ID + destinationNode->ID +
148
+ 3 * graph->nodeCount;
149
+ arc->nextInLookupTable =
150
+ graph->arcLookupTable[lookupIndex];
151
+ graph->arcLookupTable[lookupIndex] = arc;
152
+ }
153
+ return arc;
154
+ }
155
+
156
+ #ifdef _OPENMP
157
+ #pragma omp critical
158
+ #endif
159
+ twinArc = allocateArc();
160
+ twinArc->destination = originNode->twinNode;
161
+ twinArc->multiplicity = 1;
162
+ twinArc->previous = NULL;
163
+ twinArc->next = destinationTwin->arc;
164
+ if (destinationTwin->arc != NULL)
165
+ destinationTwin->arc->previous = twinArc;
166
+ destinationTwin->arc = twinArc;
167
+ destinationTwin->arcCount++;
168
+
169
+ arc->twinArc = twinArc;
170
+ twinArc->twinArc = arc;
171
+
172
+ if (graph->arcLookupTable != NULL) {
173
+ lookupIndex =
174
+ 2 * originNode->ID + destinationNode->ID +
175
+ 3 * graph->nodeCount;
176
+ arc->nextInLookupTable =
177
+ graph->arcLookupTable[lookupIndex];
178
+ graph->arcLookupTable[lookupIndex] = arc;
179
+
180
+ lookupIndex =
181
+ -2 * destinationNode->ID - originNode->ID +
182
+ 3 * graph->nodeCount;
183
+ twinArc->nextInLookupTable =
184
+ graph->arcLookupTable[lookupIndex];
185
+ graph->arcLookupTable[lookupIndex] = twinArc;
186
+ }
187
+ return arc;
188
+ }
189
+
190
+ void createAnalogousArc(Node * originNode, Node * destinationNode,
191
+ Arc * refArc, Graph * graph)
192
+ {
193
+ Arc *arc, *twinArc;
194
+ Node *destinationTwin;
195
+ IDnum lookupIndex;
196
+
197
+ if (originNode == NULL || destinationNode == NULL)
198
+ return;
199
+
200
+ // velvetLog("Connecting nodes %i -> %i\n", originNode->ID, destinationNode->ID);
201
+
202
+ arc = getArcBetweenNodes(originNode, destinationNode, graph);
203
+
204
+ if (arc != NULL) {
205
+ if (refArc->twinArc != refArc) {
206
+ arc->multiplicity += getMultiplicity(refArc);
207
+ arc->twinArc->multiplicity +=
208
+ getMultiplicity(refArc);
209
+ } else {
210
+ arc->multiplicity += getMultiplicity(refArc) / 2;
211
+ arc->twinArc->multiplicity +=
212
+ getMultiplicity(refArc) / 2;
213
+ }
214
+ return;
215
+ }
216
+ // If not found
217
+ arc = allocateArc();
218
+ arc->destination = destinationNode;
219
+ arc->multiplicity = getMultiplicity(refArc);
220
+ arc->previous = NULL;
221
+ arc->next = originNode->arc;
222
+ if (originNode->arc != NULL)
223
+ originNode->arc->previous = arc;
224
+ originNode->arc = arc;
225
+ originNode->arcCount++;
226
+
227
+ destinationTwin = destinationNode->twinNode;
228
+
229
+ // Hairpin case
230
+ if (destinationTwin == originNode) {
231
+ arc->twinArc = arc;
232
+ if (refArc->twinArc != refArc)
233
+ arc->multiplicity *= 2;
234
+
235
+ if (graph->arcLookupTable != NULL) {
236
+ lookupIndex =
237
+ 2 * originNode->ID + destinationNode->ID
238
+ + 3 * graph->nodeCount;
239
+ arc->nextInLookupTable =
240
+ graph->arcLookupTable[lookupIndex];
241
+ graph->arcLookupTable[lookupIndex] = arc;
242
+ }
243
+ return;
244
+ }
245
+
246
+ twinArc = allocateArc();
247
+ twinArc->destination = originNode->twinNode;
248
+ twinArc->multiplicity = getMultiplicity(refArc);
249
+ twinArc->previous = NULL;
250
+ twinArc->next = destinationTwin->arc;
251
+ if (destinationTwin->arc != NULL)
252
+ destinationTwin->arc->previous = twinArc;
253
+ destinationTwin->arc = twinArc;
254
+ destinationTwin->arcCount++;
255
+
256
+ arc->twinArc = twinArc;
257
+ twinArc->twinArc = arc;
258
+
259
+ if (graph->arcLookupTable != NULL) {
260
+ lookupIndex =
261
+ 2 * originNode->ID + destinationNode->ID +
262
+ 3 * graph->nodeCount;
263
+ arc->nextInLookupTable =
264
+ graph->arcLookupTable[lookupIndex];
265
+ graph->arcLookupTable[lookupIndex] = arc;
266
+
267
+ lookupIndex =
268
+ -2 * destinationNode->ID - originNode->ID +
269
+ 3 * graph->nodeCount;
270
+ twinArc->nextInLookupTable =
271
+ graph->arcLookupTable[lookupIndex];
272
+ graph->arcLookupTable[lookupIndex] = twinArc;
273
+ }
274
+ }
275
+
276
+ Arc *getArcBetweenNodes(Node * originNode, Node * destinationNode,
277
+ Graph * graph)
278
+ {
279
+ Arc *arc;
280
+ Node *twinDestination, *twinOrigin;
281
+
282
+ if (originNode == NULL || destinationNode == NULL)
283
+ return NULL;
284
+
285
+ if (graph->arcLookupTable != NULL) {
286
+ for (arc =
287
+ graph->arcLookupTable[2 * originNode->ID +
288
+ destinationNode->ID +
289
+ 3 * graph->nodeCount];
290
+ arc != NULL; arc = arc->nextInLookupTable) {
291
+ if (arc->destination == destinationNode) {
292
+ return arc;
293
+ }
294
+ }
295
+ return NULL;
296
+ }
297
+
298
+ twinDestination = destinationNode->twinNode;
299
+ if (originNode->arcCount <= twinDestination->arcCount) {
300
+ for (arc = originNode->arc; arc != NULL; arc = arc->next)
301
+ if (arc->destination == destinationNode)
302
+ return arc;
303
+ return NULL;
304
+ }
305
+
306
+ twinOrigin = originNode->twinNode;
307
+ for (arc = twinDestination->arc; arc != NULL; arc = arc->next)
308
+ if (arc->destination == twinOrigin)
309
+ return arc->twinArc;
310
+ return NULL;
311
+ }
312
+
313
+ void destroyArc(Arc * arc, Graph * graph)
314
+ {
315
+ Node *origin, *destination;
316
+ Arc *twinArc;
317
+ Arc *currentArc;
318
+ IDnum lookupIndex;
319
+
320
+ if (arc == NULL)
321
+ return;
322
+
323
+ twinArc = arc->twinArc;
324
+ origin = twinArc->destination->twinNode;
325
+ destination = arc->destination->twinNode;
326
+
327
+ //velvetLog("Destroying arc %p\n", arc);
328
+
329
+ // Removing arc from list
330
+ if (origin->arc == arc) {
331
+ origin->arc = arc->next;
332
+ if (origin->arc != NULL)
333
+ origin->arc->previous = NULL;
334
+ } else {
335
+ arc->previous->next = arc->next;
336
+ if (arc->next != NULL)
337
+ arc->next->previous = arc->previous;
338
+ }
339
+
340
+ origin->arcCount--;
341
+
342
+ if (destination == origin) {
343
+ if (graph->arcLookupTable != NULL) {
344
+ lookupIndex =
345
+ 2 * origin->ID - destination->ID +
346
+ 3 * graph->nodeCount;
347
+ currentArc = graph->arcLookupTable[lookupIndex];
348
+ if (currentArc == arc)
349
+ graph->arcLookupTable[lookupIndex] =
350
+ arc->nextInLookupTable;
351
+ else {
352
+ while (currentArc->nextInLookupTable !=
353
+ arc)
354
+ currentArc =
355
+ currentArc->nextInLookupTable;
356
+
357
+ currentArc->nextInLookupTable =
358
+ twinArc->nextInLookupTable;
359
+ }
360
+ }
361
+
362
+ deallocateArc(arc);
363
+ return;
364
+ }
365
+ // Removing arc's twin from list
366
+ if (destination->arc == twinArc) {
367
+ destination->arc = twinArc->next;
368
+ if (destination->arc != NULL)
369
+ destination->arc->previous = NULL;
370
+ } else {
371
+ twinArc->previous->next = twinArc->next;
372
+ if (twinArc->next != NULL)
373
+ twinArc->next->previous = twinArc->previous;
374
+ }
375
+
376
+ destination->arcCount--;
377
+
378
+ if (graph->arcLookupTable != NULL) {
379
+ lookupIndex =
380
+ 2 * origin->ID - destination->ID +
381
+ 3 * graph->nodeCount;
382
+ currentArc = graph->arcLookupTable[lookupIndex];
383
+ if (currentArc == arc)
384
+ graph->arcLookupTable[lookupIndex] =
385
+ arc->nextInLookupTable;
386
+ else {
387
+ while (currentArc->nextInLookupTable != arc)
388
+ currentArc = currentArc->nextInLookupTable;
389
+
390
+ currentArc->nextInLookupTable =
391
+ arc->nextInLookupTable;
392
+ }
393
+
394
+ lookupIndex =
395
+ 2 * destination->ID - origin->ID +
396
+ 3 * graph->nodeCount;
397
+ currentArc = graph->arcLookupTable[lookupIndex];
398
+ if (currentArc == twinArc)
399
+ graph->arcLookupTable[lookupIndex] =
400
+ twinArc->nextInLookupTable;
401
+ else {
402
+ while (currentArc->nextInLookupTable != twinArc)
403
+ currentArc = currentArc->nextInLookupTable;
404
+
405
+ currentArc->nextInLookupTable =
406
+ twinArc->nextInLookupTable;
407
+ }
408
+ }
409
+ // Freeing memory
410
+ deallocateArc(arc);
411
+ deallocateArc(twinArc);
412
+ }
413
+
414
+ void destroyNode(Node * node, Graph * graph)
415
+ {
416
+ Node *twin = node->twinNode;
417
+ IDnum ID = node->ID;
418
+ IDnum index;
419
+
420
+ //velvetLog("Destroying %d\n and twin %d\n", getNodeID(node), getNodeID(twin));
421
+
422
+ if (ID < 0)
423
+ ID = -ID;
424
+
425
+ // Node arcs:
426
+ while (node->arc != NULL)
427
+ destroyArc(node->arc, graph);
428
+ while (twin->arc != NULL)
429
+ destroyArc(twin->arc, graph);
430
+
431
+ // Descriptors
432
+ free(node->descriptor);
433
+ free(twin->descriptor);
434
+
435
+ // Passage markers
436
+ while (node->marker != NULL_IDX)
437
+ destroyPassageMarker(node->marker);
438
+
439
+ // Reads starts
440
+ if (graph->nodeReads != NULL) {
441
+ index = ID + graph->nodeCount;
442
+ free(graph->nodeReads[index]);
443
+ graph->nodeReads[index] = NULL;
444
+ graph->nodeReadCounts[index] = 0;
445
+
446
+ index = -ID + graph->nodeCount;
447
+ free(graph->nodeReads[index]);
448
+ graph->nodeReads[index] = NULL;
449
+ graph->nodeReadCounts[index] = 0;
450
+ }
451
+
452
+ graph->nodes[ID] = NULL;
453
+ deallocateNode(node);
454
+ deallocateNode(twin);
455
+ }
456
+
457
+ int outDegree(Node * node)
458
+ {
459
+ int result = 0;
460
+ Arc *arc = node->arc;
461
+ while (arc != NULL) {
462
+ result += arc->multiplicity;
463
+ arc = arc->next;
464
+ }
465
+
466
+ return result;
467
+ }
468
+
469
+ int simpleArcCount(Node * node)
470
+ {
471
+ return node->arcCount;
472
+ }
473
+
474
+ int arcCount(Node * node)
475
+ {
476
+ int result = 0;
477
+ Arc *arc;
478
+
479
+ if (node == NULL)
480
+ return result;
481
+
482
+ arc = node->arc;
483
+ while (arc != NULL) {
484
+ result++;
485
+ if (arc->destination == node->twinNode)
486
+ result++;
487
+ arc = arc->next;
488
+ }
489
+
490
+ return result;
491
+
492
+ }
493
+
494
+ static Nucleotide getNucleotideInDescriptor(Descriptor * descriptor,
495
+ Coordinate i)
496
+ {
497
+ Descriptor *fourMer = descriptor + i / 4;
498
+
499
+ switch (i % 4) {
500
+ case 0:
501
+ return (*fourMer & 3);
502
+ case 1:
503
+ return (*fourMer & 12) >> 2;
504
+ case 2:
505
+ return (*fourMer & 48) >> 4;
506
+ case 3:
507
+ return (*fourMer & 192) >> 6;
508
+ }
509
+ return 0;
510
+ }
511
+
512
+ Nucleotide getNucleotideInNode(Node * node, Coordinate index) {
513
+ return getNucleotideInDescriptor(node->descriptor, index);
514
+ }
515
+
516
+ PassageMarkerI getMarker(Node * node)
517
+ {
518
+ return node->marker;
519
+ }
520
+
521
+ void setMarker(Node * node, PassageMarkerI marker)
522
+ {
523
+ if (node == NULL)
524
+ return;
525
+
526
+ if (marker == NULL_IDX) {
527
+ node->marker = NULL_IDX;
528
+ node->twinNode->marker = NULL_IDX;
529
+ return;
530
+ }
531
+
532
+ node->marker = marker;
533
+ setTopOfTheNode(marker);
534
+ node->twinNode->marker = getTwinMarker(marker);
535
+ setTopOfTheNode(getTwinMarker(marker));
536
+ }
537
+
538
+ void setNodeStatus(Node * node, boolean status)
539
+ {
540
+ node->status = status;
541
+ node->twinNode->status = status;
542
+ }
543
+
544
+ void setSingleNodeStatus(Node * node, boolean status)
545
+ {
546
+ node->status = status;
547
+ }
548
+
549
+ boolean getNodeStatus(Node * node)
550
+ {
551
+ if (node == NULL)
552
+ return false;
553
+ return node->status;
554
+ }
555
+
556
+ IDnum getNodeID(Node * node)
557
+ {
558
+ if (node == NULL)
559
+ return 0;
560
+
561
+ return node->ID;
562
+ }
563
+
564
+ void resetNodeStatus(Graph * graph)
565
+ {
566
+ IDnum nodeIndex;
567
+ Node *node;
568
+
569
+ for (nodeIndex = 1; nodeIndex <= graph->nodeCount; nodeIndex++) {
570
+ node = graph->nodes[nodeIndex];
571
+ if (node == NULL)
572
+ continue;
573
+
574
+ node->status = false;
575
+ node->twinNode->status = false;
576
+ }
577
+ }
578
+
579
+ Node *getNodeInGraph(Graph * graph, IDnum nodeID)
580
+ {
581
+ if (nodeID == 0)
582
+ return NULL;
583
+ else if (nodeID > 0)
584
+ return graph->nodes[nodeID];
585
+ else if (graph->nodes[-nodeID] == NULL)
586
+ return NULL;
587
+ else
588
+ return graph->nodes[-nodeID]->twinNode;
589
+ }
590
+
591
+ Arc *getArc(Node * node)
592
+ {
593
+ return node->arc;
594
+ }
595
+
596
+ Arc *getNextArc(Arc * arc)
597
+ {
598
+ return arc->next;
599
+ }
600
+
601
+ IDnum getMultiplicity(Arc * arc)
602
+ {
603
+ if (arc == NULL)
604
+ return 0;
605
+
606
+ return arc->multiplicity;
607
+ }
608
+
609
+ Node *getOrigin(Arc * arc)
610
+ {
611
+ if (arc == NULL)
612
+ return NULL;
613
+
614
+ return arc->twinArc->destination->twinNode;
615
+ }
616
+
617
+ Node *getDestination(Arc * arc)
618
+ {
619
+ if (arc == NULL)
620
+ return NULL;
621
+
622
+ return arc->destination;
623
+ }
624
+
625
+ IDnum markerCount(Node * node)
626
+ {
627
+ IDnum count = 0;
628
+ PassageMarkerI marker;
629
+
630
+ for (marker = getMarker(node); marker != NULL_IDX;
631
+ marker = getNextInNode(marker))
632
+ count++;
633
+
634
+ return count;
635
+ }
636
+
637
+ void appendNodeSequence(Node * node, TightString * sequence,
638
+ Coordinate writeIndex)
639
+ {
640
+ Coordinate i;
641
+ Nucleotide nucleotide;
642
+
643
+ //velvetLog("Getting sequence from node %d of length %d (%d)\n", getNodeID(node), getNodeLength(node), getLength(nodeLabel));
644
+
645
+ for (i = 0; i < getNodeLength(node); i++) {
646
+ nucleotide =
647
+ getNucleotideInDescriptor(node->descriptor, i);
648
+ writeNucleotideAtPosition(nucleotide, i + writeIndex,
649
+ sequence);
650
+ }
651
+ }
652
+
653
+ static void writeNucleotideInDescriptor(Nucleotide nucleotide,
654
+ Descriptor * descriptor,
655
+ Coordinate i)
656
+ {
657
+ Descriptor *fourMer = descriptor + i / 4;
658
+ switch (i % 4) {
659
+ case 3:
660
+ *fourMer &= 63;
661
+ *fourMer += nucleotide << 6;
662
+ return;
663
+ case 2:
664
+ *fourMer &= 207;
665
+ *fourMer += nucleotide << 4;
666
+ return;
667
+ case 1:
668
+ *fourMer &= 243;
669
+ *fourMer += nucleotide << 2;
670
+ return;
671
+ case 0:
672
+ *fourMer &= 252;
673
+ *fourMer += nucleotide;
674
+ }
675
+ }
676
+
677
+ static inline Descriptor *mergeDescriptors(Descriptor * descr,
678
+ Coordinate destinationLength,
679
+ Descriptor * copy,
680
+ Coordinate sourceLength,
681
+ size_t arrayLength)
682
+ {
683
+ Descriptor *readPtr, *writePtr;
684
+ Descriptor readCopy;
685
+ int readOffset, writeOffset;
686
+ Descriptor *new = callocOrExit(arrayLength, Descriptor);
687
+ Coordinate index;
688
+
689
+ readPtr = descr;
690
+ readCopy = *readPtr;
691
+ writePtr = new;
692
+ writeOffset = 0;
693
+ for (index = 0; index < destinationLength; index++) {
694
+ (*writePtr) >>= 2;
695
+ (*writePtr) += (readCopy & 3) << 6;
696
+ readCopy >>= 2;
697
+
698
+ writeOffset++;
699
+ if (writeOffset == 4) {
700
+ writePtr++;
701
+ readPtr++;
702
+ if (index < destinationLength - 1)
703
+ readCopy = *readPtr;
704
+ writeOffset = 0;
705
+ }
706
+ }
707
+
708
+ readPtr = copy;
709
+ readCopy = *readPtr;
710
+ readOffset = 0;
711
+ for (index = 0; index < sourceLength; index++) {
712
+ (*writePtr) >>= 2;
713
+ (*writePtr) += (readCopy & 3) << 6;
714
+ readCopy >>= 2;
715
+
716
+ writeOffset++;
717
+ if (writeOffset == 4) {
718
+ writePtr++;
719
+ writeOffset = 0;
720
+ }
721
+
722
+ readOffset++;
723
+ if (readOffset == 4) {
724
+ readPtr++;
725
+ if (index < sourceLength - 1)
726
+ readCopy = *readPtr;
727
+ readOffset = 0;
728
+ }
729
+ }
730
+
731
+ if (writeOffset != 0) {
732
+ while (writeOffset != 4) {
733
+ (*writePtr) >>= 2;
734
+ writeOffset++;
735
+ }
736
+ }
737
+
738
+ return new;
739
+ }
740
+
741
+ static void addBufferToDescriptor(Node * node, Coordinate length)
742
+ {
743
+ Descriptor *descr;
744
+ Coordinate newLength;
745
+ size_t arrayLength;
746
+ Node *twinNode;
747
+ Coordinate index;
748
+ Descriptor *old_descriptor;
749
+
750
+ if (node == NULL)
751
+ return;
752
+
753
+ twinNode = node->twinNode;
754
+ descr = node->descriptor;
755
+
756
+ // Amendments for empty descriptors
757
+ if (descr == NULL) {
758
+ arrayLength = length / 4;
759
+ if (length % 4 != 0)
760
+ arrayLength++;
761
+
762
+ node->descriptor = callocOrExit(arrayLength, Descriptor);
763
+ node->length = length;
764
+ twinNode->descriptor =
765
+ callocOrExit(arrayLength, Descriptor);
766
+ twinNode->length = length;
767
+ return;
768
+ }
769
+
770
+ newLength = node->length + length;
771
+ arrayLength = newLength / 4;
772
+ if (newLength % 4 != 0)
773
+ arrayLength++;
774
+
775
+ // Merging forward descriptors
776
+ node->descriptor =
777
+ reallocOrExit(node->descriptor, arrayLength, Descriptor);
778
+
779
+ for (index = node->length; index < newLength; index++)
780
+ writeNucleotideInDescriptor(ADENINE, node->descriptor,
781
+ index);
782
+ node->length = newLength;
783
+
784
+ // Merging reverse descriptors
785
+ old_descriptor = twinNode->descriptor;
786
+ twinNode->descriptor = callocOrExit(arrayLength, Descriptor);
787
+ for (index = 0; index < twinNode->length; index++)
788
+ writeNucleotideInDescriptor(getNucleotideInDescriptor
789
+ (old_descriptor, index),
790
+ twinNode->descriptor,
791
+ index + length);
792
+ for (index = 0; index < length; index++)
793
+ writeNucleotideInDescriptor(THYMINE, twinNode->descriptor,
794
+ index);
795
+ free(old_descriptor);
796
+ twinNode->length = newLength;
797
+ }
798
+
799
+ void appendDescriptors(Node * destination, Node * source)
800
+ {
801
+ Descriptor *copy;
802
+ Descriptor *twinCopy;
803
+ Descriptor *descr;
804
+ Descriptor *twinDescr;
805
+ Coordinate newLength, destinationLength, sourceLength;
806
+ size_t arrayLength;
807
+ Descriptor *new;
808
+ Node *twinDestination;
809
+
810
+ if (source == NULL || destination == NULL)
811
+ return;
812
+
813
+ twinDestination = destination->twinNode;
814
+ descr = destination->descriptor;
815
+ twinDescr = twinDestination->descriptor;
816
+ copy = source->descriptor;
817
+ twinCopy = source->twinNode->descriptor;
818
+
819
+ // Amendments for empty descriptors
820
+ if (getNodeLength(source) == 0)
821
+ return;
822
+ if (getNodeLength(destination) == 0) {
823
+ destination->descriptor = copy;
824
+ twinDestination->descriptor = twinCopy;
825
+ source->descriptor = NULL;
826
+ source->twinNode->descriptor = NULL;
827
+ destination->length = source->length;
828
+ destination->twinNode->length = source->length;
829
+ source->length = 0;
830
+ source->twinNode->length = 0;
831
+ return;
832
+ }
833
+
834
+ destinationLength = destination->length;
835
+ sourceLength = source->length;
836
+ newLength = destinationLength + sourceLength;
837
+ arrayLength = newLength / 4;
838
+ if (newLength % 4 != 0)
839
+ arrayLength++;
840
+
841
+ // Merging forward descriptors
842
+ new =
843
+ mergeDescriptors(descr, destinationLength, copy, sourceLength,
844
+ arrayLength);
845
+ free(descr);
846
+ destination->descriptor = new;
847
+ destination->length = newLength;
848
+
849
+ // Merging reverse descriptors
850
+ new =
851
+ mergeDescriptors(twinCopy, sourceLength, twinDescr,
852
+ destinationLength, arrayLength);
853
+ free(twinDescr);
854
+ twinDestination->descriptor = new;
855
+ twinDestination->length = newLength;
856
+ }
857
+
858
+ static void catDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength)
859
+ {
860
+ Coordinate index;
861
+ Nucleotide nucleotide;
862
+
863
+ for (index = 0; index < sourceLength; index++) {
864
+ nucleotide = getNucleotideInDescriptor(copy, index);
865
+ writeNucleotideInDescriptor(nucleotide, descr, index + destinationLength);
866
+ }
867
+ }
868
+
869
+ static void reverseCatDescriptors(Descriptor * descr, Coordinate destinationLength, Descriptor * copy, Coordinate sourceLength, Coordinate totalLength)
870
+ {
871
+ Coordinate shift = totalLength - destinationLength - sourceLength;
872
+ Coordinate index;
873
+ Nucleotide nucleotide;
874
+
875
+ for (index = 0; index < sourceLength; index++) {
876
+ nucleotide = getNucleotideInDescriptor(copy, index);
877
+ writeNucleotideInDescriptor(nucleotide, descr, index + shift);
878
+ }
879
+ }
880
+
881
+ void directlyAppendDescriptors(Node * destination, Node * source, Coordinate totalLength)
882
+ {
883
+ Descriptor *copy;
884
+ Descriptor *twinCopy;
885
+ Descriptor *descr;
886
+ Descriptor *twinDescr;
887
+ Coordinate destinationLength, sourceLength;
888
+
889
+ if (source == NULL || destination == NULL)
890
+ return;
891
+
892
+ descr = destination->descriptor;
893
+ twinDescr = destination->twinNode->descriptor;
894
+ copy = source->descriptor;
895
+ twinCopy = source->twinNode->descriptor;
896
+
897
+ // Amendments for empty descriptors
898
+ if (getNodeLength(source) == 0)
899
+ return;
900
+
901
+ destinationLength = destination->length;
902
+ sourceLength = source->length;
903
+
904
+ // Merging forward descriptors
905
+ catDescriptors(descr, destinationLength, copy, sourceLength);
906
+
907
+ // Merging reverse descriptors
908
+ reverseCatDescriptors(twinDescr, destinationLength, twinCopy, sourceLength, totalLength);
909
+
910
+ destination->length += source->length;
911
+ destination->twinNode->length += source->length;
912
+ }
913
+
914
+ static void copyDownDescriptor(Descriptor ** writePtr, int *writeOffset,
915
+ Descriptor * source, Coordinate length)
916
+ {
917
+ Descriptor *readPtr = source;
918
+ Descriptor readCopy = *readPtr;
919
+ int readOffset = 0;
920
+ Coordinate index;
921
+
922
+ for (index = 0; index < length; index++) {
923
+ (**writePtr) >>= 2;
924
+ (**writePtr) += (readCopy & 3) << 6;
925
+ readCopy >>= 2;
926
+
927
+ (*writeOffset)++;
928
+ if (*writeOffset == 4) {
929
+ (*writePtr)++;
930
+ *writeOffset = 0;
931
+ }
932
+
933
+ readOffset++;
934
+ if (readOffset == 4) {
935
+ readPtr++;
936
+ if (index < length - 1)
937
+ readCopy = *readPtr;
938
+ readOffset = 0;
939
+ }
940
+ }
941
+ }
942
+
943
+ static void copyDownSequence(Descriptor ** writePtr, int *writeOffset,
944
+ TightString * sequence, Coordinate start,
945
+ Coordinate finish, int WORDLENGTH)
946
+ {
947
+ boolean forward = (start < finish);
948
+ Coordinate sourceLength = finish - start;
949
+ Coordinate index;
950
+ Nucleotide nucleotide;
951
+
952
+ if (!forward)
953
+ sourceLength *= -1;
954
+
955
+ for (index = 0; index < sourceLength; index++) {
956
+ if (forward)
957
+ nucleotide =
958
+ getNucleotide(start + WORDLENGTH - 1 + index,
959
+ sequence);
960
+ else
961
+ nucleotide =
962
+ #ifndef COLOR
963
+ 3 - getNucleotide(start - index - 1, sequence);
964
+ #else
965
+ getNucleotide(start - index - 1, sequence);
966
+ #endif
967
+
968
+ (**writePtr) >>= 2;
969
+ (**writePtr) += nucleotide << 6;
970
+
971
+ (*writeOffset)++;
972
+ if (*writeOffset == 4) {
973
+ (*writePtr)++;
974
+ *writeOffset = 0;
975
+ }
976
+ }
977
+ }
978
+
979
+ static Descriptor *appendSequenceToDescriptor(Descriptor * descr,
980
+ Coordinate nodeLength,
981
+ PassageMarkerI marker,
982
+ TightString *sequences,
983
+ int WORDLENGTH,
984
+ size_t arrayLength,
985
+ boolean downStream)
986
+ {
987
+ int writeOffset = 0;
988
+ Descriptor *new = callocOrExit(arrayLength, Descriptor);
989
+ Descriptor *writePtr = new;
990
+ TightString *sequence;
991
+ IDnum sequenceID = getPassageMarkerSequenceID(marker);
992
+ Coordinate start = getPassageMarkerStart(marker);
993
+ Coordinate finish = getPassageMarkerFinish(marker);
994
+
995
+ if (sequenceID > 0)
996
+ sequence = getTightStringInArray(sequences, sequenceID - 1);
997
+ else
998
+ sequence = getTightStringInArray(sequences, -sequenceID - 1);
999
+
1000
+ if (downStream)
1001
+ copyDownDescriptor(&writePtr, &writeOffset, descr,
1002
+ nodeLength);
1003
+
1004
+ copyDownSequence(&writePtr, &writeOffset, sequence, start, finish,
1005
+ WORDLENGTH);
1006
+
1007
+ if (!downStream)
1008
+ copyDownDescriptor(&writePtr, &writeOffset, descr,
1009
+ nodeLength);
1010
+
1011
+ if (writeOffset != 0) {
1012
+ while (writeOffset != 4) {
1013
+ (*writePtr) >>= 2;
1014
+ writeOffset++;
1015
+ }
1016
+ }
1017
+
1018
+ return new;
1019
+ }
1020
+
1021
+ void appendSequence(Node * node, TightString * reads,
1022
+ PassageMarkerI guide, Graph * graph)
1023
+ {
1024
+ Descriptor *descr;
1025
+ Descriptor *twinDescr;
1026
+ Coordinate newLength, nodeLength, sourceLength;
1027
+ size_t arrayLength;
1028
+ Descriptor *new;
1029
+ Node *twinNode;
1030
+
1031
+ if (node == NULL)
1032
+ return;
1033
+
1034
+ twinNode = node->twinNode;
1035
+ descr = node->descriptor;
1036
+ twinDescr = twinNode->descriptor;
1037
+ nodeLength = node->length;
1038
+ sourceLength = getPassageMarkerLength(guide);
1039
+
1040
+ // Amendments for empty descriptors
1041
+ if (sourceLength == 0)
1042
+ return;
1043
+
1044
+ newLength = nodeLength + sourceLength;
1045
+ arrayLength = newLength / 4;
1046
+ if (newLength % 4 != 0)
1047
+ arrayLength++;
1048
+
1049
+ // Merging forward descriptors
1050
+ new =
1051
+ appendSequenceToDescriptor(descr, nodeLength, guide, reads,
1052
+ getWordLength(graph), arrayLength,
1053
+ true);
1054
+ free(descr);
1055
+ node->descriptor = new;
1056
+ node->length = newLength;
1057
+
1058
+ // Merging reverse descriptors
1059
+ new =
1060
+ appendSequenceToDescriptor(twinDescr, nodeLength,
1061
+ getTwinMarker(guide), reads,
1062
+ getWordLength(graph), arrayLength,
1063
+ false);
1064
+ free(twinDescr);
1065
+ twinNode->descriptor = new;
1066
+ twinNode->length = newLength;
1067
+ }
1068
+
1069
+ void setMultiplicity(Arc * arc, IDnum mult)
1070
+ {
1071
+ arc->multiplicity = mult;
1072
+ arc->twinArc->multiplicity = mult;
1073
+ }
1074
+
1075
+ // Reshuffles the graph->nodes array to remove NULL pointers
1076
+ // Beware that node IDs are accordingly reshuffled (all pointers remain valid though)
1077
+ void renumberNodes(Graph * graph)
1078
+ {
1079
+ IDnum nodeIndex;
1080
+ Node *currentNode;
1081
+ IDnum counter = 0;
1082
+ IDnum nodes = graph->nodeCount;
1083
+ IDnum newIndex;
1084
+
1085
+ velvetLog("Renumbering nodes\n");
1086
+ velvetLog("Initial node count %li\n", (long) graph->nodeCount);
1087
+
1088
+ for (nodeIndex = 1; nodeIndex <= nodes; nodeIndex++) {
1089
+ currentNode = getNodeInGraph(graph, nodeIndex);
1090
+
1091
+ if (currentNode == NULL)
1092
+ counter++;
1093
+ else if (counter != 0) {
1094
+ newIndex = nodeIndex - counter;
1095
+ currentNode->ID = newIndex;
1096
+ currentNode->twinNode->ID = -newIndex;
1097
+ graph->nodes[newIndex] = currentNode;
1098
+
1099
+ if (graph->nodeReads != NULL) {
1100
+ graph->nodeReads[newIndex + nodes] =
1101
+ graph->nodeReads[nodeIndex + nodes];
1102
+ graph->nodeReadCounts[newIndex + nodes] =
1103
+ graph->nodeReadCounts[nodeIndex +
1104
+ nodes];
1105
+
1106
+ graph->nodeReads[nodeIndex + nodes] = NULL;
1107
+ graph->nodeReadCounts[nodeIndex + nodes] =
1108
+ 0;
1109
+
1110
+ graph->nodeReads[-newIndex + nodes] =
1111
+ graph->nodeReads[-nodeIndex + nodes];
1112
+ graph->nodeReadCounts[-newIndex + nodes] =
1113
+ graph->nodeReadCounts[-nodeIndex +
1114
+ nodes];
1115
+
1116
+ graph->nodeReads[-nodeIndex + nodes] =
1117
+ NULL;
1118
+ graph->nodeReadCounts[-nodeIndex + nodes] =
1119
+ 0;
1120
+ }
1121
+
1122
+ if (graph->gapMarkers != NULL) {
1123
+ graph->gapMarkers[newIndex] =
1124
+ graph->gapMarkers[nodeIndex];
1125
+ graph->gapMarkers[nodeIndex] = NULL;
1126
+ }
1127
+ }
1128
+ }
1129
+
1130
+ // Shitfting array to the left
1131
+ if (graph->nodeReads != NULL && counter != 0) {
1132
+ for (nodeIndex = counter; nodeIndex <= 2 * nodes - counter;
1133
+ nodeIndex++) {
1134
+ graph->nodeReads[nodeIndex - counter] =
1135
+ graph->nodeReads[nodeIndex];
1136
+ graph->nodeReadCounts[nodeIndex - counter] =
1137
+ graph->nodeReadCounts[nodeIndex];
1138
+ }
1139
+ }
1140
+
1141
+ // Rellocating node space
1142
+ graph->nodeCount -= counter;
1143
+ graph->nodes =
1144
+ reallocOrExit(graph->nodes, graph->nodeCount + 1, Node *);
1145
+
1146
+ // Reallocating short read marker arrays
1147
+ if (graph->nodeReads != NULL) {
1148
+ graph->nodeReads =
1149
+ reallocOrExit(graph->nodeReads,
1150
+ 2 * graph->nodeCount +
1151
+ 1, ShortReadMarker *);
1152
+ graph->nodeReadCounts =
1153
+ reallocOrExit(graph->nodeReadCounts,
1154
+ 2 * graph->nodeCount + 1, IDnum);
1155
+ }
1156
+
1157
+ // Reallocating gap marker table
1158
+ if (graph->gapMarkers != NULL)
1159
+ graph->gapMarkers = reallocOrExit(graph->gapMarkers,
1160
+ graph->nodeCount +
1161
+ 1, GapMarker *);
1162
+
1163
+ velvetLog("Removed %li null nodes\n", (long) counter);
1164
+ }
1165
+
1166
+ void splitNodeDescriptor(Node * source, Node * target, Coordinate offset)
1167
+ {
1168
+ Coordinate originalLength = source->length;
1169
+ Coordinate backLength = originalLength - offset;
1170
+ Coordinate index;
1171
+ Descriptor *descriptor, *new;
1172
+ size_t arrayLength;
1173
+ Nucleotide nucleotide;
1174
+
1175
+ source->length = offset;
1176
+ source->twinNode->length = offset;
1177
+
1178
+ if (target != NULL) {
1179
+ target->length = backLength;
1180
+ target->twinNode->length = backLength;
1181
+ free(target->descriptor);
1182
+ free(target->twinNode->descriptor);
1183
+ target->descriptor = NULL;
1184
+ target->twinNode->descriptor = NULL;
1185
+ }
1186
+
1187
+ if (backLength == 0)
1188
+ return;
1189
+
1190
+ descriptor = source->descriptor;
1191
+
1192
+ arrayLength = backLength / 4;
1193
+ if (backLength % 4 > 0)
1194
+ arrayLength++;
1195
+
1196
+ if (target != NULL) {
1197
+ // Target node .. forwards
1198
+ new = mallocOrExit(arrayLength, Descriptor);
1199
+ target->descriptor = new;
1200
+ for (index = 0; index < backLength; index++) {
1201
+ nucleotide =
1202
+ getNucleotideInDescriptor(descriptor, index);
1203
+ writeNucleotideInDescriptor(nucleotide, new,
1204
+ index);
1205
+ }
1206
+ }
1207
+ // Source node
1208
+ for (index = backLength; index < originalLength; index++) {
1209
+ nucleotide = getNucleotideInDescriptor(descriptor, index);
1210
+ writeNucleotideInDescriptor(nucleotide, descriptor,
1211
+ index - backLength);
1212
+ }
1213
+
1214
+ if (target == NULL)
1215
+ return;
1216
+
1217
+ // target node other way
1218
+ descriptor = source->twinNode->descriptor;
1219
+ new = mallocOrExit(arrayLength, Descriptor);
1220
+ target->twinNode->descriptor = new;
1221
+
1222
+ for (index = offset; index < originalLength; index++) {
1223
+ nucleotide = getNucleotideInDescriptor(descriptor, index);
1224
+ writeNucleotideInDescriptor(nucleotide, new,
1225
+ index - offset);
1226
+ }
1227
+ }
1228
+
1229
+ void reduceNode(Node * node)
1230
+ {
1231
+ free(node->descriptor);
1232
+ node->descriptor = NULL;
1233
+ node->length = 0;
1234
+
1235
+ free(node->twinNode->descriptor);
1236
+ node->twinNode->descriptor = NULL;
1237
+ node->twinNode->length = 0;
1238
+ }
1239
+
1240
+ // Allocate memory for an empty graph created with sequenceCount different sequences
1241
+ Graph *emptyGraph(IDnum sequenceCount, int wordLength)
1242
+ {
1243
+ Graph *newGraph = mallocOrExit(1, Graph);
1244
+ newGraph->sequenceCount = sequenceCount;
1245
+ newGraph->arcLookupTable = NULL;
1246
+ newGraph->nodeReads = NULL;
1247
+ newGraph->nodeReadCounts = NULL;
1248
+ newGraph->wordLength = wordLength;
1249
+ newGraph->gapMarkers = NULL;
1250
+ return newGraph;
1251
+ }
1252
+
1253
+ static Descriptor *newPositiveDescriptor(IDnum sequenceID,
1254
+ Coordinate start,
1255
+ Coordinate finish,
1256
+ TightString *sequences,
1257
+ int WORDLENGTH)
1258
+ {
1259
+ Coordinate index;
1260
+ Nucleotide nucleotide;
1261
+ TightString *tString = getTightStringInArray (sequences, sequenceID - 1);
1262
+ Coordinate length = finish - start;
1263
+ Descriptor *res;
1264
+ size_t arrayLength = length / 4;
1265
+
1266
+ if (length % 4 > 0)
1267
+ arrayLength++;
1268
+
1269
+ res = mallocOrExit(arrayLength, Descriptor);
1270
+
1271
+ for (index = 0; index < length; index++) {
1272
+ nucleotide =
1273
+ getNucleotide(start + index + WORDLENGTH - 1, tString);
1274
+ writeNucleotideInDescriptor(nucleotide, res, index);
1275
+ }
1276
+
1277
+ return res;
1278
+
1279
+ }
1280
+
1281
+ static Descriptor *newNegativeDescriptor(IDnum sequenceID,
1282
+ Coordinate start,
1283
+ Coordinate finish,
1284
+ TightString *sequences,
1285
+ int WORDLENGTH)
1286
+ {
1287
+ Coordinate index;
1288
+ Nucleotide nucleotide;
1289
+ TightString *tString = getTightStringInArray (sequences, -sequenceID - 1);
1290
+ Coordinate length = start - finish;
1291
+ Descriptor *res;
1292
+ size_t arrayLength = length / 4;
1293
+
1294
+ if (length % 4 > 0)
1295
+ arrayLength++;
1296
+
1297
+ res = mallocOrExit(arrayLength, Descriptor);
1298
+
1299
+ for (index = 0; index < length; index++) {
1300
+ nucleotide = getNucleotide(start - index, tString);
1301
+ #ifndef COLOR
1302
+ writeNucleotideInDescriptor(3 - nucleotide, res, index);
1303
+ #else
1304
+ writeNucleotideInDescriptor(nucleotide, res, index);
1305
+ #endif
1306
+ }
1307
+
1308
+ return res;
1309
+
1310
+ }
1311
+
1312
+ static Descriptor *newDescriptor(IDnum sequenceID, Coordinate start,
1313
+ Coordinate finish,
1314
+ TightString * sequences, int WORDLENGTH)
1315
+ {
1316
+ if (sequenceID > 0)
1317
+ return newPositiveDescriptor(sequenceID, start, finish,
1318
+ sequences, WORDLENGTH);
1319
+ else
1320
+ return newNegativeDescriptor(sequenceID, start, finish,
1321
+ sequences, WORDLENGTH);
1322
+ }
1323
+
1324
+ // Constructor
1325
+ // Memory allocated
1326
+ Node *newNode(IDnum sequenceID, Coordinate start, Coordinate finish,
1327
+ Coordinate offset, IDnum ID, TightString * sequences,
1328
+ int WORDLENGTH)
1329
+ {
1330
+ Node *newnd = allocateNode();
1331
+ Node *antiNode = allocateNode();
1332
+
1333
+ newnd->ID = ID;
1334
+ newnd->descriptor =
1335
+ newDescriptor(sequenceID, start + offset, finish + offset,
1336
+ sequences, WORDLENGTH);
1337
+ newnd->arc = NULL;
1338
+ newnd->arcCount = 0;
1339
+ newnd->marker = NULL_IDX;
1340
+ newnd->status = false;
1341
+
1342
+ #ifndef SINGLE_COV_CAT
1343
+ Category cat;
1344
+ for (cat = 0; cat < CATEGORIES; cat++) {
1345
+ newnd->virtualCoverage[cat] = 0;
1346
+ newnd->originalVirtualCoverage[cat] = 0;
1347
+ }
1348
+ #else
1349
+ newnd->virtualCoverage = 0;
1350
+ #endif
1351
+
1352
+ antiNode->ID = -ID;
1353
+ antiNode->descriptor =
1354
+ newDescriptor(-sequenceID, finish + offset - 1,
1355
+ start + offset - 1, sequences, WORDLENGTH);
1356
+ antiNode->arc = NULL;
1357
+ antiNode->arcCount = 0;
1358
+ antiNode->marker = NULL_IDX;
1359
+ antiNode->status = false;
1360
+
1361
+ #ifndef SINGLE_COV_CAT
1362
+ for (cat = 0; cat < CATEGORIES; cat++) {
1363
+ antiNode->virtualCoverage[cat] = 0;
1364
+ antiNode->originalVirtualCoverage[cat] = 0;
1365
+ }
1366
+ #else
1367
+ antiNode->virtualCoverage = 0;
1368
+ #endif
1369
+
1370
+ newnd->twinNode = antiNode;
1371
+ antiNode->twinNode = newnd;
1372
+
1373
+ if (sequenceID > 0) {
1374
+ newnd->length = finish - start;
1375
+ antiNode->length = finish - start;
1376
+ } else {
1377
+ newnd->length = start - finish;
1378
+ antiNode->length = start - finish;
1379
+ }
1380
+
1381
+ return newnd;
1382
+ }
1383
+
1384
+ void allocateNodeSpace(Graph * graph, IDnum nodeCount)
1385
+ {
1386
+ graph->nodes = callocOrExit(nodeCount + 1, Node *);
1387
+ graph->nodeCount = nodeCount;
1388
+ }
1389
+
1390
+ boolean getUniqueness(Node * node)
1391
+ {
1392
+ return node->uniqueness;
1393
+ }
1394
+
1395
+ void setUniqueness(Node * node, boolean value)
1396
+ {
1397
+ node->uniqueness = value;
1398
+ node->twinNode->uniqueness = value;
1399
+ }
1400
+
1401
+ Node *emptyNode()
1402
+ {
1403
+ Node *newnd = allocateNode();
1404
+ Node *antiNode = allocateNode();
1405
+
1406
+ newnd->ID = 0;
1407
+ newnd->descriptor = NULL;
1408
+ newnd->arc = NULL;
1409
+ newnd->arcCount = 0;
1410
+ newnd->marker = NULL_IDX;
1411
+ newnd->length = 0;
1412
+ newnd->uniqueness = false;
1413
+
1414
+ #ifndef SINGLE_COV_CAT
1415
+ Category cat;
1416
+ for (cat = 0; cat < CATEGORIES; cat++) {
1417
+ newnd->virtualCoverage[cat] = 0;
1418
+ newnd->originalVirtualCoverage[cat] = 0;
1419
+ }
1420
+ #else
1421
+ newnd->virtualCoverage = 0;
1422
+ #endif
1423
+
1424
+ antiNode->ID = 0;
1425
+ antiNode->descriptor = NULL;
1426
+ antiNode->arc = NULL;
1427
+ antiNode->arcCount = 0;
1428
+ antiNode->marker = NULL_IDX;
1429
+ antiNode->length = 0;
1430
+ antiNode->uniqueness = false;
1431
+
1432
+ #ifndef SINGLE_COV_CAT
1433
+ for (cat = 0; cat < CATEGORIES; cat++) {
1434
+ antiNode->virtualCoverage[cat] = 0;
1435
+ antiNode->originalVirtualCoverage[cat] = 0;
1436
+ }
1437
+ #else
1438
+ antiNode->virtualCoverage = 0;
1439
+ #endif
1440
+
1441
+ newnd->twinNode = antiNode;
1442
+ antiNode->twinNode = newnd;
1443
+
1444
+ return newnd;
1445
+
1446
+ }
1447
+
1448
+ Node *addEmptyNodeToGraph(Graph * graph, IDnum ID)
1449
+ {
1450
+ Node *newnd = emptyNode();
1451
+
1452
+ newnd->ID = ID;
1453
+ newnd->twinNode->ID = -ID;
1454
+
1455
+ graph->nodes[ID] = newnd;
1456
+
1457
+ return newnd;
1458
+
1459
+ }
1460
+
1461
+ #ifndef SINGLE_COV_CAT
1462
+
1463
+ void setVirtualCoverage(Node * node, Category category,
1464
+ Coordinate coverage)
1465
+ {
1466
+ node->virtualCoverage[category] = coverage;
1467
+ node->twinNode->virtualCoverage[category] =
1468
+ node->virtualCoverage[category];
1469
+ }
1470
+
1471
+ void incrementVirtualCoverage(Node * node, Category category,
1472
+ Coordinate coverage)
1473
+ {
1474
+ node->virtualCoverage[category] += coverage;
1475
+ node->twinNode->virtualCoverage[category] =
1476
+ node->virtualCoverage[category];
1477
+ }
1478
+
1479
+ Coordinate getVirtualCoverage(Node * node, Category category)
1480
+ {
1481
+ return node->virtualCoverage[category];
1482
+ }
1483
+
1484
+ Coordinate getTotalCoverage(Node * node)
1485
+ {
1486
+ Category cat;
1487
+ Coordinate coverage = 0;
1488
+
1489
+ for (cat = 0; cat < CATEGORIES; cat++)
1490
+ coverage += node->virtualCoverage[cat];
1491
+
1492
+ return coverage;
1493
+ }
1494
+
1495
+ void setOriginalVirtualCoverage(Node * node, Category category,
1496
+ Coordinate coverage)
1497
+ {
1498
+ node->originalVirtualCoverage[category] = coverage;
1499
+ node->twinNode->originalVirtualCoverage[category] =
1500
+ node->originalVirtualCoverage[category];
1501
+ }
1502
+
1503
+ void incrementOriginalVirtualCoverage(Node * node, Category category,
1504
+ Coordinate coverage)
1505
+ {
1506
+ node->originalVirtualCoverage[category] += coverage;
1507
+ node->twinNode->originalVirtualCoverage[category] =
1508
+ node->originalVirtualCoverage[category];
1509
+ }
1510
+
1511
+ Coordinate getOriginalVirtualCoverage(Node * node, Category category)
1512
+ {
1513
+ return node->originalVirtualCoverage[category];
1514
+ }
1515
+
1516
+ #else
1517
+
1518
+ void setVirtualCoverage(Node * node,
1519
+ Coordinate coverage)
1520
+ {
1521
+ node->virtualCoverage = coverage;
1522
+ node->twinNode->virtualCoverage = coverage;
1523
+ }
1524
+
1525
+ void incrementVirtualCoverage(Node * node,
1526
+ Coordinate coverage)
1527
+ {
1528
+ node->virtualCoverage += coverage;
1529
+ node->twinNode->virtualCoverage += coverage;
1530
+ }
1531
+
1532
+ Coordinate getVirtualCoverage(Node * node)
1533
+ {
1534
+ return node->virtualCoverage;
1535
+ }
1536
+
1537
+ Coordinate getTotalCoverage(Node * node)
1538
+ {
1539
+ return node->virtualCoverage;
1540
+ }
1541
+
1542
+ #endif
1543
+
1544
+ boolean hasSingleArc(Node * node)
1545
+ {
1546
+ return node->arcCount == 1;
1547
+ }
1548
+
1549
+ void activateArcLookupTable(Graph * graph)
1550
+ {
1551
+ IDnum index;
1552
+ Node *node;
1553
+ Arc *arc;
1554
+ IDnum nodes = graph->nodeCount;
1555
+ IDnum twinOriginID, destinationID, hash;
1556
+ Arc **table;
1557
+
1558
+ velvetLog("Activating arc lookup table\n");
1559
+
1560
+ graph->arcLookupTable = callocOrExit(6 * nodes + 1, Arc *);
1561
+
1562
+ table = graph->arcLookupTable;
1563
+
1564
+ for (index = -nodes; index <= nodes; index++) {
1565
+ if (index == 0)
1566
+ continue;
1567
+
1568
+ node = getNodeInGraph(graph, index);
1569
+ if (node == 0)
1570
+ continue;
1571
+
1572
+ for (arc = getArc(node); arc != NULL;
1573
+ arc = getNextArc(arc)) {
1574
+ twinOriginID = arc->twinArc->destination->ID;
1575
+ destinationID = arc->destination->ID;
1576
+ hash =
1577
+ 3 * nodes - 2 * twinOriginID + destinationID;
1578
+ arc->nextInLookupTable = table[hash];
1579
+ table[hash] = arc;
1580
+ }
1581
+ }
1582
+
1583
+ velvetLog("Done activating arc lookup table\n");
1584
+ }
1585
+
1586
+ void deactivateArcLookupTable(Graph * graph)
1587
+ {
1588
+ free(graph->arcLookupTable);
1589
+ graph->arcLookupTable = NULL;
1590
+ }
1591
+
1592
+ static void exportNode(FILE * outfile, Node * node, void *withSequence)
1593
+ {
1594
+ Coordinate index;
1595
+ Nucleotide nucleotide;
1596
+
1597
+ if (node == NULL)
1598
+ return;
1599
+
1600
+ velvetFprintf(outfile, "NODE\t%ld\t%lld", (long) node->ID, (long long) node->length);
1601
+
1602
+ #ifndef SINGLE_COV_CAT
1603
+ Category cat;
1604
+ for (cat = 0; cat < CATEGORIES; cat++)
1605
+ velvetFprintf(outfile, "\t%lld\t%lld", (long long) node->virtualCoverage[cat],
1606
+ (long long) node->originalVirtualCoverage[cat]);
1607
+ velvetFprintf(outfile, "\n");
1608
+ #else
1609
+ velvetFprintf(outfile, "\t%lld\n", (long long) node->virtualCoverage);
1610
+ #endif
1611
+
1612
+ if (withSequence == NULL)
1613
+ return;
1614
+
1615
+ for (index = 0; index < node->length; index++) {
1616
+ nucleotide =
1617
+ getNucleotideInDescriptor(node->descriptor, index);
1618
+ switch (nucleotide) {
1619
+ case ADENINE:
1620
+ velvetFprintf(outfile, "A");
1621
+ break;
1622
+ case CYTOSINE:
1623
+ velvetFprintf(outfile, "C");
1624
+ break;
1625
+ case GUANINE:
1626
+ velvetFprintf(outfile, "G");
1627
+ break;
1628
+ case THYMINE:
1629
+ velvetFprintf(outfile, "T");
1630
+ break;
1631
+ }
1632
+ }
1633
+ velvetFprintf(outfile, "\n");
1634
+
1635
+ for (index = 0; index < node->length; index++) {
1636
+ nucleotide =
1637
+ getNucleotideInDescriptor(node->twinNode->descriptor,
1638
+ index);
1639
+ switch (nucleotide) {
1640
+ case ADENINE:
1641
+ velvetFprintf(outfile, "A");
1642
+ break;
1643
+ case CYTOSINE:
1644
+ velvetFprintf(outfile, "C");
1645
+ break;
1646
+ case GUANINE:
1647
+ velvetFprintf(outfile, "G");
1648
+ break;
1649
+ case THYMINE:
1650
+ velvetFprintf(outfile, "T");
1651
+ break;
1652
+ }
1653
+ }
1654
+ velvetFprintf(outfile, "\n");
1655
+ }
1656
+
1657
+ static void exportArc(FILE * outfile, Arc * arc)
1658
+ {
1659
+ IDnum originID, destinationID;
1660
+ IDnum absOriginID, absDestinationID;
1661
+
1662
+ if (arc == NULL)
1663
+ return;
1664
+
1665
+ absOriginID = originID = -arc->twinArc->destination->ID;
1666
+ absDestinationID = destinationID = arc->destination->ID;
1667
+
1668
+ if (absOriginID < 0)
1669
+ absOriginID = -absOriginID;
1670
+ if (absDestinationID < 0)
1671
+ absDestinationID = -absDestinationID;
1672
+
1673
+ if (absDestinationID < absOriginID)
1674
+ return;
1675
+
1676
+ if (originID == destinationID && originID < 0)
1677
+ return;
1678
+
1679
+ velvetFprintf(outfile, "ARC\t%li\t%li\t%li\n", (long) originID, (long) destinationID,
1680
+ (long) arc->multiplicity);
1681
+ }
1682
+
1683
+ // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
1684
+ static Arc *mergeArcLists(Arc * left, Arc * right)
1685
+ {
1686
+ Arc *mergedList = NULL;
1687
+ Arc *tail = NULL;
1688
+
1689
+ // Choose first element:
1690
+ if (left->destination->ID <= right->destination->ID) {
1691
+ mergedList = left;
1692
+ tail = left;
1693
+ left = left->next;
1694
+ } else {
1695
+ mergedList = right;
1696
+ tail = right;
1697
+ right = right->next;
1698
+ }
1699
+
1700
+ // Iterate while both lists are still non empty
1701
+ while (left != NULL && right != NULL) {
1702
+ if (left->destination->ID <= right->destination->ID) {
1703
+ tail->next = left;
1704
+ left->previous = tail;
1705
+ left = left->next;
1706
+ } else {
1707
+ tail->next = right;
1708
+ right->previous = tail;
1709
+ right = right->next;
1710
+ }
1711
+
1712
+ tail = tail->next;
1713
+ }
1714
+
1715
+ // Concatenate the remaining list at the end of the merged list
1716
+ if (left != NULL) {
1717
+ tail->next = left;
1718
+ left->previous = tail;
1719
+ }
1720
+
1721
+ if (right != NULL) {
1722
+ tail->next = right;
1723
+ right->previous = tail;
1724
+ }
1725
+
1726
+ return mergedList;
1727
+ }
1728
+
1729
+ static void arcMergeSort(Arc ** arcPtr, IDnum count)
1730
+ {
1731
+
1732
+ IDnum half = count / 2;
1733
+ Arc *left = *arcPtr;
1734
+ Arc *ptr = left;
1735
+ Arc *right;
1736
+ IDnum index;
1737
+
1738
+ if (count == 0 || count == 1)
1739
+ return;
1740
+
1741
+ if (count == 2) {
1742
+ if ((*arcPtr)->destination->ID >
1743
+ (*arcPtr)->next->destination->ID) {
1744
+ (*arcPtr)->next->next = *arcPtr;
1745
+ (*arcPtr)->previous = (*arcPtr)->next;
1746
+ *arcPtr = (*arcPtr)->next;
1747
+ (*arcPtr)->next->next = NULL;
1748
+ (*arcPtr)->previous = NULL;
1749
+ }
1750
+ return;
1751
+ }
1752
+
1753
+ for (index = 0; index < half - 1; index++) {
1754
+ ptr = ptr->next;
1755
+ if (ptr == NULL)
1756
+ return;
1757
+ }
1758
+
1759
+ right = ptr->next;
1760
+ ptr->next = NULL;
1761
+ right->previous = NULL;
1762
+
1763
+ arcMergeSort(&left, half);
1764
+ arcMergeSort(&right, count - half);
1765
+ *arcPtr = mergeArcLists(left, right);
1766
+ }
1767
+
1768
+ static void sortNodeArcs(Node * node)
1769
+ {
1770
+ Arc *arc;
1771
+ IDnum count = 0;
1772
+
1773
+ for (arc = getArc(node); arc != NULL; arc = getNextArc(arc))
1774
+ count++;
1775
+
1776
+ if (count == 0)
1777
+ return;
1778
+
1779
+ arc = getArc(node);
1780
+ arcMergeSort(&arc, count);
1781
+
1782
+ node->arc = arc;
1783
+ }
1784
+
1785
+ // Merges two lists of annotations in order of increasing position (used in mergeSort mainly)
1786
+ static GapMarker *mergeGapMarkerLists(GapMarker * left, GapMarker * right)
1787
+ {
1788
+ GapMarker *mergedList = NULL;
1789
+ GapMarker *tail = NULL;
1790
+
1791
+ // Choose first element:
1792
+ if (left->position <= right->position) {
1793
+ mergedList = left;
1794
+ tail = left;
1795
+ left = left->next;
1796
+ } else {
1797
+ mergedList = right;
1798
+ tail = right;
1799
+ right = right->next;
1800
+ }
1801
+
1802
+ // Iterate while both lists are still non empty
1803
+ while (left != NULL && right != NULL) {
1804
+ if (left->position <= right->position) {
1805
+ tail->next = left;
1806
+ left = left->next;
1807
+ } else {
1808
+ tail->next = right;
1809
+ right = right->next;
1810
+ }
1811
+
1812
+ tail = tail->next;
1813
+ }
1814
+
1815
+ // Concatenate the remaining list at the end of the merged list
1816
+ if (left != NULL)
1817
+ tail->next = left;
1818
+
1819
+ if (right != NULL)
1820
+ tail->next = right;
1821
+
1822
+ return mergedList;
1823
+ }
1824
+
1825
+ static void gapMergeSort(GapMarker ** gapPtr, IDnum count)
1826
+ {
1827
+
1828
+ IDnum half = count / 2;
1829
+ GapMarker *left = *gapPtr;
1830
+ GapMarker *ptr = left;
1831
+ GapMarker *right;
1832
+ IDnum index;
1833
+
1834
+ if (count == 0 || count == 1)
1835
+ return;
1836
+
1837
+ if (count == 2) {
1838
+ if ((*gapPtr)->position > (*gapPtr)->next->position) {
1839
+ (*gapPtr)->next->next = *gapPtr;
1840
+ *gapPtr = (*gapPtr)->next;
1841
+ (*gapPtr)->next->next = NULL;
1842
+ }
1843
+ return;
1844
+ }
1845
+
1846
+ for (index = 0; index < half - 1; index++) {
1847
+ ptr = ptr->next;
1848
+ if (ptr == NULL)
1849
+ return;
1850
+ }
1851
+
1852
+ right = ptr->next;
1853
+ ptr->next = NULL;
1854
+
1855
+ gapMergeSort(&left, half);
1856
+ gapMergeSort(&right, count - half);
1857
+ *gapPtr = mergeGapMarkerLists(left, right);
1858
+ }
1859
+
1860
+ static void sortNodeGapMarkers(Node * node, Graph * graph)
1861
+ {
1862
+ GapMarker *gap;
1863
+ IDnum count = 0;
1864
+ IDnum ID = getNodeID(node);
1865
+
1866
+ if (ID < 0)
1867
+ ID = -ID;
1868
+
1869
+ for (gap = graph->gapMarkers[ID]; gap != NULL; gap = gap->next)
1870
+ count++;
1871
+
1872
+ if (count == 0)
1873
+ return;
1874
+
1875
+ gap = graph->gapMarkers[ID];
1876
+ gapMergeSort(&gap, count);
1877
+
1878
+ graph->gapMarkers[ID] = gap;
1879
+ }
1880
+
1881
+ void sortGapMarkers(Graph * graph)
1882
+ {
1883
+ IDnum index;
1884
+ Node *node;
1885
+
1886
+ if (graph->gapMarkers == NULL)
1887
+ return;
1888
+
1889
+ for (index = 1; index <= nodeCount(graph); index++) {
1890
+ node = getNodeInGraph(graph, index);
1891
+ if (node)
1892
+ sortNodeGapMarkers(node, graph);
1893
+ }
1894
+ }
1895
+
1896
+ void exportGraph(char *filename, Graph * graph, TightString * sequences)
1897
+ {
1898
+ IDnum index;
1899
+ FILE *outfile;
1900
+ Node *node;
1901
+ Arc *arc;
1902
+ PassageMarkerI marker;
1903
+ ShortReadMarker *reads;
1904
+ IDnum readCount, readIndex;
1905
+
1906
+ if (graph == NULL) {
1907
+ return;
1908
+ }
1909
+
1910
+ outfile = fopen(filename, "w");
1911
+ if (outfile == NULL) {
1912
+ velvetLog("Couldn't open file, sorry\n");
1913
+ return;
1914
+ } else
1915
+ velvetLog("Writing into graph file %s...\n", filename);
1916
+
1917
+ // General data
1918
+ velvetFprintf(outfile, "%li\t%li\t%i\t%i\n", (long) graph->nodeCount,
1919
+ (long) graph->sequenceCount, graph->wordLength, (int) graph->double_stranded);
1920
+
1921
+ // Node info
1922
+ for (index = 1; index <= graph->nodeCount; index++) {
1923
+ node = getNodeInGraph(graph, index);
1924
+ exportNode(outfile, node, (void *) sequences);
1925
+ }
1926
+
1927
+ // Arc info
1928
+ for (index = 1; index <= graph->nodeCount; index++) {
1929
+ node = getNodeInGraph(graph, index);
1930
+ if (node == NULL)
1931
+ continue;
1932
+
1933
+ sortNodeArcs(node);
1934
+ sortNodeArcs(getTwinNode(node));
1935
+
1936
+ for (arc = node->arc; arc != NULL; arc = arc->next)
1937
+ exportArc(outfile, arc);
1938
+ for (arc = node->twinNode->arc; arc != NULL;
1939
+ arc = arc->next)
1940
+ exportArc(outfile, arc);
1941
+ }
1942
+
1943
+ // Sequence info
1944
+ for (index = 1; index <= graph->nodeCount; index++) {
1945
+ node = getNodeInGraph(graph, index);
1946
+ if (node == NULL)
1947
+ continue;
1948
+ for (marker = node->marker; marker != NULL_IDX;
1949
+ marker = getNextInNode(marker))
1950
+ exportMarker(outfile, marker, sequences,
1951
+ graph->wordLength);
1952
+ }
1953
+
1954
+ // Node reads
1955
+ if (readStartsAreActivated(graph)) {
1956
+ for (index = 0; index <= graph->nodeCount * 2; index++) {
1957
+ readCount = graph->nodeReadCounts[index];
1958
+ if (readCount == 0)
1959
+ continue;
1960
+
1961
+ velvetFprintf(outfile, "NR\t%li\t%li\n",
1962
+ (long) (index - graph->nodeCount), (long) readCount);
1963
+
1964
+ reads = graph->nodeReads[index];
1965
+ for (readIndex = 0; readIndex < readCount;
1966
+ readIndex++)
1967
+ velvetFprintf(outfile, "%ld\t%lld\t%d\n",
1968
+ (long) reads[readIndex].readID,
1969
+ (long long) reads[readIndex].position,
1970
+ (int) reads[readIndex].offset);
1971
+ }
1972
+ }
1973
+
1974
+ fclose(outfile);
1975
+ }
1976
+
1977
+ Graph *importGraph(char *filename)
1978
+ {
1979
+ FILE *file = fopen(filename, "r");
1980
+ const int maxline = MAXLINE;
1981
+ char line[MAXLINE];
1982
+ Graph *graph;
1983
+ Coordinate coverage;
1984
+ IDnum nodeCounter, sequenceCount;
1985
+ Node *node, *twin;
1986
+ Arc *arc;
1987
+ IDnum originID, destinationID, multiplicity;
1988
+ PassageMarkerI newMarker, marker;
1989
+ IDnum nodeID, seqID;
1990
+ Coordinate index;
1991
+ Coordinate start, finish;
1992
+ Coordinate startOffset, finishOffset;
1993
+ boolean finished = false;
1994
+ size_t arrayLength;
1995
+ IDnum readCount;
1996
+ ShortReadMarker *array;
1997
+ int wordLength, sCount;
1998
+ ShortLength length;
1999
+ long long_var, long_var2, long_var3;
2000
+ long long longlong_var, longlong_var2, longlong_var3, longlong_var4;
2001
+ short short_var;
2002
+ char c;
2003
+
2004
+ if (file == NULL)
2005
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
2006
+
2007
+ velvetLog("Reading graph file %s\n", filename);
2008
+
2009
+ // First line
2010
+ if (!fgets(line, maxline, file))
2011
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2012
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2013
+ &wordLength, &short_var);
2014
+ nodeCounter = (IDnum) long_var;
2015
+ sequenceCount = (IDnum) long_var2;
2016
+ graph = emptyGraph(sequenceCount, wordLength);
2017
+ graph->double_stranded = (boolean) short_var;
2018
+ resetWordFilter(wordLength);
2019
+ allocateNodeSpace(graph, nodeCounter);
2020
+
2021
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2022
+ (long) sequenceCount);
2023
+
2024
+ if (nodeCounter == 0)
2025
+ return graph;
2026
+
2027
+ // Read nodes
2028
+ if (!fgets(line, maxline, file))
2029
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2030
+ while (!finished && strncmp(line, "NODE", 4) == 0) {
2031
+ strtok(line, "\t\n");
2032
+ sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2033
+ nodeID = (IDnum) long_var;
2034
+ node = addEmptyNodeToGraph(graph, nodeID);
2035
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2036
+ node->length = (Coordinate) longlong_var;
2037
+
2038
+ #ifndef SINGLE_COV_CAT
2039
+ Category cat;
2040
+ Coordinate originalCoverage;
2041
+ for (cat = 0; cat < CATEGORIES; cat++) {
2042
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2043
+ coverage = (Coordinate) longlong_var;
2044
+ setVirtualCoverage(node, cat, coverage);
2045
+ sscanf(strtok(NULL, "\t\n"), "%lld",
2046
+ &longlong_var);
2047
+ originalCoverage = (Coordinate) longlong_var;
2048
+ setOriginalVirtualCoverage(node, cat,
2049
+ originalCoverage);
2050
+ }
2051
+ #else
2052
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2053
+ coverage = (Coordinate) longlong_var;
2054
+ setVirtualCoverage(node, coverage);
2055
+ #endif
2056
+
2057
+ arrayLength = node->length / 4;
2058
+ if (node->length % 4 > 0)
2059
+ arrayLength++;
2060
+ node->descriptor =
2061
+ callocOrExit(arrayLength, Descriptor);
2062
+
2063
+ index = 0;
2064
+ while ((c = fgetc(file)) != '\n' && c != EOF) {
2065
+ if (c == 'A')
2066
+ writeNucleotideInDescriptor(ADENINE,
2067
+ node->
2068
+ descriptor,
2069
+ index++);
2070
+ else if (c == 'C')
2071
+ writeNucleotideInDescriptor(CYTOSINE,
2072
+ node->
2073
+ descriptor,
2074
+ index++);
2075
+ else if (c == 'G')
2076
+ writeNucleotideInDescriptor(GUANINE,
2077
+ node->
2078
+ descriptor,
2079
+ index++);
2080
+ else if (c == 'T')
2081
+ writeNucleotideInDescriptor(THYMINE,
2082
+ node->
2083
+ descriptor,
2084
+ index++);
2085
+ }
2086
+
2087
+ twin = node->twinNode;
2088
+ twin->length = node->length;
2089
+ twin->descriptor =
2090
+ callocOrExit(arrayLength, Descriptor);
2091
+ index = 0;
2092
+ while ((c = fgetc(file)) != '\n' && c != EOF) {
2093
+ if (c == 'A')
2094
+ writeNucleotideInDescriptor(ADENINE,
2095
+ twin->
2096
+ descriptor,
2097
+ index++);
2098
+ else if (c == 'C')
2099
+ writeNucleotideInDescriptor(CYTOSINE,
2100
+ twin->
2101
+ descriptor,
2102
+ index++);
2103
+ else if (c == 'G')
2104
+ writeNucleotideInDescriptor(GUANINE,
2105
+ twin->
2106
+ descriptor,
2107
+ index++);
2108
+ else if (c == 'T')
2109
+ writeNucleotideInDescriptor(THYMINE,
2110
+ twin->
2111
+ descriptor,
2112
+ index++);
2113
+ }
2114
+
2115
+ if (fgets(line, maxline, file) == NULL)
2116
+ finished = true;
2117
+ }
2118
+
2119
+ // Read arcs
2120
+ while (!finished && line[0] == 'A') {
2121
+ sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2122
+ &long_var2, &long_var3);
2123
+ originID = (IDnum) long_var;
2124
+ destinationID = (IDnum) long_var2;
2125
+ multiplicity = (IDnum) long_var3;
2126
+ arc =
2127
+ createArc(getNodeInGraph(graph, originID),
2128
+ getNodeInGraph(graph, destinationID), graph);
2129
+ setMultiplicity(arc, multiplicity);
2130
+ if (fgets(line, maxline, file) == NULL)
2131
+ finished = true;
2132
+ }
2133
+
2134
+ // Read sequences
2135
+ while (!finished && line[0] != 'N') {
2136
+ sscanf(line, "SEQ\t%ld\n", &long_var);
2137
+ seqID = (IDnum) long_var;
2138
+ marker = NULL_IDX;
2139
+ if (!fgets(line, maxline, file))
2140
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2141
+
2142
+ while (!finished && line[0] != 'N' && line[0] != 'S') {
2143
+ sCount =
2144
+ sscanf(line, "%ld\t%lld\t%lld\t%lld\t%lld\n",
2145
+ &long_var, &longlong_var, &longlong_var2, &longlong_var3,
2146
+ &longlong_var4);
2147
+ nodeID = (IDnum) long_var;
2148
+ startOffset = (Coordinate) longlong_var;
2149
+ start = (Coordinate) longlong_var2;
2150
+ finish = (Coordinate) longlong_var3;
2151
+ finishOffset = (Coordinate) longlong_var4;
2152
+ if (sCount != 5) {
2153
+ velvetLog
2154
+ ("ERROR: reading in graph - only %d items read for line '%s'",
2155
+ sCount, line);
2156
+ #ifdef DEBUG
2157
+ abort();
2158
+ #endif
2159
+ exit(1);
2160
+ }
2161
+ newMarker =
2162
+ newPassageMarker(seqID, start, finish,
2163
+ startOffset, finishOffset);
2164
+ transposePassageMarker(newMarker,
2165
+ getNodeInGraph(graph,
2166
+ nodeID));
2167
+ connectPassageMarkers(marker, newMarker, graph);
2168
+ marker = newMarker;
2169
+ if (fgets(line, maxline, file) == NULL)
2170
+ finished = true;
2171
+ }
2172
+ }
2173
+
2174
+ // Node reads
2175
+ while (!finished) {
2176
+ sscanf(line, "NR\t%ld\t%ld\n", &long_var, &long_var2);
2177
+ nodeID = (IDnum) long_var;
2178
+ readCount = (IDnum) long_var2;
2179
+ if (!readStartsAreActivated(graph))
2180
+ activateReadStarts(graph);
2181
+
2182
+ graph->nodeReadCounts[nodeID + graph->nodeCount] =
2183
+ readCount;
2184
+ array = mallocOrExit(readCount, ShortReadMarker);
2185
+ graph->nodeReads[nodeID + graph->nodeCount] = array;
2186
+
2187
+ readCount = 0;
2188
+ if (!fgets(line, maxline, file))
2189
+ exitErrorf(EXIT_FAILURE, true, "Graph file incomplete");
2190
+ while (!finished && line[0] != 'N') {
2191
+ sscanf(line, "%ld\t%lld\t%hd\n", &long_var,
2192
+ &longlong_var, &short_var);
2193
+ seqID = (IDnum) long_var;
2194
+ startOffset = (Coordinate) longlong_var;
2195
+ length = (ShortLength) short_var;
2196
+ array[readCount].readID = seqID;
2197
+ array[readCount].position = startOffset;
2198
+ array[readCount].offset = length;
2199
+ readCount++;
2200
+ if (fgets(line, maxline, file) == NULL)
2201
+ finished = true;
2202
+ }
2203
+ }
2204
+
2205
+ //velvetLog("New graph has %d nodes\n", graph->nodeCount);
2206
+
2207
+ fclose(file);
2208
+ //velvetLog("Done, exiting\n");
2209
+ return graph;
2210
+ }
2211
+
2212
+ Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand)
2213
+ {
2214
+ FILE *file = fopen(preGraphFilename, "r");
2215
+ const int maxline = MAXLINE;
2216
+ char line[MAXLINE];
2217
+
2218
+ Graph *graph;
2219
+ IDnum nodeCounter, sequenceCount;
2220
+
2221
+ Node *node, *twin;
2222
+ IDnum nodeID = 0;
2223
+ Coordinate index, nodeLength;
2224
+ char c;
2225
+ int wordLength, wordShift;
2226
+ size_t arrayLength;
2227
+ short short_var;
2228
+ long long_var, long_var2;
2229
+ long long longlong_var;
2230
+
2231
+ if (file == NULL)
2232
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
2233
+
2234
+ velvetLog("Reading pre-graph file %s\n", preGraphFilename);
2235
+
2236
+ // First line
2237
+ if (!fgets(line, maxline, file))
2238
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2239
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2240
+ &wordLength, &short_var);
2241
+ nodeCounter = (IDnum) long_var;
2242
+ sequenceCount = (IDnum) long_var2;
2243
+ *double_strand = (boolean) short_var;
2244
+ wordShift = wordLength - 1;
2245
+ graph = emptyGraph(sequenceCount, wordLength);
2246
+ graph->double_stranded = *double_strand;
2247
+ resetWordFilter(wordLength);
2248
+ allocateNodeSpace(graph, nodeCounter);
2249
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2250
+ (long) sequenceCount);
2251
+
2252
+ // Read nodes
2253
+ if (nodeCounter == 0)
2254
+ return graph;
2255
+
2256
+ if (!fgets(line, maxline, file))
2257
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2258
+ while (line[0] == 'N') {
2259
+ nodeID++;
2260
+ node = addEmptyNodeToGraph(graph, nodeID);
2261
+
2262
+ sscanf(line, "%*s\t%*i\t%lli\n", &longlong_var);
2263
+ node->length = (Coordinate) longlong_var;
2264
+ nodeLength = node->length;
2265
+ arrayLength = node->length / 4;
2266
+ if (node->length % 4 > 0)
2267
+ arrayLength++;
2268
+ node->descriptor =
2269
+ callocOrExit(arrayLength, Descriptor);
2270
+
2271
+ twin = node->twinNode;
2272
+ twin->length = nodeLength;
2273
+ twin->descriptor =
2274
+ callocOrExit(arrayLength, Descriptor);
2275
+
2276
+
2277
+ index = 0;
2278
+ while ((c = getc(file)) != '\n') {
2279
+ if (c == 'A') {
2280
+ if (index - wordShift >= 0)
2281
+ writeNucleotideInDescriptor(ADENINE,
2282
+ node->
2283
+ descriptor,
2284
+ index - wordShift);
2285
+ if (nodeLength - index - 1 >= 0) {
2286
+ #ifndef COLOR
2287
+ writeNucleotideInDescriptor(THYMINE,
2288
+ twin->
2289
+ descriptor,
2290
+ nodeLength - index - 1);
2291
+ #else
2292
+ writeNucleotideInDescriptor(ADENINE,
2293
+ twin->
2294
+ descriptor,
2295
+ nodeLength - index - 1);
2296
+ #endif
2297
+ }
2298
+ } else if (c == 'C') {
2299
+ if (index - wordShift >= 0)
2300
+ writeNucleotideInDescriptor(CYTOSINE,
2301
+ node->
2302
+ descriptor,
2303
+ index - wordShift);
2304
+ if (nodeLength - index - 1 >= 0) {
2305
+ #ifndef COLOR
2306
+ writeNucleotideInDescriptor(GUANINE,
2307
+ twin->
2308
+ descriptor,
2309
+ nodeLength - index - 1);
2310
+ #else
2311
+ writeNucleotideInDescriptor(CYTOSINE,
2312
+ twin->
2313
+ descriptor,
2314
+ nodeLength - index - 1);
2315
+ #endif
2316
+ }
2317
+ } else if (c == 'G') {
2318
+ if (index - wordShift >= 0)
2319
+ writeNucleotideInDescriptor(GUANINE,
2320
+ node->
2321
+ descriptor,
2322
+ index - wordShift);
2323
+ if (nodeLength - index - 1 >= 0) {
2324
+ #ifndef COLOR
2325
+ writeNucleotideInDescriptor(CYTOSINE,
2326
+ twin->
2327
+ descriptor,
2328
+ nodeLength - index - 1);
2329
+ #else
2330
+ writeNucleotideInDescriptor(GUANINE,
2331
+ twin->
2332
+ descriptor,
2333
+ nodeLength - index - 1);
2334
+ #endif
2335
+ }
2336
+ } else if (c == 'T') {
2337
+ if (index - wordShift >= 0)
2338
+ writeNucleotideInDescriptor(THYMINE,
2339
+ node->
2340
+ descriptor,
2341
+ index - wordShift);
2342
+ if (nodeLength - index - 1 >= 0) {
2343
+ #ifndef COLOR
2344
+ writeNucleotideInDescriptor(ADENINE,
2345
+ twin->
2346
+ descriptor,
2347
+ nodeLength - index - 1);
2348
+ #else
2349
+ writeNucleotideInDescriptor(THYMINE,
2350
+ twin->
2351
+ descriptor,
2352
+ nodeLength - index - 1);
2353
+ #endif
2354
+ }
2355
+ }
2356
+
2357
+ index++;
2358
+ }
2359
+
2360
+ if (fgets(line, maxline, file) == NULL) {
2361
+ fclose(file);
2362
+ return graph;
2363
+ }
2364
+ }
2365
+
2366
+ fclose(file);
2367
+ return graph;
2368
+ }
2369
+
2370
+ Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_strand)
2371
+ {
2372
+ FILE *file = fopen(connectedGraphFilename, "r");
2373
+ const int maxline = MAXLINE;
2374
+ char line[MAXLINE];
2375
+ Coordinate coverage;
2376
+ Arc *arc;
2377
+ IDnum originID, destinationID, multiplicity;
2378
+ boolean finished = false;
2379
+ long long_var3;
2380
+
2381
+ Graph *graph;
2382
+ IDnum nodeCounter, sequenceCount;
2383
+
2384
+ Node *node, *twin;
2385
+ IDnum nodeID = 0;
2386
+ Coordinate index, nodeLength;
2387
+ char c;
2388
+ int wordLength, wordShift;
2389
+ size_t arrayLength;
2390
+ short short_var;
2391
+ long long_var, long_var2;
2392
+ long long longlong_var;
2393
+
2394
+ if (file == NULL)
2395
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", connectedGraphFilename);
2396
+
2397
+ velvetLog("Reading connected graph file %s\n", connectedGraphFilename);
2398
+
2399
+ // First line
2400
+ if (!fgets(line, maxline, file))
2401
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2402
+ sscanf(line, "%ld\t%ld\t%i\t%hi\n", &long_var, &long_var2,
2403
+ &wordLength, &short_var);
2404
+ nodeCounter = (IDnum) long_var;
2405
+ sequenceCount = (IDnum) long_var2;
2406
+ *double_strand = (boolean) short_var;
2407
+ wordShift = wordLength - 1;
2408
+ graph = emptyGraph(sequenceCount, wordLength);
2409
+ graph->double_stranded = *double_strand;
2410
+ resetWordFilter(wordLength);
2411
+ allocateNodeSpace(graph, nodeCounter);
2412
+ velvetLog("Graph has %ld nodes and %ld sequences\n", (long) nodeCounter,
2413
+ (long) sequenceCount);
2414
+
2415
+ // Read nodes
2416
+ if (nodeCounter == 0)
2417
+ return graph;
2418
+
2419
+ if (!fgets(line, maxline, file))
2420
+ exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
2421
+ while (!finished && strncmp(line, "NODE", 4) == 0) {
2422
+ strtok(line, "\t\n");
2423
+ sscanf(strtok(NULL, "\t\n"), "%ld", &long_var);
2424
+ nodeID = (IDnum) long_var;
2425
+ node = addEmptyNodeToGraph(graph, nodeID);
2426
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2427
+ node->length = (Coordinate) longlong_var;
2428
+ nodeLength = node->length;
2429
+
2430
+ #ifndef SINGLE_COV_CAT
2431
+ Category cat;
2432
+ Coordinate originalCoverage;
2433
+ for (cat = 0; cat < CATEGORIES; cat++) {
2434
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2435
+ coverage = (Coordinate) longlong_var;
2436
+ setVirtualCoverage(node, cat, coverage);
2437
+ sscanf(strtok(NULL, "\t\n"), "%lld",
2438
+ &longlong_var);
2439
+ originalCoverage = (Coordinate) longlong_var;
2440
+ setOriginalVirtualCoverage(node, cat,
2441
+ originalCoverage);
2442
+ }
2443
+ #else
2444
+ sscanf(strtok(NULL, "\t\n"), "%lld", &longlong_var);
2445
+ coverage = (Coordinate) longlong_var;
2446
+ setVirtualCoverage(node, coverage);
2447
+ #endif
2448
+
2449
+ arrayLength = node->length / 4;
2450
+ if (node->length % 4 > 0)
2451
+ arrayLength++;
2452
+ node->descriptor =
2453
+ callocOrExit(arrayLength, Descriptor);
2454
+
2455
+ twin = node->twinNode;
2456
+ twin->length = node->length;
2457
+ twin->descriptor =
2458
+ callocOrExit(arrayLength, Descriptor);
2459
+
2460
+ index = 0;
2461
+ while ((c = getc(file)) != '\n') {
2462
+ if (c == 'A') {
2463
+ if (index - wordShift >= 0)
2464
+ writeNucleotideInDescriptor(ADENINE,
2465
+ node->
2466
+ descriptor,
2467
+ index - wordShift);
2468
+ if (nodeLength - index - 1 >= 0) {
2469
+ #ifndef COLOR
2470
+ writeNucleotideInDescriptor(THYMINE,
2471
+ twin->
2472
+ descriptor,
2473
+ nodeLength - index - 1);
2474
+ #else
2475
+ writeNucleotideInDescriptor(ADENINE,
2476
+ twin->
2477
+ descriptor,
2478
+ nodeLength - index - 1);
2479
+ #endif
2480
+ }
2481
+ } else if (c == 'C') {
2482
+ if (index - wordShift >= 0)
2483
+ writeNucleotideInDescriptor(CYTOSINE,
2484
+ node->
2485
+ descriptor,
2486
+ index - wordShift);
2487
+ if (nodeLength - index - 1 >= 0) {
2488
+ #ifndef COLOR
2489
+ writeNucleotideInDescriptor(GUANINE,
2490
+ twin->
2491
+ descriptor,
2492
+ nodeLength - index - 1);
2493
+ #else
2494
+ writeNucleotideInDescriptor(CYTOSINE,
2495
+ twin->
2496
+ descriptor,
2497
+ nodeLength - index - 1);
2498
+ #endif
2499
+ }
2500
+ } else if (c == 'G') {
2501
+ if (index - wordShift >= 0)
2502
+ writeNucleotideInDescriptor(GUANINE,
2503
+ node->
2504
+ descriptor,
2505
+ index - wordShift);
2506
+ if (nodeLength - index - 1 >= 0) {
2507
+ #ifndef COLOR
2508
+ writeNucleotideInDescriptor(CYTOSINE,
2509
+ twin->
2510
+ descriptor,
2511
+ nodeLength - index - 1);
2512
+ #else
2513
+ writeNucleotideInDescriptor(GUANINE,
2514
+ twin->
2515
+ descriptor,
2516
+ nodeLength - index - 1);
2517
+ #endif
2518
+ }
2519
+ } else if (c == 'T') {
2520
+ if (index - wordShift >= 0)
2521
+ writeNucleotideInDescriptor(THYMINE,
2522
+ node->
2523
+ descriptor,
2524
+ index - wordShift);
2525
+ if (nodeLength - index - 1 >= 0) {
2526
+ #ifndef COLOR
2527
+ writeNucleotideInDescriptor(ADENINE,
2528
+ twin->
2529
+ descriptor,
2530
+ nodeLength - index - 1);
2531
+ #else
2532
+ writeNucleotideInDescriptor(THYMINE,
2533
+ twin->
2534
+ descriptor,
2535
+ nodeLength - index - 1);
2536
+ #endif
2537
+ }
2538
+ }
2539
+
2540
+ index++;
2541
+ }
2542
+
2543
+ if (fgets(line, maxline, file) == NULL) {
2544
+ finished = true;
2545
+ }
2546
+ }
2547
+
2548
+ // Read arcs
2549
+ while (!finished && line[0] == 'A') {
2550
+ sscanf(line, "ARC\t%ld\t%ld\t%ld\n", &long_var,
2551
+ &long_var2, &long_var3);
2552
+ originID = (IDnum) long_var;
2553
+ destinationID = (IDnum) long_var2;
2554
+ multiplicity = (IDnum) long_var3;
2555
+ arc =
2556
+ createArc(getNodeInGraph(graph, originID),
2557
+ getNodeInGraph(graph, destinationID), graph);
2558
+ setMultiplicity(arc, multiplicity);
2559
+ if (fgets(line, maxline, file) == NULL)
2560
+ finished = true;
2561
+ }
2562
+
2563
+ fclose(file);
2564
+ return graph;
2565
+ }
2566
+
2567
+ // Prints out the information relative to the topology of a node into a new file
2568
+ // Internal to exportDOTGraph()
2569
+ void DOTNode(Node * node, FILE * outfile)
2570
+ {
2571
+ IDnum ID;
2572
+ Arc *arc;
2573
+ Node *otherNode;
2574
+
2575
+ ID = node->ID;
2576
+ if (ID < 0)
2577
+ return;
2578
+
2579
+ velvetFprintf(outfile, "\t%li [label=\"<left>|%li|<right>\"]\n", (long) ID, (long) ID);
2580
+
2581
+ for (arc = node->arc; arc != NULL; arc = arc->next) {
2582
+ otherNode = arc->destination;
2583
+ if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2584
+ continue;
2585
+ }
2586
+
2587
+ if (otherNode->ID > 0)
2588
+ velvetFprintf(outfile, "\t%li:right -> %li:left\n", (long) ID,
2589
+ (long) otherNode->ID);
2590
+ else
2591
+ velvetFprintf(outfile, "\t%li:right -> %li:right\n", (long) ID,
2592
+ (long) -otherNode->ID);
2593
+ }
2594
+
2595
+ for (arc = node->twinNode->arc; arc != NULL; arc = arc->next) {
2596
+ otherNode = arc->destination;
2597
+ if (!(otherNode->ID >= ID || otherNode->ID <= -ID)) {
2598
+ continue;
2599
+ }
2600
+
2601
+ if (otherNode->ID > 0)
2602
+ velvetFprintf(outfile, "\t%li:left -> %li:left\n", (long) ID,
2603
+ (long) otherNode->ID);
2604
+ else
2605
+ velvetFprintf(outfile, "\t%li:left -> %li:right\n", (long) ID,
2606
+ (long) -otherNode->ID);
2607
+ }
2608
+ }
2609
+
2610
+ TightString *expandNode(Node * node, int WORDLENGTH)
2611
+ {
2612
+ Nucleotide nucleotide;
2613
+ Coordinate index;
2614
+ TightString *tString =
2615
+ newTightString(node->length + WORDLENGTH - 1);
2616
+ Node *twin = node->twinNode;
2617
+ Coordinate length = node->length;
2618
+
2619
+ for (index = 0; index < WORDLENGTH; index++) {
2620
+ nucleotide =
2621
+ getNucleotideInDescriptor(twin->descriptor,
2622
+ length - index - 1);
2623
+ #ifndef COLOR
2624
+ writeNucleotideAtPosition(3 - nucleotide, index, tString);
2625
+ #else
2626
+ writeNucleotideAtPosition(nucleotide, index, tString);
2627
+ #endif
2628
+ }
2629
+
2630
+ for (index = 1; index < node->length; index++) {
2631
+ nucleotide =
2632
+ getNucleotideInDescriptor(node->descriptor, index);
2633
+ writeNucleotideAtPosition(nucleotide,
2634
+ index + WORDLENGTH - 1, tString);
2635
+ }
2636
+
2637
+ return tString;
2638
+ }
2639
+
2640
+ char *expandNodeFragment(Node * node, Coordinate contigStart,
2641
+ Coordinate contigFinish, int wordLength)
2642
+ {
2643
+ Nucleotide nucleotide;
2644
+ Coordinate index;
2645
+ Node *twin = node->twinNode;
2646
+ Coordinate length = contigFinish - contigStart;
2647
+ int wordShift = wordLength - 1;
2648
+ char *string;
2649
+
2650
+ if (length >= wordShift) {
2651
+ string = callocOrExit(length + wordLength, char);
2652
+
2653
+ for (index = 0; index < wordShift; index++) {
2654
+ nucleotide =
2655
+ getNucleotideInDescriptor(twin->descriptor,
2656
+ twin->length - contigStart -
2657
+ index - 1);
2658
+ #ifndef COLOR
2659
+ nucleotide = 3 - nucleotide;
2660
+ #endif
2661
+
2662
+ switch (nucleotide) {
2663
+ case ADENINE:
2664
+ string[index] = 'A';
2665
+ break;
2666
+ case CYTOSINE:
2667
+ string[index] = 'C';
2668
+ break;
2669
+ case GUANINE:
2670
+ string[index] = 'G';
2671
+ break;
2672
+ case THYMINE:
2673
+ string[index] = 'T';
2674
+ break;
2675
+ }
2676
+
2677
+ }
2678
+
2679
+ for (index = 0; index < length; index++) {
2680
+ nucleotide =
2681
+ getNucleotideInDescriptor(node->descriptor,
2682
+ contigStart + index);
2683
+ switch (nucleotide) {
2684
+ case ADENINE:
2685
+ string[index + wordShift] = 'A';
2686
+ break;
2687
+ case CYTOSINE:
2688
+ string[index + wordShift] = 'C';
2689
+ break;
2690
+ case GUANINE:
2691
+ string[index + wordShift] = 'G';
2692
+ break;
2693
+ case THYMINE:
2694
+ string[index + wordShift] = 'T';
2695
+ break;
2696
+ }
2697
+ }
2698
+
2699
+ string[length + wordShift] = '\0';
2700
+ } else {
2701
+ string = callocOrExit(length + 1, char);
2702
+
2703
+ for (index = 0; index < length; index++) {
2704
+ nucleotide =
2705
+ getNucleotideInDescriptor(node->descriptor, contigStart + index);
2706
+ switch (nucleotide) {
2707
+ case ADENINE:
2708
+ string[index] = 'A';
2709
+ break;
2710
+ case CYTOSINE:
2711
+ string[index] = 'C';
2712
+ break;
2713
+ case GUANINE:
2714
+ string[index] = 'G';
2715
+ break;
2716
+ case THYMINE:
2717
+ string[index] = 'T';
2718
+ break;
2719
+ }
2720
+ }
2721
+
2722
+ string[length] = '\0';
2723
+ }
2724
+
2725
+ return string;
2726
+ }
2727
+
2728
+ boolean readStartsAreActivated(Graph * graph)
2729
+ {
2730
+ return graph->nodeReads != NULL;
2731
+ }
2732
+
2733
+ void activateReadStarts(Graph * graph)
2734
+ {
2735
+ graph->nodeReads =
2736
+ callocOrExit(2 * graph->nodeCount + 1, ShortReadMarker *);
2737
+ graph->nodeReadCounts =
2738
+ callocOrExit(2 * graph->nodeCount + 1, IDnum);
2739
+ }
2740
+
2741
+ void deactivateReadStarts(Graph * graph)
2742
+ {
2743
+ free(graph->nodeReads);
2744
+ free(graph->nodeReadCounts);
2745
+
2746
+ graph->nodeReads = NULL;
2747
+ graph->nodeReadCounts = NULL;
2748
+ }
2749
+
2750
+ boolean findIDnumInArray(IDnum query, IDnum * array, IDnum arrayLength)
2751
+ {
2752
+ IDnum leftIndex = 0;
2753
+ IDnum rightIndex = arrayLength;
2754
+ IDnum middleIndex;
2755
+
2756
+ if (arrayLength == 0)
2757
+ return false;
2758
+
2759
+ while (true) {
2760
+ middleIndex = leftIndex + (rightIndex - leftIndex) / 2;
2761
+
2762
+ if (array[middleIndex] == query)
2763
+ return true;
2764
+ else if (leftIndex >= rightIndex)
2765
+ return false;
2766
+ else if (array[middleIndex] > query)
2767
+ rightIndex = middleIndex;
2768
+ else if (leftIndex == middleIndex)
2769
+ leftIndex++;
2770
+ else
2771
+ leftIndex = middleIndex;
2772
+ }
2773
+ }
2774
+
2775
+ static inline int compareShortReadMarkers(const void *A, const void *B)
2776
+ {
2777
+ IDnum a = ((ShortReadMarker *) A)->readID;
2778
+ IDnum b = ((ShortReadMarker *) B)->readID;
2779
+
2780
+ if (a > b)
2781
+ return 1;
2782
+ if (a == b)
2783
+ return 0;
2784
+ return -1;
2785
+ }
2786
+
2787
+ static inline int compareIDnums(const void *A, const void *B)
2788
+ {
2789
+ IDnum a = *((IDnum *) A);
2790
+ IDnum b = *((IDnum *) B);
2791
+
2792
+ if (a > b)
2793
+ return 1;
2794
+ if (a == b)
2795
+ return 0;
2796
+ return -1;
2797
+ }
2798
+
2799
+ void incrementReadStartCount(Node * node, Graph * graph)
2800
+ {
2801
+ graph->nodeReadCounts[node->ID + graph->nodeCount]++;
2802
+ }
2803
+
2804
+ void createNodeReadStartArrays(Graph * graph)
2805
+ {
2806
+ IDnum index;
2807
+
2808
+ if (graph->nodeReads == NULL)
2809
+ return;
2810
+
2811
+ for (index = 0; index <= 2 * (graph->nodeCount); index++) {
2812
+ if (graph->nodeReadCounts[index] != 0) {
2813
+ graph->nodeReads[index] =
2814
+ mallocOrExit(graph->nodeReadCounts[index],
2815
+ ShortReadMarker);
2816
+ graph->nodeReadCounts[index] = 0;
2817
+ } else {
2818
+ graph->nodeReads[index] = NULL;
2819
+ }
2820
+ }
2821
+ }
2822
+
2823
+ void orderNodeReadStartArrays(Graph * graph)
2824
+ {
2825
+ IDnum index;
2826
+
2827
+ if (graph->nodeReads == NULL)
2828
+ return;
2829
+
2830
+ for (index = 0; index <= 2 * (graph->nodeCount); index++)
2831
+ if (graph->nodeReadCounts[index] != 0)
2832
+ qsort(graph->nodeReads[index],
2833
+ graph->nodeReadCounts[index],
2834
+ sizeof(ShortReadMarker),
2835
+ compareShortReadMarkers);
2836
+ }
2837
+
2838
+ void addReadStart(Node * node, IDnum seqID, Coordinate position,
2839
+ Graph * graph, Coordinate offset)
2840
+ {
2841
+ IDnum nodeIndex = getNodeID(node) + graph->nodeCount;
2842
+
2843
+ ShortReadMarker *array = graph->nodeReads[nodeIndex];
2844
+ IDnum arrayLength = graph->nodeReadCounts[nodeIndex];
2845
+
2846
+ array[arrayLength].readID = seqID;
2847
+ array[arrayLength].position = position;
2848
+ array[arrayLength].offset = (ShortLength) offset;
2849
+ // printf("node %d, seq %d, pos %ld, offset %ld\n", getNodeID(node), seqID, position, offset);
2850
+ graph->nodeReadCounts[nodeIndex]++;
2851
+ }
2852
+
2853
+ void blurLastShortReadMarker(Node * node, Graph * graph)
2854
+ {
2855
+ IDnum nodeIndex = getNodeID(node) + nodeCount(graph);
2856
+ IDnum index = graph->nodeReadCounts[nodeIndex] - 1;
2857
+ ShortReadMarker *marker;
2858
+
2859
+ if (index >= 0)
2860
+ marker = &(graph->nodeReads[nodeIndex][index]);
2861
+ else
2862
+ abort();
2863
+
2864
+ setShortReadMarkerPosition(marker, -1);
2865
+ }
2866
+
2867
+ ShortReadMarker *commonNodeReads(Node * nodeA, Node * nodeB, Graph * graph,
2868
+ IDnum * length)
2869
+ {
2870
+ IDnum targetID, targetLength, targetIndex, targetVal;
2871
+ IDnum sourceID, sourceLength, sourceIndex, sourceVal;
2872
+ IDnum mergeLength;
2873
+ ShortReadMarker *mergeArray, *targetArray, *sourceArray;
2874
+
2875
+ if (graph->nodeReads == NULL) {
2876
+ *length = 0;
2877
+ return NULL;
2878
+ }
2879
+
2880
+ if (nodeA == NULL || nodeB == NULL) {
2881
+ *length = 0;
2882
+ return NULL;
2883
+ }
2884
+
2885
+ targetID = getNodeID(nodeA) + graph->nodeCount;
2886
+ targetArray = graph->nodeReads[targetID];
2887
+ targetLength = graph->nodeReadCounts[targetID];
2888
+
2889
+ sourceID = getNodeID(nodeB) + graph->nodeCount;
2890
+ sourceArray = graph->nodeReads[sourceID];
2891
+ sourceLength = graph->nodeReadCounts[sourceID];
2892
+
2893
+ if (sourceArray == NULL || targetArray == NULL) {
2894
+ *length = 0;
2895
+ return NULL;
2896
+ }
2897
+
2898
+ mergeArray =
2899
+ mallocOrExit(sourceLength +
2900
+ targetLength, ShortReadMarker);
2901
+
2902
+ mergeLength = 0;
2903
+ sourceIndex = 0;
2904
+ targetIndex = 0;
2905
+ sourceVal = sourceArray[0].readID;
2906
+ targetVal = targetArray[0].readID;
2907
+
2908
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
2909
+ switch (compareIDnums(&sourceVal, &targetVal)) {
2910
+ case -1:
2911
+ mergeArray[mergeLength].readID = sourceVal;
2912
+ mergeArray[mergeLength].position = -1;
2913
+ mergeArray[mergeLength].offset = -1;
2914
+ mergeLength++;
2915
+ sourceIndex++;
2916
+ if (sourceIndex < sourceLength)
2917
+ sourceVal =
2918
+ sourceArray[sourceIndex].readID;
2919
+ break;
2920
+ case 0:
2921
+ mergeArray[mergeLength].readID = sourceVal;
2922
+ mergeArray[mergeLength].position = -1;
2923
+ mergeArray[mergeLength].offset = -1;
2924
+ mergeLength++;
2925
+ sourceIndex++;
2926
+ if (sourceIndex < sourceLength)
2927
+ sourceVal =
2928
+ sourceArray[sourceIndex].readID;
2929
+ targetIndex++;
2930
+ if (targetIndex < targetLength)
2931
+ targetVal =
2932
+ targetArray[targetIndex].readID;
2933
+ break;
2934
+ case 1:
2935
+ mergeArray[mergeLength].readID = targetVal;
2936
+ mergeArray[mergeLength].position = -1;
2937
+ mergeArray[mergeLength].offset = -1;
2938
+ mergeLength++;
2939
+ targetIndex++;
2940
+ if (targetIndex < targetLength)
2941
+ targetVal =
2942
+ targetArray[targetIndex].readID;
2943
+ }
2944
+ }
2945
+
2946
+ while (sourceIndex < sourceLength) {
2947
+ mergeArray[mergeLength].readID =
2948
+ sourceArray[sourceIndex].readID;
2949
+ mergeArray[mergeLength].position = -1;
2950
+ mergeArray[mergeLength].offset = -1;
2951
+ mergeLength++;
2952
+ sourceIndex++;
2953
+ }
2954
+
2955
+ while (targetIndex < targetLength) {
2956
+ mergeArray[mergeLength].readID =
2957
+ targetArray[targetIndex].readID;
2958
+ mergeArray[mergeLength].position = -1;
2959
+ mergeArray[mergeLength].offset = -1;
2960
+ mergeLength++;
2961
+ targetIndex++;
2962
+ }
2963
+
2964
+ *length = mergeLength;
2965
+ return mergeArray;
2966
+ }
2967
+
2968
+ ShortReadMarker *extractFrontOfNodeReads(Node * node,
2969
+ Coordinate breakpoint,
2970
+ Graph * graph, IDnum * length,
2971
+ PassageMarkerI sourceMarker,
2972
+ ShortLength * lengths)
2973
+ {
2974
+ IDnum sourceID;
2975
+ IDnum mergeLength, newLength, sourceLength;
2976
+ IDnum sourceIndex;
2977
+ ShortReadMarker *mergeArray, *sourceArray, *newArray;
2978
+ ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
2979
+ Coordinate finish;
2980
+ Coordinate revBreakpoint;
2981
+
2982
+ if (graph->nodeReads == NULL) {
2983
+ *length = 0;
2984
+ return NULL;
2985
+ }
2986
+
2987
+ if (node == NULL) {
2988
+ *length = 0;
2989
+ return NULL;
2990
+ }
2991
+
2992
+ if (breakpoint == 0) {
2993
+ return commonNodeReads(node,
2994
+ getTwinNode(getNode
2995
+ (getPreviousInSequence
2996
+ (sourceMarker))),
2997
+ graph, length);
2998
+ }
2999
+
3000
+ sourceID = getNodeID(node) + graph->nodeCount;
3001
+ sourceArray = graph->nodeReads[sourceID];
3002
+ sourceLength = graph->nodeReadCounts[sourceID];
3003
+
3004
+ if (sourceArray == NULL) {
3005
+ *length = 0;
3006
+ return NULL;
3007
+ }
3008
+
3009
+ revBreakpoint = node->length - breakpoint;
3010
+
3011
+ mergeLength = 0;
3012
+ newLength = 0;
3013
+ sourcePtr = sourceArray;
3014
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3015
+ if (sourcePtr->position == -1) {
3016
+ newLength++;
3017
+ mergeLength++;
3018
+ } else {
3019
+ finish =
3020
+ sourcePtr->position - sourcePtr->offset +
3021
+ lengths[sourcePtr->readID - 1];
3022
+ if (sourcePtr->position < revBreakpoint)
3023
+ newLength++;
3024
+ if (finish > revBreakpoint)
3025
+ mergeLength++;
3026
+ }
3027
+ sourcePtr++;
3028
+ }
3029
+
3030
+ newArray = mallocOrExit(newLength, ShortReadMarker);
3031
+ mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3032
+
3033
+ mergePtr = mergeArray;
3034
+ newPtr = newArray;
3035
+ sourcePtr = sourceArray;
3036
+ mergeLength = 0;
3037
+ newLength = 0;
3038
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3039
+ if (sourcePtr->position == -1) {
3040
+ mergePtr->readID = sourcePtr->readID;
3041
+ setShortReadMarkerPosition(mergePtr, -1);
3042
+ setShortReadMarkerOffset(mergePtr, -1);
3043
+ mergePtr++;
3044
+ mergeLength++;
3045
+ newPtr->readID = sourcePtr->readID;
3046
+ setShortReadMarkerPosition(newPtr, -1);
3047
+ setShortReadMarkerOffset(newPtr, -1);
3048
+ newPtr++;
3049
+ newLength++;
3050
+ } else {
3051
+ finish =
3052
+ sourcePtr->position - sourcePtr->offset +
3053
+ lengths[sourcePtr->readID - 1];
3054
+ if (sourcePtr->position < revBreakpoint) {
3055
+ newPtr->readID = sourcePtr->readID;
3056
+ setShortReadMarkerPosition(newPtr,
3057
+ sourcePtr->
3058
+ position);
3059
+ setShortReadMarkerOffset(newPtr,
3060
+ sourcePtr->
3061
+ offset);
3062
+ newPtr++;
3063
+ newLength++;
3064
+
3065
+ // Saddle back reads:
3066
+ if (finish > revBreakpoint) {
3067
+ mergePtr->readID =
3068
+ sourcePtr->readID;
3069
+ setShortReadMarkerPosition
3070
+ (mergePtr, 0);
3071
+ setShortReadMarkerOffset(mergePtr,
3072
+ sourcePtr->
3073
+ offset +
3074
+ revBreakpoint
3075
+ -
3076
+ sourcePtr->
3077
+ position);
3078
+ mergePtr++;
3079
+ }
3080
+ } else if (finish > revBreakpoint) {
3081
+ mergePtr->readID = sourcePtr->readID;
3082
+ setShortReadMarkerPosition(mergePtr,
3083
+ sourcePtr->
3084
+ position - revBreakpoint);
3085
+ setShortReadMarkerOffset(mergePtr,
3086
+ sourcePtr->
3087
+ offset);
3088
+ mergePtr++;
3089
+ mergeLength++;
3090
+ }
3091
+ }
3092
+
3093
+ sourcePtr++;
3094
+ }
3095
+
3096
+ free(sourceArray);
3097
+ graph->nodeReads[sourceID] = newArray;
3098
+ graph->nodeReadCounts[sourceID] = newLength;
3099
+
3100
+ *length = mergeLength;
3101
+ return mergeArray;
3102
+ }
3103
+
3104
+ ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
3105
+ Graph * graph, IDnum * length,
3106
+ PassageMarkerI sourceMarker,
3107
+ ShortLength * lengths)
3108
+ {
3109
+ IDnum sourceID;
3110
+ IDnum mergeLength, newLength, sourceLength;
3111
+ IDnum sourceIndex;
3112
+ ShortReadMarker *mergeArray, *sourceArray, *newArray;
3113
+ ShortReadMarker *mergePtr, *sourcePtr, *newPtr;
3114
+ Coordinate finish;
3115
+
3116
+ if (graph->nodeReads == NULL) {
3117
+ *length = 0;
3118
+ return NULL;
3119
+ }
3120
+
3121
+ if (node == NULL) {
3122
+ *length = 0;
3123
+ return NULL;
3124
+ }
3125
+
3126
+ if (breakpoint == 0) {
3127
+ return
3128
+ commonNodeReads(getNode
3129
+ (getPreviousInSequence(sourceMarker)),
3130
+ node, graph, length);
3131
+ }
3132
+
3133
+ sourceID = getNodeID(node) + graph->nodeCount;
3134
+ sourceArray = graph->nodeReads[sourceID];
3135
+ sourceLength = graph->nodeReadCounts[sourceID];
3136
+
3137
+ if (sourceArray == NULL) {
3138
+ *length = 0;
3139
+ return NULL;
3140
+ }
3141
+
3142
+ mergeLength = 0;
3143
+ newLength = 0;
3144
+ sourcePtr = sourceArray;
3145
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3146
+ if (sourcePtr->position == -1) {
3147
+ mergeLength++;
3148
+ newLength++;
3149
+ } else {
3150
+ finish =
3151
+ sourcePtr->position - sourcePtr->offset +
3152
+ lengths[sourcePtr->readID - 1];
3153
+ if (sourcePtr->position < breakpoint)
3154
+ mergeLength++;
3155
+ if (finish > breakpoint)
3156
+ newLength++;
3157
+ }
3158
+ sourcePtr++;
3159
+ }
3160
+
3161
+ newArray = mallocOrExit(newLength, ShortReadMarker);
3162
+ mergeArray = mallocOrExit(mergeLength, ShortReadMarker);
3163
+
3164
+ mergePtr = mergeArray;
3165
+ newPtr = newArray;
3166
+ sourcePtr = sourceArray;
3167
+ for (sourceIndex = 0; sourceIndex < sourceLength; sourceIndex++) {
3168
+ if (sourcePtr->position == -1) {
3169
+ mergePtr->readID = sourcePtr->readID;
3170
+ setShortReadMarkerPosition(mergePtr, -1);
3171
+ setShortReadMarkerOffset(mergePtr, -1);
3172
+ mergePtr++;
3173
+
3174
+ newPtr->readID = sourcePtr->readID;
3175
+ setShortReadMarkerPosition(newPtr, -1);
3176
+ setShortReadMarkerOffset(newPtr, -1);
3177
+ newPtr++;
3178
+
3179
+ sourcePtr++;
3180
+ continue;
3181
+ } else {
3182
+ finish =
3183
+ sourcePtr->position - sourcePtr->offset +
3184
+ lengths[sourcePtr->readID - 1];
3185
+
3186
+ if (sourcePtr->position < breakpoint) {
3187
+ mergePtr->readID = sourcePtr->readID;
3188
+ setShortReadMarkerPosition(mergePtr,
3189
+ sourcePtr->
3190
+ position);
3191
+ setShortReadMarkerOffset(mergePtr,
3192
+ sourcePtr->
3193
+ offset);
3194
+ mergePtr++;
3195
+
3196
+ // Saddle back reads:
3197
+ if (finish > breakpoint) {
3198
+ newPtr->readID = sourcePtr->readID;
3199
+ setShortReadMarkerPosition(newPtr,
3200
+ 0);
3201
+ setShortReadMarkerOffset(newPtr,
3202
+ sourcePtr->
3203
+ offset +
3204
+ breakpoint
3205
+ -
3206
+ sourcePtr->
3207
+ position);
3208
+ newPtr++;
3209
+ }
3210
+ } else if (finish > breakpoint) {
3211
+ newPtr->readID = sourcePtr->readID;
3212
+ setShortReadMarkerPosition(newPtr,
3213
+ sourcePtr->
3214
+ position -
3215
+ breakpoint);
3216
+ setShortReadMarkerOffset(newPtr,
3217
+ sourcePtr->
3218
+ offset);
3219
+ newPtr++;
3220
+ }
3221
+ }
3222
+
3223
+ sourcePtr++;
3224
+ }
3225
+
3226
+ free(sourceArray);
3227
+ graph->nodeReads[sourceID] = newArray;
3228
+ graph->nodeReadCounts[sourceID] = newLength;
3229
+
3230
+ *length = mergeLength;
3231
+ return mergeArray;
3232
+ }
3233
+
3234
+ void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
3235
+ Graph * graph)
3236
+ {
3237
+ IDnum targetID, targetLength, targetIndex, targetVal;
3238
+ IDnum sourceLength, sourceIndex, sourceVal;
3239
+ IDnum mergeLength;
3240
+ ShortReadMarker *sourceArray, *targetArray, *mergeArray;
3241
+ ShortReadMarker *sourcePtr, *targetPtr, *mergePtr;
3242
+ Coordinate targetPosition;
3243
+ //ShortLength nodeLength = (ShortLength) getNodeLength(node);
3244
+ ShortLength targetOffset;
3245
+
3246
+ if (graph->nodeReads == NULL || reads == NULL || node == NULL)
3247
+ return;
3248
+
3249
+ targetID = getNodeID(node) + graph->nodeCount;
3250
+ targetArray = graph->nodeReads[targetID];
3251
+ targetLength = graph->nodeReadCounts[targetID];
3252
+ targetPtr = targetArray;
3253
+
3254
+ sourceArray = reads;
3255
+ sourceLength = readCount;
3256
+ sourcePtr = sourceArray;
3257
+
3258
+ if (targetArray == NULL) {
3259
+ mergeArray =
3260
+ mallocOrExit(sourceLength, ShortReadMarker);
3261
+ mergePtr = mergeArray;
3262
+
3263
+ sourceIndex = 0;
3264
+ while (sourceIndex < sourceLength) {
3265
+ mergePtr->readID = sourcePtr->readID;
3266
+ setShortReadMarkerPosition(mergePtr, -1);
3267
+ setShortReadMarkerOffset(mergePtr, -1);
3268
+ mergePtr++;
3269
+ sourcePtr++;
3270
+ sourceIndex++;
3271
+ }
3272
+
3273
+ graph->nodeReads[targetID] = mergeArray;
3274
+ graph->nodeReadCounts[targetID] = sourceLength;
3275
+ return;
3276
+ }
3277
+
3278
+ mergeArray =
3279
+ mallocOrExit(sourceLength +
3280
+ targetLength, ShortReadMarker);
3281
+ mergePtr = mergeArray;
3282
+
3283
+ mergeLength = 0;
3284
+ sourceIndex = 0;
3285
+ targetIndex = 0;
3286
+ sourceVal = sourcePtr->readID;
3287
+ targetVal = targetPtr->readID;
3288
+ targetPosition = targetPtr->position;
3289
+ targetOffset = targetPtr->offset;
3290
+
3291
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3292
+ if (sourceVal < targetVal) {
3293
+ mergePtr->readID = sourceVal;
3294
+ setShortReadMarkerPosition(mergePtr, -1);
3295
+ setShortReadMarkerOffset(mergePtr, -1);
3296
+ sourceIndex++;
3297
+ sourcePtr++;
3298
+ if (sourceIndex < sourceLength)
3299
+ sourceVal = sourcePtr->readID;
3300
+ } else if (sourceVal == targetVal) {
3301
+ mergePtr->readID = sourceVal;
3302
+ setShortReadMarkerPosition(mergePtr, -1);
3303
+ setShortReadMarkerOffset(mergePtr, -1);
3304
+ sourceIndex++;
3305
+ sourcePtr++;
3306
+ if (sourceIndex < sourceLength)
3307
+ sourceVal = sourcePtr->readID;
3308
+ targetIndex++;
3309
+ targetPtr++;
3310
+ if (targetIndex < targetLength) {
3311
+ targetVal = targetPtr->readID;
3312
+ targetPosition = targetPtr->position;
3313
+ targetOffset = targetPtr->offset;
3314
+ }
3315
+ } else {
3316
+ mergePtr->readID = targetVal;
3317
+ setShortReadMarkerPosition(mergePtr,
3318
+ targetPosition);
3319
+ setShortReadMarkerOffset(mergePtr, targetOffset);
3320
+ targetIndex++;
3321
+ targetPtr++;
3322
+ if (targetIndex < targetLength) {
3323
+ targetVal = targetPtr->readID;
3324
+ targetPosition = targetPtr->position;
3325
+ targetOffset = targetPtr->offset;
3326
+ }
3327
+ }
3328
+
3329
+ mergeLength++;
3330
+ mergePtr++;
3331
+ }
3332
+
3333
+ while (sourceIndex < sourceLength) {
3334
+ mergePtr->readID = sourcePtr->readID;
3335
+ setShortReadMarkerPosition(mergePtr, -1);
3336
+ setShortReadMarkerOffset(mergePtr, -1);
3337
+ mergeLength++;
3338
+ mergePtr++;
3339
+ sourceIndex++;
3340
+ sourcePtr++;
3341
+ }
3342
+
3343
+ while (targetIndex < targetLength) {
3344
+ mergePtr->readID = targetPtr->readID;
3345
+ setShortReadMarkerPosition(mergePtr, targetPtr->position);
3346
+ setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3347
+ mergeLength++;
3348
+ mergePtr++;
3349
+ targetIndex++;
3350
+ targetPtr++;
3351
+ }
3352
+
3353
+ free(targetArray);
3354
+ graph->nodeReads[targetID] = mergeArray;
3355
+ graph->nodeReadCounts[targetID] = mergeLength;
3356
+ }
3357
+
3358
+ static inline Coordinate min(Coordinate A, Coordinate B)
3359
+ {
3360
+ return A < B ? A : B;
3361
+ }
3362
+
3363
+ static inline ShortLength min_short(ShortLength A, ShortLength B)
3364
+ {
3365
+ return A < B ? A : B;
3366
+ }
3367
+
3368
+ void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
3369
+ Node * target, Graph * graph)
3370
+ {
3371
+ IDnum targetID = getNodeID(target) + graph->nodeCount;
3372
+ ShortReadMarker *targetArray = graph->nodeReads[targetID];
3373
+ IDnum targetLength = graph->nodeReadCounts[targetID];
3374
+ ShortReadMarker *targetPtr = targetArray;
3375
+ ShortReadMarker *sourcePtr = sourceArray;
3376
+ ShortReadMarker *mergeArray, *mergePtr;
3377
+ IDnum mergeLength;
3378
+ Coordinate targetPosition, sourcePosition;
3379
+ ShortLength targetOffset, sourceOffset;
3380
+ IDnum targetIndex, targetVal, sourceIndex, sourceVal;
3381
+
3382
+ if (sourceLength == 0) {
3383
+ free(sourceArray);
3384
+ return;
3385
+ }
3386
+
3387
+ if (targetLength == 0) {
3388
+ free(targetArray);
3389
+ graph->nodeReads[targetID] = sourceArray;
3390
+ graph->nodeReadCounts[targetID] = sourceLength;
3391
+ return;
3392
+ }
3393
+
3394
+ mergeArray =
3395
+ mallocOrExit(sourceLength +
3396
+ targetLength, ShortReadMarker);
3397
+ mergePtr = mergeArray;
3398
+
3399
+ mergeLength = 0;
3400
+ sourceIndex = 0;
3401
+ targetIndex = 0;
3402
+ targetVal = targetPtr->readID;
3403
+ targetPosition = targetPtr->position;
3404
+ targetOffset = targetPtr->offset;
3405
+ sourceVal = sourcePtr->readID;
3406
+ sourcePosition = sourcePtr->position;
3407
+ sourceOffset = sourcePtr->offset;
3408
+
3409
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3410
+ if (sourceVal < targetVal) {
3411
+ mergePtr->readID = sourceVal;
3412
+ setShortReadMarkerPosition(mergePtr,
3413
+ sourcePosition);
3414
+ setShortReadMarkerOffset(mergePtr, sourceOffset);
3415
+ sourceIndex++;
3416
+ if (sourceIndex < sourceLength) {
3417
+ sourcePtr++;
3418
+ sourceVal = sourcePtr->readID;
3419
+ sourcePosition = sourcePtr->position;
3420
+ sourceOffset = sourcePtr->offset;
3421
+ }
3422
+ } else if (sourceVal == targetVal) {
3423
+ mergePtr->readID = sourceVal;
3424
+ if (sourcePosition == -1 && targetPosition == -1) {
3425
+ setShortReadMarkerPosition(mergePtr, -1);
3426
+ setShortReadMarkerOffset(mergePtr, -1);
3427
+ } else if (sourcePosition == -1) {
3428
+ setShortReadMarkerPosition(mergePtr,
3429
+ targetPosition);
3430
+ setShortReadMarkerOffset(mergePtr,
3431
+ targetOffset);
3432
+ } else if (targetPosition == -1) {
3433
+ setShortReadMarkerPosition(mergePtr,
3434
+ sourcePosition);
3435
+ setShortReadMarkerOffset(mergePtr,
3436
+ sourceOffset);
3437
+ } else {
3438
+ setShortReadMarkerPosition(mergePtr,
3439
+ min
3440
+ (sourcePosition,
3441
+ targetPosition));
3442
+ setShortReadMarkerOffset(mergePtr,
3443
+ min_short
3444
+ (sourceOffset,
3445
+ targetOffset));
3446
+ }
3447
+ sourceIndex++;
3448
+ if (sourceIndex < sourceLength) {
3449
+ sourcePtr++;
3450
+ sourceVal = sourcePtr->readID;
3451
+ sourcePosition = sourcePtr->position;
3452
+ sourceOffset = sourcePtr->offset;
3453
+ }
3454
+ targetIndex++;
3455
+ if (targetIndex < targetLength) {
3456
+ targetPtr++;
3457
+ targetVal = targetPtr->readID;
3458
+ targetPosition = targetPtr->position;
3459
+ targetOffset = targetPtr->offset;
3460
+ }
3461
+ } else {
3462
+ mergePtr->readID = targetVal;
3463
+ setShortReadMarkerPosition(mergePtr,
3464
+ targetPosition);
3465
+ setShortReadMarkerOffset(mergePtr, targetOffset);
3466
+ targetIndex++;
3467
+ if (targetIndex < targetLength) {
3468
+ targetPtr++;
3469
+ targetVal = targetPtr->readID;
3470
+ targetPosition = targetPtr->position;
3471
+ targetOffset = targetPtr->offset;
3472
+ }
3473
+ }
3474
+
3475
+ mergeLength++;
3476
+ mergePtr++;
3477
+ }
3478
+
3479
+ while (sourceIndex < sourceLength) {
3480
+ mergePtr->readID = sourcePtr->readID;
3481
+ setShortReadMarkerPosition(mergePtr, sourcePtr->position);
3482
+ setShortReadMarkerOffset(mergePtr, sourcePtr->offset);
3483
+ mergeLength++;
3484
+ mergePtr++;
3485
+ sourceIndex++;
3486
+ sourcePtr++;
3487
+ }
3488
+
3489
+ while (targetIndex < targetLength) {
3490
+ mergePtr->readID = targetPtr->readID;
3491
+ setShortReadMarkerPosition(mergePtr, targetPtr->position);
3492
+ setShortReadMarkerOffset(mergePtr, targetPtr->offset);
3493
+ mergeLength++;
3494
+ mergePtr++;
3495
+ targetIndex++;
3496
+ targetPtr++;
3497
+ }
3498
+
3499
+ free(targetArray);
3500
+ graph->nodeReads[targetID] = mergeArray;
3501
+ graph->nodeReadCounts[targetID] = mergeLength;
3502
+
3503
+ free(sourceArray);
3504
+ }
3505
+
3506
+ void mergeNodeReads(Node * target, Node * source, Graph * graph)
3507
+ {
3508
+ IDnum sourceID, sourceLength;
3509
+ ShortReadMarker *sourceArray;
3510
+
3511
+ if (graph->nodeReads == NULL || source == NULL || target == NULL)
3512
+ return;
3513
+
3514
+ sourceID = getNodeID(source) + graph->nodeCount;
3515
+ sourceArray = graph->nodeReads[sourceID];
3516
+ sourceLength = graph->nodeReadCounts[sourceID];
3517
+
3518
+ if (sourceArray == NULL)
3519
+ return;
3520
+
3521
+ graph->nodeReads[sourceID] = NULL;
3522
+ graph->nodeReadCounts[sourceID] = 0;
3523
+
3524
+ injectShortReads(sourceArray, sourceLength, target, graph);
3525
+ }
3526
+
3527
+ void foldSymmetricalNodeReads(Node * node, Graph * graph)
3528
+ {
3529
+ IDnum targetID, targetLength, targetIndex;
3530
+ IDnum sourceID, sourceLength, sourceIndex;
3531
+ IDnum targetVal = 0;
3532
+ IDnum sourceVal = 0;
3533
+ IDnum mergeLength;
3534
+ ShortReadMarker *sourceArray, *targetArray, *mergeArray,
3535
+ *mergeArray2;
3536
+ ShortReadMarker *sourcePtr, *targetPtr, *mergePtr, *mergePtr2;
3537
+
3538
+ if (graph->nodeReads == NULL || node == NULL)
3539
+ return;
3540
+
3541
+ sourceID = getNodeID(node) + graph->nodeCount;
3542
+ sourceArray = graph->nodeReads[sourceID];
3543
+ sourceLength = graph->nodeReadCounts[sourceID];
3544
+ sourcePtr = sourceArray;
3545
+
3546
+ targetID = -getNodeID(node) + graph->nodeCount;
3547
+ targetArray = graph->nodeReads[targetID];
3548
+ targetLength = graph->nodeReadCounts[targetID];
3549
+ targetPtr = targetArray;
3550
+
3551
+ if (sourceArray == NULL && targetArray == NULL)
3552
+ return;
3553
+
3554
+ mergeArray =
3555
+ mallocOrExit(sourceLength +
3556
+ targetLength, ShortReadMarker);
3557
+ mergeArray2 =
3558
+ mallocOrExit(sourceLength +
3559
+ targetLength, ShortReadMarker);
3560
+ mergePtr = mergeArray;
3561
+ mergePtr2 = mergeArray2;
3562
+
3563
+ mergeLength = 0;
3564
+ sourceIndex = 0;
3565
+ targetIndex = 0;
3566
+ if (targetIndex < targetLength)
3567
+ targetVal = targetPtr->readID;
3568
+ if (sourceIndex < sourceLength)
3569
+ sourceVal = sourcePtr->readID;
3570
+
3571
+ while (sourceIndex < sourceLength && targetIndex < targetLength) {
3572
+ if (sourceVal < targetVal) {
3573
+ mergePtr->readID = sourceVal;
3574
+ setShortReadMarkerPosition(mergePtr, -1);
3575
+ setShortReadMarkerOffset(mergePtr, -1);
3576
+ mergePtr2->readID = sourceVal;
3577
+ setShortReadMarkerPosition(mergePtr2, -1);
3578
+ setShortReadMarkerOffset(mergePtr2, -1);
3579
+ sourceIndex++;
3580
+ sourcePtr++;
3581
+ if (sourceIndex < sourceLength)
3582
+ sourceVal = sourcePtr->readID;
3583
+ } else if (sourceVal == targetVal) {
3584
+ mergePtr->readID = sourceVal;
3585
+ setShortReadMarkerPosition(mergePtr, -1);
3586
+ setShortReadMarkerOffset(mergePtr, -1);
3587
+ mergePtr2->readID = sourceVal;
3588
+ setShortReadMarkerPosition(mergePtr2, -1);
3589
+ setShortReadMarkerOffset(mergePtr2, -1);
3590
+ sourceIndex++;
3591
+ sourcePtr++;
3592
+ if (sourceIndex < sourceLength)
3593
+ sourceVal = sourcePtr->readID;
3594
+ targetIndex++;
3595
+ targetPtr++;
3596
+ if (targetIndex < targetLength)
3597
+ targetVal = targetPtr->readID;
3598
+ } else {
3599
+ mergePtr->readID = targetVal;
3600
+ setShortReadMarkerPosition(mergePtr, -1);
3601
+ setShortReadMarkerOffset(mergePtr, -1);
3602
+ mergePtr2->readID = targetVal;
3603
+ setShortReadMarkerPosition(mergePtr2, -1);
3604
+ setShortReadMarkerOffset(mergePtr2, -1);
3605
+ targetIndex++;
3606
+ targetPtr++;
3607
+ if (targetIndex < targetLength)
3608
+ targetVal = targetPtr->readID;
3609
+ }
3610
+
3611
+ mergeLength++;
3612
+ mergePtr++;
3613
+ mergePtr2++;
3614
+ }
3615
+
3616
+ while (sourceIndex < sourceLength) {
3617
+ mergePtr->readID = sourcePtr->readID;
3618
+ setShortReadMarkerPosition(mergePtr, -1);
3619
+ setShortReadMarkerOffset(mergePtr, -1);
3620
+ mergePtr2->readID = sourcePtr->readID;
3621
+ setShortReadMarkerPosition(mergePtr2, -1);
3622
+ setShortReadMarkerOffset(mergePtr2, -1);
3623
+ mergeLength++;
3624
+ mergePtr++;
3625
+ mergePtr2++;
3626
+ sourceIndex++;
3627
+ sourcePtr++;
3628
+ }
3629
+
3630
+ while (targetIndex < targetLength) {
3631
+ mergePtr->readID = targetPtr->readID;
3632
+ setShortReadMarkerPosition(mergePtr, -1);
3633
+ setShortReadMarkerOffset(mergePtr, -1);
3634
+ mergePtr2->readID = targetPtr->readID;
3635
+ setShortReadMarkerPosition(mergePtr2, -1);
3636
+ setShortReadMarkerOffset(mergePtr2, -1);
3637
+ mergeLength++;
3638
+ mergePtr++;
3639
+ mergePtr2++;
3640
+ targetIndex++;
3641
+ targetPtr++;
3642
+ }
3643
+
3644
+ free(targetArray);
3645
+ graph->nodeReads[targetID] = mergeArray;
3646
+ graph->nodeReadCounts[targetID] = mergeLength;
3647
+
3648
+ free(sourceArray);
3649
+ graph->nodeReads[sourceID] = mergeArray2;
3650
+ graph->nodeReadCounts[sourceID] = mergeLength;
3651
+ }
3652
+
3653
+ void shareReadStarts(Node * target, Node * source, Graph * graph)
3654
+ {
3655
+ ShortReadMarker *sourceArray;
3656
+ IDnum sourceLength, sourceID;
3657
+
3658
+ if (graph->nodeReads == NULL)
3659
+ return;
3660
+
3661
+ if (target == NULL || source == NULL)
3662
+ return;
3663
+
3664
+ sourceID = source->ID + graph->nodeCount;
3665
+ sourceArray = graph->nodeReads[sourceID];
3666
+ sourceLength = graph->nodeReadCounts[sourceID];
3667
+
3668
+ if (sourceArray == NULL)
3669
+ return;
3670
+
3671
+ spreadReadIDs(sourceArray, sourceLength, target, graph);
3672
+ }
3673
+
3674
+ ShortReadMarker **getNodeToReadMappings(Graph * graph)
3675
+ {
3676
+ return graph->nodeReads;
3677
+ }
3678
+
3679
+ IDnum getShortReadMarkerID(ShortReadMarker * marker)
3680
+ {
3681
+ return marker->readID;
3682
+ }
3683
+
3684
+ inline ShortLength getShortReadMarkerOffset(ShortReadMarker * marker)
3685
+ {
3686
+ return marker->offset;
3687
+ }
3688
+
3689
+ inline void setShortReadMarkerOffset(ShortReadMarker * marker,
3690
+ ShortLength offset)
3691
+ {
3692
+ marker->offset = offset;
3693
+ }
3694
+
3695
+ IDnum *getNodeReadCounts(Graph * graph)
3696
+ {
3697
+ return graph->nodeReadCounts;
3698
+ }
3699
+
3700
+ int getWordLength(Graph * graph)
3701
+ {
3702
+ return graph->wordLength;
3703
+ }
3704
+
3705
+ ShortReadMarker *getNodeReads(Node * node, Graph * graph)
3706
+ {
3707
+ IDnum id = node->ID + graph->nodeCount;
3708
+ return graph->nodeReads[id];
3709
+ }
3710
+
3711
+ IDnum getNodeReadCount(Node * node, Graph * graph)
3712
+ {
3713
+ IDnum id = node->ID + graph->nodeCount;
3714
+ return graph->nodeReadCounts[id];
3715
+ }
3716
+
3717
+ inline Coordinate getShortReadMarkerPosition(ShortReadMarker * marker)
3718
+ {
3719
+ return marker->position;
3720
+ }
3721
+
3722
+ inline void setShortReadMarkerPosition(ShortReadMarker * marker,
3723
+ Coordinate position)
3724
+ {
3725
+ if (position < -100)
3726
+ return;
3727
+
3728
+ marker->position = position;
3729
+ }
3730
+
3731
+ ShortReadMarker *getShortReadMarkerAtIndex(ShortReadMarker * array,
3732
+ IDnum index)
3733
+ {
3734
+ return &(array[index]);
3735
+ }
3736
+
3737
+ void destroyGraph(Graph * graph)
3738
+ {
3739
+ IDnum index;
3740
+ Node *node;
3741
+ for (index = 1; index <= graph->nodeCount; index++) {
3742
+ node = getNodeInGraph(graph, index);
3743
+ if (node != NULL)
3744
+ destroyNode(node, graph);
3745
+ }
3746
+
3747
+ if (graph->gapMarkers)
3748
+ deactivateGapMarkers(graph);
3749
+
3750
+ free(graph->nodes);
3751
+ destroyRecycleBin(nodeMemory);
3752
+ destroyRecycleBin(arcMemory);
3753
+ destroyAllPassageMarkers();
3754
+ free(graph->arcLookupTable);
3755
+ free(graph->nodeReads);
3756
+ free(graph->nodeReadCounts);
3757
+ free(graph);
3758
+ }
3759
+
3760
+ void setInsertLengths(Graph * graph, Category cat, Coordinate insertLength,
3761
+ Coordinate insertLength_std_dev)
3762
+ {
3763
+ graph->insertLengths[cat] = insertLength;
3764
+ graph->insertLengths_var[cat] =
3765
+ insertLength_std_dev * insertLength_std_dev;
3766
+ }
3767
+
3768
+ Coordinate getInsertLength(Graph * graph, Category cat)
3769
+ {
3770
+ return graph->insertLengths[cat / 2];
3771
+ }
3772
+
3773
+ double getInsertLength_var(Graph * graph, Category cat)
3774
+ {
3775
+ return graph->insertLengths_var[cat / 2];
3776
+ }
3777
+
3778
+ void activateGapMarkers(Graph * graph)
3779
+ {
3780
+ graph->gapMarkers =
3781
+ callocOrExit(graph->nodeCount + 1, GapMarker *);
3782
+ gapMarkerMemory = newRecycleBin(sizeof(GapMarker), GAPBLOCKSIZE);
3783
+ }
3784
+
3785
+ void deactivateGapMarkers(Graph * graph)
3786
+ {
3787
+ free(graph->gapMarkers);
3788
+ graph->gapMarkers = NULL;
3789
+ destroyRecycleBin(gapMarkerMemory);
3790
+ gapMarkerMemory = NULL;
3791
+ }
3792
+
3793
+ static GapMarker *allocateGapMarker()
3794
+ {
3795
+ return (GapMarker *) allocatePointer(gapMarkerMemory);
3796
+ }
3797
+
3798
+ void appendGap(Node * node, Coordinate length, Graph * graph)
3799
+ {
3800
+ IDnum nodeID = getNodeID(node);
3801
+ GapMarker *marker = allocateGapMarker();
3802
+ GapMarker *tmp;
3803
+
3804
+ marker->length = length;
3805
+
3806
+ if (nodeID > 0) {
3807
+ marker->position = node->length;
3808
+ marker->next = graph->gapMarkers[nodeID];
3809
+ graph->gapMarkers[nodeID] = marker;
3810
+ } else {
3811
+ for (tmp = graph->gapMarkers[-nodeID]; tmp != NULL;
3812
+ tmp = tmp->next)
3813
+ tmp->position += length;
3814
+
3815
+ marker->position = 0;
3816
+ marker->next = graph->gapMarkers[-nodeID];
3817
+ graph->gapMarkers[-nodeID] = marker;
3818
+ }
3819
+
3820
+ addBufferToDescriptor(node, length);
3821
+ }
3822
+
3823
+ void appendNodeGaps(Node * destination, Node * source, Graph * graph)
3824
+ {
3825
+ IDnum destinationID = getNodeID(destination);
3826
+ IDnum sourceID = getNodeID(source);
3827
+ GapMarker *marker;
3828
+
3829
+ if (graph->gapMarkers == NULL)
3830
+ return;
3831
+
3832
+ if (destinationID > 0 && sourceID > 0) {
3833
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3834
+ marker = marker->next)
3835
+ marker->position += destination->length;
3836
+ } else if (destinationID > 0 && sourceID < 0) {
3837
+ sourceID = -sourceID;
3838
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3839
+ marker = marker->next)
3840
+ marker->position =
3841
+ source->length + destination->length -
3842
+ marker->position - marker->length;
3843
+ } else if (destinationID < 0 && sourceID > 0) {
3844
+ destinationID = -destinationID;
3845
+ for (marker = graph->gapMarkers[destinationID];
3846
+ marker != NULL; marker = marker->next)
3847
+ marker->position += source->length;
3848
+
3849
+ for (marker = graph->gapMarkers[sourceID]; marker != NULL;
3850
+ marker = marker->next)
3851
+ marker->position =
3852
+ source->length - marker->position -
3853
+ marker->length;
3854
+ } else {
3855
+ destinationID = -destinationID;
3856
+ sourceID = -sourceID;
3857
+ for (marker = graph->gapMarkers[destinationID];
3858
+ marker != NULL; marker = marker->next)
3859
+ marker->position += source->length;
3860
+ }
3861
+
3862
+ if (graph->gapMarkers[destinationID] == NULL)
3863
+ graph->gapMarkers[destinationID] =
3864
+ graph->gapMarkers[sourceID];
3865
+ else {
3866
+ marker = graph->gapMarkers[destinationID];
3867
+ while (marker->next != NULL)
3868
+ marker = marker->next;
3869
+ marker->next = graph->gapMarkers[sourceID];
3870
+ }
3871
+
3872
+ graph->gapMarkers[sourceID] = NULL;
3873
+ }
3874
+
3875
+ GapMarker *getGap(Node * node, Graph * graph)
3876
+ {
3877
+ IDnum nodeID = getNodeID(node);
3878
+
3879
+ if (graph->gapMarkers == NULL)
3880
+ return NULL;
3881
+
3882
+ if (nodeID < 0)
3883
+ nodeID = -nodeID;
3884
+
3885
+ return graph->gapMarkers[nodeID];
3886
+ }
3887
+
3888
+ GapMarker *getNextGap(GapMarker * marker)
3889
+ {
3890
+ return marker->next;
3891
+ }
3892
+
3893
+ Coordinate getGapStart(GapMarker * marker)
3894
+ {
3895
+ return marker->position;
3896
+ }
3897
+
3898
+ Coordinate getGapFinish(GapMarker * marker)
3899
+ {
3900
+ return marker->position + marker->length;
3901
+ }
3902
+
3903
+ void reallocateNodeDescriptor(Node * node, Coordinate length) {
3904
+ Coordinate arrayLength, index, shift;
3905
+ Node * twin = node->twinNode;
3906
+ Descriptor * array;
3907
+ Nucleotide nucleotide;
3908
+
3909
+ if (length < node->length)
3910
+ exitErrorf(EXIT_FAILURE, true, "Sum of node lengths smaller than first!");
3911
+
3912
+ shift = length - node->length;
3913
+
3914
+ arrayLength = length / 4;
3915
+ if (length % 4)
3916
+ arrayLength++;
3917
+
3918
+ node->descriptor = reallocOrExit(node->descriptor, arrayLength, Descriptor);
3919
+
3920
+ array = callocOrExit(arrayLength, Descriptor);
3921
+ for (index = node->length - 1; index >= 0; index--) {
3922
+ nucleotide = getNucleotideInDescriptor(twin->descriptor, index);
3923
+ writeNucleotideInDescriptor(nucleotide, array, index + shift);
3924
+ }
3925
+
3926
+ free(twin->descriptor);
3927
+ twin->descriptor = array;
3928
+ }
3929
+
3930
+ boolean doubleStrandedGraph(Graph * graph) {
3931
+ return graph->double_stranded;
3932
+ }