finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,64 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #ifndef _SSCAFFOLD_H_
22
+ #define _SCAFFOLD_H_
23
+
24
+ typedef struct connection_st Connection;
25
+
26
+ //General scaffold function
27
+ void buildScaffold(Graph * graph,
28
+ ReadSet * reads,
29
+ boolean * dubious,
30
+ boolean * shadows);
31
+ Connection *createNewConnection(IDnum nodeID, IDnum node2ID,
32
+ IDnum direct_count,
33
+ IDnum paired_count,
34
+ Coordinate distance,
35
+ double variance);
36
+ void readjustConnection(Connection * connect, Coordinate distance,
37
+ double variance, IDnum direct_count,
38
+ IDnum paired_count);
39
+ void destroyConnection(Connection * connect, IDnum nodeID);
40
+
41
+ void cleanScaffoldMemory();
42
+
43
+ void setUnreliableConnectionCutoff(int val);
44
+ void setPairedExpFraction(double x);
45
+
46
+ // Connection handlers
47
+ Connection * getConnection(Node * node);
48
+
49
+ Node * getConnectionDestination(Connection * connect);
50
+ Coordinate getConnectionDistance(Connection * connect);
51
+ Connection * getNextConnection(Connection * connect);
52
+ Connection * getTwinConnection(Connection * connect);
53
+ double getConnectionVariance(Connection * connect);
54
+ IDnum getConnectionDirectCount(Connection * connect);
55
+ IDnum getConnectionPairedCount(Connection * connect);
56
+
57
+ void incrementConnectionDistance(Connection * connect, Coordinate increment);
58
+ void printConnections(ReadSet * reads, boolean * shadows);
59
+ void printScaffold(Graph * graph,
60
+ ReadSet * reads,
61
+ boolean * dubious,
62
+ boolean * shadows);
63
+
64
+ #endif
@@ -0,0 +1,1243 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <time.h>
24
+ #include <math.h>
25
+
26
+ #include "globals.h"
27
+ #include "graph.h"
28
+ #include "concatenatedGraph.h"
29
+ #include "recycleBin.h"
30
+ #include "locallyCorrectedGraph.h"
31
+ #include "passageMarker.h"
32
+ #include "readSet.h"
33
+ #include "utility.h"
34
+ #include "scaffold.h"
35
+
36
// File-level constants, the per-node local scaffold record, and the file's static state.
+ #define BLOCK_SIZE 100000 // second argument of newRecycleBin() when the NodeList pool is created
37
+ #define LN2 1.4 // NOTE(review): not used in the part of the file reviewed here; ln(2) is ~0.693 while 1.4 is close to sqrt(2) - confirm the intended value
38
+ #define BACKTRACK_CUTOFF 100 // subtracted from half a node's length to obtain the min_distance thresholds below
39
+
40
+ typedef struct miniConnection_st MiniConnection;
41
+
42
+ struct miniConnection_st { // one localScaffold entry; packed, so the field order is part of the layout
43
+ Connection *frontReference; // connection stored by the forward-connection loops (may be NULL)
44
+ Connection *backReference; // connection stored by the twin-connection loops (may be NULL)
45
+ NodeList *nodeList; // cell returned by recordNode() for this entry; NULL once popped or unmarked
46
+ float variance; // variance attached to distance
47
+ IDnum distance; // distance estimate; note it is stored as IDnum although callers pass Coordinate values
48
+ } ATTRIBUTE_PACKED;
49
+
50
+ // Global pointers
51
+ static Graph *graph; // not assigned in the part of the file reviewed here
52
+ static NodeList *markedNodes; // head of the doubly linked list maintained by recordNode/destroyNodeList/popNodeRecord
53
+ static RecycleBin *nodeListMemory = NULL; // NodeList pool, created on first allocateNodeList() call
54
+ static MiniConnection *localScaffold = NULL; // indexed with getNodeID(node) + nodeCount(graph)
55
+
56
+ static NodeList *allocateNodeList()
57
+ {
58
+ if (nodeListMemory == NULL)
59
+ nodeListMemory =
60
+ newRecycleBin(sizeof(NodeList), BLOCK_SIZE);
61
+
62
+ return allocatePointer(nodeListMemory);
63
+ }
64
+
65
+ static void deallocateNodeList(NodeList * nodeList)
66
+ {
67
+ deallocatePointer(nodeListMemory, nodeList);
68
+ }
69
+
70
+ static NodeList *recordNode(Node * node)
71
+ {
72
+ NodeList *nodeList = allocateNodeList();
73
+ nodeList->node = node;
74
+ nodeList->next = markedNodes;
75
+ nodeList->previous = NULL;
76
+
77
+ if (markedNodes != NULL)
78
+ markedNodes->previous = nodeList;
79
+
80
+ markedNodes = nodeList;
81
+
82
+ return nodeList;
83
+ }
84
+
85
+ static void destroyNodeList(NodeList * nodeList)
86
+ {
87
+ //velvetLog("Destroy NL %p > %p > %p\n", nodeList->previous, nodeList, nodeList->next);
88
+
89
+ if (nodeList->previous != NULL)
90
+ nodeList->previous->next = nodeList->next;
91
+ else
92
+ markedNodes = nodeList->next;
93
+
94
+ if (nodeList->next != NULL)
95
+ nodeList->next->previous = nodeList->previous;
96
+
97
+ nodeList->previous = nodeList->next = NULL;
98
+
99
+ deallocateNodeList(nodeList);
100
+ }
101
+
102
+ static Node *popNodeRecord()
103
+ {
104
+ MiniConnection *localConnect;
105
+
106
+ NodeList *nodeList = markedNodes;
107
+ Node *node;
108
+
109
+ if (markedNodes == NULL)
110
+ return NULL;
111
+
112
+ node = nodeList->node;
113
+ markedNodes = nodeList->next;
114
+ if (markedNodes != NULL)
115
+ markedNodes->previous = NULL;
116
+
117
+ localConnect =
118
+ &localScaffold[getNodeID(nodeList->node) + nodeCount(graph)];
119
+ localConnect->nodeList = NULL;
120
+
121
+ deallocateNodeList(nodeList);
122
+ return node;
123
+ }
124
+
125
+ static void resetMiniConnection(Node * node, MiniConnection * localConnect,
126
+ Coordinate distance, double variance,
127
+ Connection * frontReference,
128
+ Connection * backReference, boolean status)
129
+ {
130
+ setSingleNodeStatus(node, status);
131
+ localConnect->distance = distance;
132
+ localConnect->variance = variance;
133
+ localConnect->frontReference = frontReference;
134
+ localConnect->backReference = backReference;
135
+ localConnect->nodeList = recordNode(node);
136
+ }
137
+
138
+ static void setEmptyMiniConnection(Node * node)
139
+ {
140
+ MiniConnection *localConnect =
141
+ &localScaffold[getNodeID(node) + nodeCount(graph)];
142
+ localConnect->distance = 0;
143
+ localConnect->variance = 1;
144
+ localConnect->frontReference = NULL;
145
+ localConnect->backReference = NULL;
146
+ localConnect->nodeList = recordNode(node);
147
+ setSingleNodeStatus(node, true);
148
+ }
149
+
150
+ static void readjustMiniConnection(Node * node,
151
+ MiniConnection * localConnect,
152
+ Coordinate distance,
153
+ Coordinate min_distance,
154
+ double variance,
155
+ Connection * frontReference,
156
+ Connection * backReference)
157
+ {
158
+
159
+ localConnect->distance =
160
+ (variance * localConnect->distance +
161
+ distance * localConnect->variance) / (variance +
162
+ localConnect->variance);
163
+ localConnect->variance =
164
+ (variance *
165
+ localConnect->variance) / (variance + localConnect->variance);
166
+
167
+ if (frontReference != NULL)
168
+ localConnect->frontReference = frontReference;
169
+ if (backReference != NULL)
170
+ localConnect->backReference = backReference;
171
+
172
+ if (localConnect->distance > min_distance)
173
+ setSingleNodeStatus(node, 1);
174
+ else
175
+ setSingleNodeStatus(node, -1);
176
+ }
177
+
178
+ static void integrateDerivativeDistances(Connection * connect,
179
+ Coordinate min_distance,
180
+ boolean direction)
181
+ {
182
+ Node *reference = getConnectionDestination(connect);
183
+ Node *destination;
184
+ IDnum destinationID;
185
+ Coordinate distance, baseDistance;
186
+ double variance, baseVariance;
187
+ Connection *connect2;
188
+ MiniConnection *localConnect;
189
+
190
+ // debug
191
+ IDnum counter = 0;
192
+
193
+ if (!getUniqueness(reference))
194
+ return;
195
+
196
+ //velvetLog("Opposite node %li length %li at %li ± %f\n", getNodeID(reference), getNodeLength(reference), getConnectionDistance(connect), getConnectionVariance(connect));
197
+
198
+ baseDistance = getConnectionDistance(connect);
199
+ baseVariance = getConnectionVariance(connect);
200
+
201
+ for (connect2 = getConnection(reference);
202
+ connect2 != NULL; connect2 = getNextConnection(connect2)) {
203
+ // Avoid null derivative
204
+ if (connect2 == getTwinConnection(connect))
205
+ continue;
206
+
207
+ destination = getConnectionDestination(connect2);
208
+
209
+ // Beware of directionality
210
+ if (!direction)
211
+ destination = getTwinNode(destination);
212
+
213
+ // Derivate values
214
+ destinationID = getNodeID(destination);
215
+ // Beware of directionality (bis)
216
+ if (direction)
217
+ distance = baseDistance - getConnectionDistance(connect2);
218
+ else
219
+ distance = getConnectionDistance(connect2) - baseDistance;
220
+ variance = getConnectionVariance(connect2) + baseVariance;
221
+ localConnect =
222
+ &localScaffold[destinationID + nodeCount(graph)];
223
+
224
+ // Avoid over-projection
225
+ if (distance < min_distance) {
226
+ //velvetLog("Node %li not at distance %li± %f (min %li)\n", destinationID, distance, variance, min_distance);
227
+ continue;
228
+ }
229
+
230
+ counter++;
231
+
232
+ if (getNodeStatus(destination)) {
233
+ readjustMiniConnection(destination, localConnect,
234
+ distance, min_distance,
235
+ variance, NULL, NULL);
236
+ } else
237
+ resetMiniConnection(destination, localConnect,
238
+ distance, variance, NULL, NULL,
239
+ true);
240
+
241
+ //velvetLog("Node %li now at distance %li\n", destinationID, localConnect->distance);
242
+ }
243
+
244
+ //velvetLog("%li secondary distances added\n", counter);
245
+ }
246
+
247
+ static void markInterestingNodes(Node * node)
248
+ {
249
+ Connection *connect;
250
+ Node *destination;
251
+ MiniConnection *localConnect;
252
+ Coordinate min_distance =
253
+ getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
254
+
255
+ // Mark own node
256
+ setEmptyMiniConnection(node);
257
+
258
+ // Loop thru primary scaffold
259
+ for (connect = getConnection(node); connect != NULL;
260
+ connect = getNextConnection(connect)) {
261
+ destination = getTwinNode(getConnectionDestination(connect));
262
+
263
+ localConnect =
264
+ &localScaffold[getNodeID(destination) +
265
+ nodeCount(graph)];
266
+
267
+ if (getNodeStatus(destination)) {
268
+ readjustMiniConnection(destination, localConnect,
269
+ getConnectionDistance(connect),
270
+ min_distance,
271
+ getConnectionVariance(connect), connect,
272
+ NULL);
273
+ localConnect->backReference = NULL;
274
+ } else {
275
+ resetMiniConnection(destination, localConnect,
276
+ getConnectionDistance(connect),
277
+ getConnectionVariance(connect), connect,
278
+ NULL, true);
279
+ }
280
+
281
+ integrateDerivativeDistances(connect, min_distance, true);
282
+ }
283
+
284
+ // Loop thru twin's primary scaffold
285
+ for (connect = getConnection(getTwinNode(node)); connect != NULL;
286
+ connect = getNextConnection(connect)) {
287
+ destination = getConnectionDestination(connect);
288
+ localConnect =
289
+ &localScaffold[getNodeID(destination) +
290
+ nodeCount(graph)];
291
+
292
+ if (getNodeStatus(destination))
293
+ readjustMiniConnection(destination, localConnect,
294
+ -getConnectionDistance(connect),
295
+ min_distance,
296
+ getConnectionVariance(connect), NULL,
297
+ connect);
298
+ else
299
+ resetMiniConnection(destination, localConnect,
300
+ -getConnectionDistance(connect),
301
+ getConnectionVariance(connect), NULL,
302
+ connect, -1);
303
+
304
+ integrateDerivativeDistances(connect, min_distance, false);
305
+ }
306
+ }
307
+
308
+ void unmarkNode(Node * node, MiniConnection * localConnect)
309
+ {
310
+ if (localConnect->frontReference != NULL
311
+ || localConnect->backReference != NULL) {
312
+ if (getNodeStatus(node) > 0)
313
+ setSingleNodeStatus(node, 10);
314
+ else
315
+ setSingleNodeStatus(node, -10);
316
+ } else {
317
+ setSingleNodeStatus(node, false);
318
+ destroyNodeList(localConnect->nodeList);
319
+ localConnect->frontReference = NULL;
320
+ localConnect->backReference = NULL;
321
+ localConnect->nodeList = NULL;
322
+ }
323
+ }
324
+
325
+ void handicapNode(Node * node)
326
+ {
327
+ if (getNodeStatus(node) > 0)
328
+ setSingleNodeStatus(node, 10);
329
+ else
330
+ setSingleNodeStatus(node, -10);
331
+ }
332
+
333
+ static void absorbExtension(Node * node, Node * extension)
334
+ {
335
+ Arc *arc;
336
+
337
+ appendNodeGaps(node, extension, graph);
338
+ appendDescriptors(node, extension);
339
+
340
+ // Destroy old nodes
341
+ while (getArc(node) != NULL)
342
+ destroyArc(getArc(node), graph);
343
+
344
+ // Create new
345
+ for (arc = getArc(extension); arc != NULL; arc = getNextArc(arc))
346
+ createAnalogousArc(node, getDestination(arc), arc, graph);
347
+ }
348
+
349
+ NodeList *getMarkedNodeList()
350
+ {
351
+ return markedNodes;
352
+ }
353
+
354
+ static void absorbExtensionInScaffold(Node * node, Node * source)
355
+ {
356
+ IDnum nodeID = getNodeID(node);
357
+ IDnum sourceID = getNodeID(source);
358
+ IDnum sourceIndex = sourceID + nodeCount(graph);
359
+ Node *twinSource = getTwinNode(source);
360
+ IDnum twinSourceIndex = getNodeID(twinSource) + nodeCount(graph);
361
+ Connection *connect, *original;
362
+ Node *destination;
363
+ IDnum destinationID;
364
+ Coordinate distance_shift =
365
+ (getNodeLength(node) - getNodeLength(source)) / 2;
366
+ Coordinate min_distance =
367
+ getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
368
+ MiniConnection *localConnect;
369
+ Coordinate distance;
370
+ double variance;
371
+ IDnum direct_count;
372
+ IDnum paired_count;
373
+
374
+ while ((connect = getConnection(source))) {
375
+ destination = getTwinNode(getConnectionDestination(connect));
376
+
377
+ if (destination == getTwinNode(node)) {
378
+ localConnect = &localScaffold[twinSourceIndex];
379
+ localConnect->frontReference = NULL;
380
+ unmarkNode(twinSource, localConnect);
381
+ destroyConnection(connect, sourceID);
382
+ continue;
383
+ }
384
+ if (destination == node) {
385
+ localConnect = &localScaffold[sourceIndex];
386
+ localConnect->backReference = NULL;
387
+ unmarkNode(source, localConnect);
388
+ destroyConnection(connect, sourceID);
389
+ continue;
390
+ }
391
+
392
+ destinationID = getNodeID(destination);
393
+ localConnect =
394
+ &localScaffold[destinationID + nodeCount(graph)];
395
+ incrementConnectionDistance(connect, distance_shift);
396
+ distance = getConnectionDistance(connect);
397
+ variance = getConnectionVariance(connect);
398
+ direct_count = getConnectionDirectCount(connect);
399
+ paired_count = getConnectionPairedCount(connect);
400
+
401
+ if (getNodeStatus(destination)) {
402
+ readjustMiniConnection(destination, localConnect,
403
+ distance, min_distance,
404
+ variance, NULL, NULL);
405
+ if ((original = localConnect->frontReference))
406
+ readjustConnection(original, distance,
407
+ variance, direct_count,
408
+ paired_count);
409
+ else
410
+ localConnect->frontReference =
411
+ createNewConnection(nodeID,
412
+ -destinationID,
413
+ direct_count,
414
+ paired_count,
415
+ distance,
416
+ variance);
417
+ } else
418
+ resetMiniConnection(destination, localConnect,
419
+ distance, variance,
420
+ createNewConnection(nodeID,
421
+ -destinationID,
422
+ direct_count,
423
+ paired_count,
424
+ distance,
425
+ variance),
426
+ NULL, true);
427
+
428
+ integrateDerivativeDistances(connect, min_distance, true);
429
+
430
+ destroyConnection(connect, sourceID);
431
+ }
432
+
433
+ // Loop thru twin's primary scaffold
434
+ while ((connect = getConnection(getTwinNode(source)))) {
435
+ destination = getConnectionDestination(connect);
436
+
437
+ if (destination == node) {
438
+ localConnect = &localScaffold[sourceIndex];
439
+ localConnect->frontReference = NULL;
440
+ unmarkNode(source, localConnect);
441
+ destroyConnection(connect, -sourceID);
442
+ continue;
443
+ }
444
+ if (destination == getTwinNode(node)) {
445
+ localConnect = &localScaffold[twinSourceIndex];
446
+ localConnect->backReference = NULL;
447
+ unmarkNode(twinSource, localConnect);
448
+ destroyConnection(connect, -sourceID);
449
+ continue;
450
+ }
451
+
452
+ destinationID = getNodeID(destination);
453
+
454
+ localConnect =
455
+ &localScaffold[destinationID + nodeCount(graph)];
456
+ incrementConnectionDistance(connect, -distance_shift);
457
+ distance = getConnectionDistance(connect);
458
+ variance = getConnectionVariance(connect);
459
+ direct_count = getConnectionDirectCount(connect);
460
+ paired_count = getConnectionPairedCount(connect);
461
+
462
+ if (distance > min_distance && getNodeStatus(destination) < 0) {
463
+ readjustMiniConnection(destination, localConnect,
464
+ -distance, min_distance,
465
+ variance, NULL, NULL);
466
+ if ((original = localConnect->backReference))
467
+ readjustConnection(original, distance,
468
+ variance, direct_count,
469
+ paired_count);
470
+ } else if (getNodeStatus(destination) < 0) {
471
+ if ((original = localConnect->backReference)) {
472
+ destroyConnection(original, -nodeID);
473
+ localConnect->backReference = NULL;
474
+ }
475
+ unmarkNode(destination, localConnect);
476
+ } else if (getNodeStatus(destination) > 0) {
477
+ if ((original = localConnect->frontReference)) {
478
+ destroyConnection(original, nodeID);
479
+ localConnect->frontReference = NULL;
480
+ }
481
+ unmarkNode(destination, localConnect);
482
+ } else if (distance > min_distance) {
483
+ resetMiniConnection(destination, localConnect,
484
+ -distance, variance, NULL,
485
+ createNewConnection(-nodeID,
486
+ destinationID,
487
+ direct_count,
488
+ paired_count,
489
+ distance,
490
+ variance),
491
+ -1);
492
+ integrateDerivativeDistances(connect, min_distance, true);
493
+ }
494
+
495
+ destroyConnection(connect, -sourceID);
496
+ }
497
+ }
498
+
499
+ static void recenterNode(Node * node, Coordinate oldLength)
500
+ {
501
+ IDnum nodeID = getNodeID(node);
502
+ Connection *connect, *next;
503
+ Coordinate distance_shift = (getNodeLength(node) - oldLength) / 2;
504
+ Coordinate min_distance =
505
+ getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
506
+ MiniConnection *localConnect;
507
+
508
+ //velvetLog("Recentering node\n");
509
+
510
+ for (connect = getConnection(node); connect != NULL;
511
+ connect = next) {
512
+ next = getNextConnection(connect);
513
+ incrementConnectionDistance(connect, -distance_shift);
514
+
515
+ if (getConnectionDistance(connect) < min_distance) {
516
+ //velvetLog("Unrecording %li\n",
517
+ // -getNodeID(getConnectionDestination(connect)));
518
+ localConnect =
519
+ &localScaffold[-getNodeID(getConnectionDestination(connect))
520
+ + nodeCount(graph)];
521
+ localConnect->frontReference = NULL;
522
+ unmarkNode(getTwinNode(getConnectionDestination(connect)),
523
+ localConnect);
524
+ destroyConnection(connect, nodeID);
525
+ } else if (getTwinConnection(connect) != NULL)
526
+ incrementConnectionDistance(getTwinConnection(connect), -distance_shift);
527
+ }
528
+
529
+ for (connect = getConnection(getTwinNode(node)); connect != NULL;
530
+ connect = next) {
531
+ next = getNextConnection(connect);
532
+ incrementConnectionDistance(connect, distance_shift);
533
+
534
+ if (getTwinConnection(connect) != NULL)
535
+ incrementConnectionDistance(getTwinConnection(connect), distance_shift);
536
+ }
537
+ }
538
+
539
+ static void recenterLocalScaffold(Node * node, Coordinate oldLength)
540
+ {
541
+ MiniConnection *localConnect;
542
+ Coordinate distance_shift = (getNodeLength(node) - oldLength) / 2;
543
+ Coordinate min_distance =
544
+ getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
545
+ NodeList *nodeList, *next;
546
+ IDnum node2ID;
547
+ Node *node2;
548
+
549
+ for (nodeList = markedNodes; nodeList != NULL; nodeList = next) {
550
+ next = nodeList->next;
551
+
552
+ node2 = nodeList->node;
553
+
554
+ if (node2 == node) {
555
+ setSingleNodeStatus(node2, 1);
556
+ continue;
557
+ }
558
+
559
+ node2ID = getNodeID(node2);
560
+ localConnect = &localScaffold[node2ID + nodeCount(graph)];
561
+ localConnect->distance -= distance_shift;
562
+
563
+ if (localConnect->distance < min_distance
564
+ && localConnect->backReference == NULL
565
+ && localConnect->frontReference == NULL)
566
+ unmarkNode(node2, localConnect);
567
+ else if (getNodeStatus(node2) > 0)
568
+ setSingleNodeStatus(node2, 1);
569
+ else if (getNodeStatus(node2) < 0)
570
+ setSingleNodeStatus(node2, -1);
571
+ }
572
+ }
573
+
574
+ static void adjustShortReads(Node * target, Node * source)
575
+ {
576
+ ShortReadMarker *targetArray, *marker;
577
+ IDnum targetLength, index;
578
+ Coordinate position, nodeLength;
579
+
580
+ if (!readStartsAreActivated(graph))
581
+ return;
582
+
583
+ targetArray = getNodeReads(getTwinNode(target), graph);
584
+ targetLength = getNodeReadCount(getTwinNode(target), graph);
585
+
586
+ nodeLength = getNodeLength(source);
587
+
588
+ for (index = 0; index < targetLength; index++) {
589
+ marker = getShortReadMarkerAtIndex(targetArray, index);
590
+ position = getShortReadMarkerPosition(marker);
591
+ if (position != -1) {
592
+ position += nodeLength;
593
+ setShortReadMarkerPosition(marker, position);
594
+ }
595
+ }
596
+ }
597
+
598
+ static void adjustLongReads(Node * target, Coordinate nodeLength)
599
+ {
600
+ PassageMarkerI marker;
601
+
602
+ for (marker = getMarker(target); marker != NULL_IDX;
603
+ marker = getNextInNode(marker))
604
+ incrementFinishOffset(marker, nodeLength);
605
+ }
606
+
607
+ static boolean goesToNode(PassageMarkerI marker, Node * node)
608
+ {
609
+ PassageMarkerI current;
610
+ Node * start = getNode(marker);
611
+
612
+ for (current = getNextInSequence(marker); current != NULL_IDX;
613
+ current = getNextInSequence(current)) {
614
+ if (getNode(current) == node)
615
+ return true;
616
+ else if (getNode(current) == start)
617
+ continue;
618
+ else if (getUniqueness(getNode(current)))
619
+ return false;
620
+ }
621
+
622
+ return false;
623
+ }
624
+
625
+ static boolean comesFromNode(PassageMarkerI marker, Node * node)
626
+ {
627
+ Node *source = getNode(getTwinMarker(marker));
628
+ Node *target = getTwinNode(node);
629
+ PassageMarkerI current;
630
+
631
+ for (current = getNextInSequence(getTwinMarker(marker)); current != NULL_IDX;
632
+ current = getNextInSequence(current)) {
633
+ if (getNode(current) == target)
634
+ return true;
635
+ else if (getNode(current) == source)
636
+ continue;
637
+ else if (getUniqueness(getNode(current)))
638
+ return false;
639
+ }
640
+
641
+ return false;
642
+ }
643
+
644
+ static void reconnectPassageMarker(PassageMarkerI marker, Node * node,
645
+ PassageMarkerI * ptr)
646
+ {
647
+ PassageMarkerI current;
648
+ PassageMarkerI next = getNextInSequence(marker);
649
+ PassageMarkerI tmpMarker;
650
+
651
+ for (current = marker; getNode(current) != node;
652
+ current = getPreviousInSequence(current));
653
+
654
+ setPreviousInSequence(current, next);
655
+ concatenatePassageMarkers(current, marker);
656
+
657
+ // Removing node and all intermediaries
658
+ while (marker != current) {
659
+ tmpMarker = getPreviousInSequence(marker);
660
+ if (*ptr == marker || *ptr == getTwinMarker(marker))
661
+ *ptr = getNextInNode(*ptr);
662
+ setNextInSequence(marker, NULL_IDX);
663
+ setPreviousInSequence(NULL_IDX, marker);
664
+ destroyPassageMarker(marker);
665
+ marker = tmpMarker;
666
+ }
667
+ }
668
+
669
+ // DEBUG
670
+ void checkNode(Node* node) {
671
+ PassageMarkerI marker1 = getMarker(node);
672
+
673
+ if (marker1 == NULL_IDX)
674
+ return;
675
+
676
+ PassageMarkerI marker2 = getNextInNode(marker1);
677
+
678
+ if (marker2 == NULL_IDX)
679
+ return;
680
+
681
+ if (getStartOffset(marker1) == getStartOffset(marker2))
682
+ abort();
683
+ if (getFinishOffset(marker1) == getFinishOffset(marker2))
684
+ abort();
685
+ printf(">>>> Node %li\n", (long) getNodeID(node));
686
+ printf("Marker1: %li - %li > %li (%li) \n", (long) getStartOffset(marker1), (long) getPassageMarkerLength(marker1), (long) (getNodeLength(node) - getFinishOffset(marker1)), (long) getFinishOffset(marker1));
687
+ printf("%s\n", readPassageMarker(marker1));
688
+ printf("Marker2: %li - %li > %li (%li) \n", (long) getStartOffset(marker2), (long) getPassageMarkerLength(marker2), (long) (getNodeLength(node) - getFinishOffset(marker2)), (long) getFinishOffset(marker2));
689
+
690
+ printf("%s\n", readPassageMarker(marker2));
691
+ if (getStartOffset(marker1) < getNodeLength(node) - getFinishOffset(marker2)
692
+ && getStartOffset(marker2) < getNodeLength(node) - getFinishOffset(marker1)) {
693
+ //abort();
694
+ ;
695
+ }
696
+ }
697
+
698
+ static void concatenateLongReads(Node * node, Node * candidate,
699
+ Graph * graph)
700
+ {
701
+ PassageMarkerI marker, tmpMarker;
702
+
703
+ // Passage marker management in node:
704
+ for (marker = getMarker(node); marker != NULL_IDX;
705
+ marker = getNextInNode(marker)) {
706
+ if (!goesToNode(marker, candidate))
707
+ incrementFinishOffset(marker,
708
+ getNodeLength(candidate));
709
+ }
710
+
711
+ // Swapping new born passageMarkers from candidate to node
712
+ for (marker = getMarker(candidate); marker != NULL_IDX;
713
+ marker = tmpMarker) {
714
+ tmpMarker = getNextInNode(marker);
715
+
716
+ if (!comesFromNode(marker, node)) {
717
+ extractPassageMarker(marker);
718
+ incrementStartOffset(marker,
719
+ getNodeLength(node));
720
+ transposePassageMarker(marker, node);
721
+ incrementFinishOffset(getTwinMarker(marker),
722
+ getNodeLength(node));
723
+ } else {
724
+ reconnectPassageMarker(marker, node, &tmpMarker);
725
+ }
726
+ }
727
+ }
728
+
729
+ static void adjustShortReadsByLength(Node * target, Coordinate nodeLength)
730
+ {
731
+ ShortReadMarker *targetArray, *marker;
732
+ IDnum targetLength, index;
733
+ Coordinate position;
734
+
735
+ if (!readStartsAreActivated(graph))
736
+ return;
737
+
738
+ targetArray = getNodeReads(getTwinNode(target), graph);
739
+ targetLength = getNodeReadCount(getTwinNode(target), graph);
740
+
741
+ for (index = 0; index < targetLength; index++) {
742
+ marker = getShortReadMarkerAtIndex(targetArray, index);
743
+ position = getShortReadMarkerPosition(marker);
744
+ if (position != -1) {
745
+ position += nodeLength;
746
+ setShortReadMarkerPosition(marker, position);
747
+ }
748
+ }
749
+ }
750
+
751
+ static boolean abs_bool(boolean val)
752
+ {
753
+ return val >= 0 ? val : -val;
754
+ }
755
+
756
+ static IDnum abs_ID(IDnum val)
757
+ {
758
+ return val >= 0 ? val : -val;
759
+ }
760
+
761
+ static NodeList *pathIsClear(Node * node, Node * oppositeNode,
762
+ Coordinate distance)
763
+ {
764
+ Arc *arc;
765
+ Node *candidate, *dest, *current;
766
+ Coordinate extension_distance = 0;
767
+ boolean maxRepeat = 1;
768
+ Node *repeatEntrance = NULL;
769
+ IDnum counter = 0;
770
+ NodeList *path = NULL;
771
+ NodeList *tail = path;
772
+
773
+ setSingleNodeStatus(node, 2);
774
+
775
+ current = node;
776
+ while (true) {
777
+
778
+ //////////////////////////////////
779
+ // Selecting destination //
780
+ //////////////////////////////////
781
+ candidate = NULL;
782
+
783
+ // First round for priority nodes
784
+ for (arc = getArc(current); arc != NULL;
785
+ arc = getNextArc(arc)) {
786
+ dest = getDestination(arc);
787
+
788
+ if (dest == node || dest == getTwinNode(node))
789
+ continue;
790
+
791
+ if (getNodeStatus(dest) <= 0)
792
+ continue;
793
+
794
+ if (candidate == NULL
795
+ || getNodeStatus(candidate) >
796
+ getNodeStatus(dest)
797
+ || (getNodeStatus(candidate) ==
798
+ getNodeStatus(dest)
799
+ && extension_distance >
800
+ localScaffold[getNodeID(dest) +
801
+ nodeCount(graph)].
802
+ distance - getNodeLength(dest) / 2)) {
803
+ extension_distance =
804
+ localScaffold[getNodeID(dest) +
805
+ nodeCount(graph)].
806
+ distance - getNodeLength(dest) / 2;
807
+ candidate = dest;
808
+ }
809
+ }
810
+
811
+ // In case of failure
812
+ if (candidate == NULL) {
813
+ for (arc = getArc(current); arc != NULL;
814
+ arc = getNextArc(arc)) {
815
+ dest = getDestination(arc);
816
+
817
+ if (getNodeStatus(dest) == 0)
818
+ continue;
819
+
820
+ if (dest == node
821
+ || dest == getTwinNode(node))
822
+ continue;
823
+
824
+ if (candidate == NULL
825
+ || getNodeStatus(candidate) <
826
+ getNodeStatus(dest)
827
+ || (getNodeStatus(candidate) ==
828
+ getNodeStatus(dest)
829
+ && extension_distance <
830
+ localScaffold[getNodeID(dest) +
831
+ nodeCount(graph)].
832
+ distance -
833
+ getNodeLength(dest) / 2)) {
834
+ extension_distance =
835
+ localScaffold[getNodeID(dest) +
836
+ nodeCount
837
+ (graph)].
838
+ distance -
839
+ getNodeLength(dest) / 2;
840
+ candidate = dest;
841
+ }
842
+ }
843
+ }
844
+ if (candidate == NULL) {
845
+ while (path) {
846
+ tail = path->next;
847
+ deallocateNodeList(path);
848
+ path = tail;
849
+ }
850
+ return false;
851
+ }
852
+ // Loop detection
853
+ if (candidate == repeatEntrance
854
+ && abs_bool(getNodeStatus(candidate)) ==
855
+ maxRepeat + 1) {
856
+ while (path) {
857
+ tail = path->next;
858
+ deallocateNodeList(path);
859
+ path = tail;
860
+ }
861
+ return false;
862
+ } else if (abs_bool(getNodeStatus(candidate)) > maxRepeat) {
863
+ maxRepeat = abs_bool(getNodeStatus(candidate));
864
+ repeatEntrance = candidate;
865
+ } else if (abs_bool(getNodeStatus(candidate)) == 1) {
866
+ maxRepeat = 1;
867
+ repeatEntrance = NULL;
868
+ }
869
+
870
+ if (getNodeStatus(candidate) > 0)
871
+ setSingleNodeStatus(candidate,
872
+ getNodeStatus(candidate) + 1);
873
+ else
874
+ setSingleNodeStatus(candidate,
875
+ getNodeStatus(candidate) - 1);
876
+
877
+
878
+ if (abs_bool(getNodeStatus(candidate)) > 100
879
+ || counter > nodeCount(graph)) {
880
+ while (path) {
881
+ tail = path->next;
882
+ deallocateNodeList(path);
883
+ path = tail;
884
+ }
885
+ return false;
886
+ }
887
+
888
+ // Missassembly detection
889
+ if (getUniqueness(candidate) && oppositeNode
890
+ && candidate != oppositeNode
891
+ && extension_distance > distance) {
892
+ while (path) {
893
+ tail = path->next;
894
+ deallocateNodeList(path);
895
+ path = tail;
896
+ }
897
+ return false;
898
+ }
899
+
900
+ if (path == NULL) {
901
+ path = allocateNodeList();
902
+ path->next = NULL;
903
+ path->node = candidate;
904
+ tail = path;
905
+ } else {
906
+ tail->next = allocateNodeList();
907
+ tail = tail->next;
908
+ tail->node = candidate;
909
+ tail->next = NULL;
910
+ }
911
+
912
+ if (getUniqueness(candidate))
913
+ return path;
914
+
915
+ current = candidate;
916
+ }
917
+ }
918
+
919
+ static boolean pushNeighbours(Node * node, Node * oppositeNode,
920
+ Coordinate distance, boolean force_jumps)
921
+ {
922
+ Node *candidate;
923
+ Coordinate oldLength = getNodeLength(node);
924
+ MiniConnection *localConnect;
925
+ NodeList *path, *tmp;
926
+
927
+ if ((path = pathIsClear(node, oppositeNode, distance))) {
928
+ while (path) {
929
+ candidate = path->node;
930
+ tmp = path->next;
931
+ deallocateNodeList(path);
932
+ path = tmp;
933
+
934
+ ///////////////////////////////////////
935
+ // Stepping forward to destination //
936
+ ///////////////////////////////////////
937
+
938
+ if (getUniqueness(candidate)) {
939
+ concatenateReadStarts(node, candidate,
940
+ graph);
941
+ concatenateLongReads(node, candidate,
942
+ graph);
943
+ absorbExtension(node, candidate);
944
+
945
+ // Scaffold changes
946
+ recenterNode(node, oldLength);
947
+ recenterLocalScaffold(node, oldLength);
948
+ absorbExtensionInScaffold(node, candidate);
949
+
950
+ // Read coverage
951
+ #ifndef SINGLE_COV_CAT
952
+ Category cat;
953
+ for (cat = 0; cat < CATEGORIES; cat++) {
954
+ incrementVirtualCoverage(node, cat,
955
+ getVirtualCoverage(candidate, cat));
956
+ incrementOriginalVirtualCoverage(node, cat,
957
+ getOriginalVirtualCoverage(candidate, cat));
958
+ }
959
+ #else
960
+ incrementVirtualCoverage(node, getVirtualCoverage(candidate));
961
+ #endif
962
+
963
+ if (getNodeStatus(candidate)) {
964
+ localConnect =
965
+ &localScaffold[getNodeID
966
+ (candidate) +
967
+ nodeCount
968
+ (graph)];
969
+ if (localConnect->frontReference) {
970
+ destroyConnection
971
+ (localConnect->
972
+ frontReference,
973
+ getNodeID(node));
974
+ localConnect->
975
+ frontReference = NULL;
976
+ }
977
+ if (localConnect->backReference) {
978
+ destroyConnection
979
+ (localConnect->
980
+ backReference,
981
+ -getNodeID(node));
982
+ localConnect->
983
+ backReference = NULL;
984
+ }
985
+ unmarkNode(candidate,
986
+ localConnect);
987
+ }
988
+ if (getNodeStatus(getTwinNode(candidate))) {
989
+ localConnect =
990
+ &localScaffold[-getNodeID
991
+ (candidate) +
992
+ nodeCount
993
+ (graph)];
994
+ if (localConnect->frontReference) {
995
+ destroyConnection
996
+ (localConnect->
997
+ frontReference,
998
+ getNodeID(node));
999
+ localConnect->
1000
+ frontReference = NULL;
1001
+ }
1002
+ if (localConnect->backReference) {
1003
+ destroyConnection
1004
+ (localConnect->
1005
+ backReference,
1006
+ -getNodeID(node));
1007
+ localConnect->
1008
+ backReference = NULL;
1009
+ }
1010
+ unmarkNode(getTwinNode(candidate),
1011
+ localConnect);
1012
+ }
1013
+ destroyNode(candidate, graph);
1014
+ return true;
1015
+ } else {
1016
+ adjustShortReads(node, candidate);
1017
+ adjustLongReads(node, getNodeLength(candidate));
1018
+ absorbExtension(node, candidate);
1019
+ }
1020
+ }
1021
+ }
1022
+
1023
+ if (force_jumps && oppositeNode
1024
+ && abs_ID(getNodeID(oppositeNode)) < abs_ID(getNodeID(node))) {
1025
+ distance -= getNodeLength(node) / 2;
1026
+ distance -= getNodeLength(oppositeNode) / 2;
1027
+ if (distance > 10) {
1028
+ adjustShortReadsByLength(node, distance);
1029
+ adjustLongReads(node, distance);
1030
+ appendGap(node, distance, graph);
1031
+ } else {
1032
+ adjustShortReadsByLength(node, 10);
1033
+ adjustLongReads(node, 10);
1034
+ appendGap(node, 10, graph);
1035
+ }
1036
+
1037
+ concatenateReadStarts(node, oppositeNode, graph);
1038
+ concatenateLongReads(node, oppositeNode, graph);
1039
+ absorbExtension(node, oppositeNode);
1040
+
1041
+ // Scaffold changes
1042
+ recenterNode(node, oldLength);
1043
+ recenterLocalScaffold(node, oldLength);
1044
+ absorbExtensionInScaffold(node, oppositeNode);
1045
+
1046
+ // Read coverage
1047
+ #ifndef SINGLE_COV_CAT
1048
+ Category cat;
1049
+ for (cat = 0; cat < CATEGORIES; cat++)
1050
+ incrementVirtualCoverage(node, cat,
1051
+ getVirtualCoverage(oppositeNode, cat));
1052
+ #else
1053
+ incrementVirtualCoverage(node, getVirtualCoverage(oppositeNode));
1054
+ #endif
1055
+
1056
+ if (getNodeStatus(oppositeNode)) {
1057
+ localConnect =
1058
+ &localScaffold[getNodeID(oppositeNode) +
1059
+ nodeCount(graph)];
1060
+ if (localConnect->frontReference) {
1061
+ destroyConnection(localConnect->
1062
+ frontReference,
1063
+ getNodeID(node));
1064
+ localConnect->frontReference = NULL;
1065
+ }
1066
+ if (localConnect->backReference) {
1067
+ destroyConnection(localConnect->
1068
+ backReference,
1069
+ -getNodeID(node));
1070
+ localConnect->backReference = NULL;
1071
+ }
1072
+ unmarkNode(oppositeNode, localConnect);
1073
+ }
1074
+ if (getNodeStatus(getTwinNode(oppositeNode))) {
1075
+ localConnect =
1076
+ &localScaffold[-getNodeID(oppositeNode) +
1077
+ nodeCount(graph)];
1078
+ if (localConnect->frontReference) {
1079
+ destroyConnection(localConnect->
1080
+ frontReference,
1081
+ getNodeID(node));
1082
+ localConnect->frontReference = NULL;
1083
+ }
1084
+ if (localConnect->backReference) {
1085
+ destroyConnection(localConnect->
1086
+ backReference,
1087
+ -getNodeID(node));
1088
+ localConnect->backReference = NULL;
1089
+ }
1090
+ unmarkNode(getTwinNode(oppositeNode),
1091
+ localConnect);
1092
+ }
1093
+
1094
+ destroyNode(oppositeNode, graph);
1095
+ }
1096
+
1097
+ return false;
1098
+ }
1099
+
1100
+ static void unmarkInterestingNodes()
1101
+ {
1102
+ Node *node;
1103
+ MiniConnection *localConnect;
1104
+
1105
+ while ((node = popNodeRecord())) {
1106
+ setSingleNodeStatus(node, false);
1107
+ localConnect =
1108
+ &localScaffold[getNodeID(node) + nodeCount(graph)];
1109
+ localConnect->frontReference = NULL;
1110
+ localConnect->backReference = NULL;
1111
+ localConnect->nodeList = NULL;
1112
+ }
1113
+ }
1114
+
1115
+ static void findOppositeNode(Node * node, Node ** oppositeNode,
1116
+ Coordinate * distance)
1117
+ {
1118
+ NodeList *nodeList;
1119
+ MiniConnection *localConnect;
1120
+ Node *node2;
1121
+ IDnum node2ID;
1122
+
1123
+ *oppositeNode = NULL;
1124
+ *distance = 0;
1125
+
1126
+ for (nodeList = markedNodes; nodeList != NULL;
1127
+ nodeList = nodeList->next) {
1128
+ node2 = nodeList->node;
1129
+ node2ID = getNodeID(node2);
1130
+ localConnect = &localScaffold[node2ID + nodeCount(graph)];
1131
+
1132
+ if (node2 == node)
1133
+ continue;
1134
+
1135
+ if (!getUniqueness(node2))
1136
+ continue;
1137
+
1138
+ if (localConnect->distance < 0)
1139
+ continue;
1140
+
1141
+ if (*oppositeNode == NULL
1142
+ || *distance > localConnect->distance) {
1143
+ *oppositeNode = node2;
1144
+ *distance = localConnect->distance;
1145
+ }
1146
+ }
1147
+ }
1148
+
1149
+ static boolean expandLongNode(Node * node, boolean force_jumps)
1150
+ {
1151
+ boolean hit = true;
1152
+ boolean modified = false;
1153
+ Node *oppositeNode;
1154
+ Coordinate distance = 0;
1155
+
1156
+ markInterestingNodes(node);
1157
+
1158
+ while (hit) {
1159
+ correctGraphLocally(node);
1160
+ findOppositeNode(node, &oppositeNode, &distance);
1161
+ hit =
1162
+ pushNeighbours(node, oppositeNode, distance,
1163
+ force_jumps);
1164
+ modified = modified || hit;
1165
+ }
1166
+
1167
+ unmarkInterestingNodes();
1168
+
1169
+ return modified;
1170
+ }
1171
+
1172
+ static boolean expandLongNodes(boolean force_jumps)
1173
+ {
1174
+ IDnum nodeID;
1175
+ Node *node;
1176
+ boolean modified = false;
1177
+
1178
+ for (nodeID = 1; nodeID <= nodeCount(graph); nodeID++) {
1179
+ node = getNodeInGraph(graph, nodeID);
1180
+
1181
+ if (node != NULL && getUniqueness(node)) {
1182
+ modified = expandLongNode(node, force_jumps)
1183
+ || modified;
1184
+ modified =
1185
+ expandLongNode(getTwinNode(node), force_jumps)
1186
+ || modified;
1187
+ }
1188
+ }
1189
+
1190
+ return modified;
1191
+ }
1192
+
1193
+ static void cleanMemory()
1194
+ {
1195
+ velvetLog("Cleaning memory\n");
1196
+
1197
+ cleanScaffoldMemory();
1198
+
1199
+ destroyRecycleBin(nodeListMemory);
1200
+ nodeListMemory = NULL;
1201
+
1202
+ free(localScaffold);
1203
+ }
1204
+
1205
+ void exploitShortReadPairs(Graph * argGraph,
1206
+ ReadSet * reads,
1207
+ boolean * dubious,
1208
+ boolean * shadows,
1209
+ boolean force_jumps)
1210
+ {
1211
+ boolean modified = true;
1212
+
1213
+ graph = argGraph;
1214
+
1215
+ if (!readStartsAreActivated(graph))
1216
+ return;
1217
+
1218
+ velvetLog("Starting pebble resolution...\n");
1219
+
1220
+ resetNodeStatus(graph);
1221
+
1222
+ // Prepare scaffold
1223
+ buildScaffold(graph, reads, dubious, shadows);
1224
+
1225
+ // Prepare graph
1226
+ prepareGraphForLocalCorrections(graph);
1227
+
1228
+ // Prepare local scaffold
1229
+ localScaffold =
1230
+ callocOrExit(2 * nodeCount(graph) + 1, MiniConnection);
1231
+
1232
+ // Loop until convergence
1233
+ while (modified)
1234
+ modified = expandLongNodes(force_jumps);
1235
+
1236
+ // Clean up memory
1237
+ cleanMemory();
1238
+ deactivateLocalCorrectionSettings();
1239
+
1240
+ sortGapMarkers(graph);
1241
+
1242
+ velvetLog("Pebble done.\n");
1243
+ }