finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554)
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,261 @@
1
+ class Bio::AssemblyGraphAlgorithms::SingleCoherentWanderer
2
+ include Bio::FinishM::Logging
3
+
4
+ # Like AcyclicConnectionFinder#depth_first_search_with_leash except use
5
+ # single read recoherence. The algorithm used is a generalisation of Dijkstra's
6
+ # shortest path algorithm, where instead of keeping track of the minimum
7
+ # distance to each node, the algorithm keeps track of the distance to a
8
+ # set of nodes long enough to invoke a recoherence kmer.
9
+ #
10
+ # Options:
11
+ # :max_explore_nodes: maximum number of nodes to explore from each node. If max is reached, don't make any connections (default: no maximum)
12
# Explore outwards from every probe except the last one, and record the
# smallest distance at which each probe of the same or higher index was
# reached.
#
# finishm_graph:: object supplying probe_nodes, probe_node_directions,
#                 initial_path_from_probe and the underlying graph
# leash_length:: node sets whose distance exceeds this are not expanded
# recoherence_kmer:: kmer length passed on to the recoherence check at forks
# sequence_hash:: read sequences passed on to the recoherence check
# options:: :max_explore_nodes - abandon a probe (recording nothing for it)
#           once more than this many distinct nodes have been encountered
#
# Returns a Hash of [start_probe_index, end_probe_index] => distance, where
# distance is the sum of length_alone of the nodes added along the way.
def wander(finishm_graph, leash_length, recoherence_kmer, sequence_hash, options={})
  connections = {}

  # Express every probe as a [node_id, first_side] key so that it can be
  # looked up as a destination later on
  finishing_nodes = []
  finishm_graph.probe_nodes.each_with_index do |probe_node, idx|
    side = if finishm_graph.probe_node_directions[idx] == true
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
    else
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
    end
    finishing_nodes.push [probe_node.node_id, side]
  end

  # Search from each probed node in the graph
  # TODO: is there a better way to implement this by somehow searching with
  # all probe nodes at once, rather than starting fresh with each probe?
  finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|
    # Exploring from the last probe cannot establish any new connection
    next if probe_node_index == finishm_graph.probe_nodes.length - 1

    log.debug "Exploring from probe node \##{probe_node_index+1} (node #{probe_node.node_id}/#{finishm_graph.probe_node_directions[probe_node_index]})" if log.debug?
    pqueue = DS::AnyPriorityQueue.new {|a,b| a < b}
    initial = finishm_graph.initial_path_from_probe(probe_node_index)
    if initial.nil?
      log.warn "Unable to start searching from probe \##{probe_node_index+1}, because it was not found in the graph. Skipping."
      next
    end
    seed = DistancedOrientedNodeSet.new
    seed.oriented_trail = initial
    seed.distance = 0

    # settable head node set => the closest DistancedOrientedNodeSet seen for it
    minimum_head_nodes_distances = {}
    # settable of a trail's last node => Set of head node sets ending there
    node_to_head_node_sets = {}
    # bookkeeping for the periodic info message
    last_logged_node_count = 0
    maxed_out = false

    pqueue.enqueue seed, 0
    while current = pqueue.dequeue
      log.debug "Dequeued #{current}" if log.debug?

      explored = node_to_head_node_sets.length
      if options[:max_explore_nodes] and explored > options[:max_explore_nodes]
        log.warn "Hit maximum number of nodes (#{options[:max_explore_nodes]}) while exploring from probe \##{probe_node_index+1}"
        maxed_out = true
        break
      end
      if log.info? and explored % 1024 == 0 and explored > last_logged_node_count
        log.info "While exploring from probe \##{probe_node_index+1}.." if last_logged_node_count == 0
        log.info "So far worked with #{explored} distinct nodes in the assembly graph, at min distance #{current.distance}"
        last_logged_node_count = explored
      end

      # Skip node sets that were already reached at the same or a shorter distance
      settable = current.to_settable
      previous_best = minimum_head_nodes_distances[settable]
      next if previous_best and current.distance >= previous_best.distance

      minimum_head_nodes_distances[settable] = current
      tail = current.oriented_trail.last
      (node_to_head_node_sets[tail.to_settable] ||= Set.new) << settable

      # Beyond the leash, go no further
      next if current.distance > leash_length

      neighbour_onodes = finishm_graph.graph.neighbours_of(tail.node, tail.first_side)
      log.debug "Found #{neighbour_onodes.length} neighbours" if log.debug?

      # At a fork only candidates passing recoherence are enqueued; a lone
      # neighbour is enqueued unconditionally
      forked = neighbour_onodes.length > 1
      log.debug "Multiple neighbours found" if forked
      neighbour_onodes.each do |neighbour|
        candidate = current.add_oriented_node_and_copy(neighbour, recoherence_kmer)
        unless forked
          pqueue.enqueue candidate, candidate.distance
          next
        end

        log.debug "Testing recoherence in candidate #{candidate.oriented_trail.to_s}" if log.debug?
        if candidate.last_node_recoherent?(recoherence_kmer, sequence_hash)
          log.debug "Candidate survived recoherence: #{candidate.to_s}" if log.debug?
          pqueue.enqueue candidate, candidate.distance
        elsif log.debug?
          log.debug "Candidate did not survive recoherence #{candidate.oriented_trail.to_s}"
        end
      end
    end

    if maxed_out
      log.debug "Maxed out, exiting loop early" if log.debug?
      next
    end

    # The minimum distances are now known; work out which other probes the
    # current probe is connected to
    finishm_graph.probe_nodes.each_with_index do |node, i|
      next if i < probe_node_index # only return the 'upper triangle' of the distance matrices

      heads = node_to_head_node_sets[finishing_nodes[i]]
      next if heads.nil? #no connection found

      # Several head sets may end on the finishing node; keep the closest
      closest = nil
      heads.each do |head_set|
        contender = minimum_head_nodes_distances[head_set]
        next unless closest.nil? or closest.distance > contender.distance

        if probes_on_single_node_ok?(finishm_graph, probe_node_index, i)
          log.debug "Verified that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
          closest = contender
        else
          #TODO: Possibly ok if contigs to be scaffolded are all on the same node. Unlikely in practice due to short tips, but still theoretically possible
          log.debug "Failed to verify that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
        end
      end
      next if closest.nil? #no connection found - the only connection was a fake one

      min_distance = closest.distance
      log.debug "Found a connection between probes #{probe_node_index+1} and #{i+1}, distance: #{min_distance}" if log.debug?
      connections[[probe_node_index, i]] = min_distance
    end
  end

  return connections
end
156
+
157
+ # Check for position and orientation if start and finish nodes are
158
+ # on the same velvet node. Return true if OK as below or if the nodes
159
+ # are different
160
+ # --> <--- OK
161
+ # <-- --> not ok (unless the node is circular)
162
+ # <-- <-- not ok
163
+ # --> --> not ok
164
# Decide whether a connection between two probes is plausible when both
# probes sit on the same velvet node.
#
# finishm_graph:: object supplying probe_nodes, probe_node_directions,
#                 probe_node_reads, velvet_oriented_node and graph
# start_node_index:: index of the probe the search started from
# end_node_index:: index of the probe that was reached
#
# Returns true when the probes are on different nodes, when they face each
# other in a compatible order on the shared node, or when the shared node
# has itself as a neighbour (a circular node). Returns false otherwise.
def probes_on_single_node_ok?(finishm_graph, start_node_index, end_node_index)
  node1 = finishm_graph.probe_nodes[start_node_index]
  node2 = finishm_graph.probe_nodes[end_node_index]
  return true if node1.node_id != node2.node_id

  node1_direction = finishm_graph.probe_node_directions[start_node_index]
  node2_direction = finishm_graph.probe_node_directions[end_node_index]
  node1_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node1, finishm_graph.probe_node_reads[start_node_index])
  node2_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node2, finishm_graph.probe_node_reads[end_node_index])
  log.debug "Validating for 1 node problems #{start_node_index}/#{end_node_index} #{node1_direction}/#{node2_direction} offsets #{node1_offset}/#{node2_offset}" if log.debug?

  # true/false with probe1 left of probe2 is the most intuitive case,
  # but false/true with probe1 right of probe2 is also valid
  if node1_direction == true and node2_direction == false and
    node1_offset < node2_offset
    return true
  end
  if node1_direction == false and node2_direction == true and
    node1_offset > node2_offset
    return true
  end

  # Remaining true/false case: acceptable only if the node is circular,
  # i.e. the node's own ID appears amongst its neighbours' IDs. The
  # comparison must be made on IDs because that is what was collected.
  if node1_direction == true and node2_direction == false
    onode = finishm_graph.velvet_oriented_node(start_node_index)
    neighbour_ids = finishm_graph.graph.neighbours_of(onode.node, onode.first_side).collect{|n| n.node_id}
    return true if neighbour_ids.include?(node1.node_id)
  end

  return false
end
196
+
197
+ private
198
# Offset of a read measured from one fixed end of the node, whichever
# direction the read has.
#
# velvet_node:: must respond to corresponding_contig_length
# velvet_noded_read:: must respond to direction and offset_from_start_of_node
#
# Returns the read's own offset when its direction is true, and
# corresponding_contig_length minus that offset when it is false.
# Raises a RuntimeError for any other direction value.
def direction_independent_offset_of_noded_read_from_start_of_node(velvet_node, velvet_noded_read)
  direction = velvet_noded_read.direction
  return velvet_noded_read.offset_from_start_of_node if direction == true

  unless direction == false
    raise "programming error - velvet_noded_read does not have valid direction"
  end
  velvet_node.corresponding_contig_length - velvet_noded_read.offset_from_start_of_node
end
207
+
208
+ # An oriented node some distance from the origin of exploration
209
# A trail of oriented nodes paired with the distance accumulated on the
# way to it from the origin of exploration.
class DistancedOrientedNodeSet
  attr_accessor :oriented_trail, :distance

  # Flatten the trail into a plain Array of alternating node_id and
  # first_side values, so that two distinct objects describing the same
  # trail compare equal (e.g. as Hash keys or Set members).
  def to_settable
    flattened = []
    @oriented_trail.each do |onode|
      flattened.push(onode.node_id, onode.first_side)
    end
    flattened
  end

  # Build a new DistancedOrientedNodeSet consisting of this trail plus
  # oriented_node. Nodes at the rear of the new trail are dropped once the
  # nodes in front of them already span at least recoherence_kmer
  # (measured by length_alone). The receiver is left untouched.
  #
  # The new distance is this object's distance plus the added node's
  # length_alone.
  def add_oriented_node_and_copy(oriented_node, recoherence_kmer)
    extended = @oriented_trail.trail+[oriented_node]

    # Walk backwards from the newest node until enough sequence is covered
    keep_from = extended.length
    covered = 0
    while keep_from > 0 and covered < recoherence_kmer
      keep_from -= 1
      covered += extended[keep_from].node.length_alone
    end

    copy = DistancedOrientedNodeSet.new
    copy.oriented_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
    copy.oriented_trail.trail = extended[keep_from..-1]
    copy.distance = @distance+oriented_node.node.length_alone
    copy
  end

  # Ask the (lazily created, shared) recoherence finder whether the last
  # node of this trail passes validation. Returns its answer.
  def last_node_recoherent?(recoherence_kmer, sequence_hash)
    @@single_recoherencer ||= Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new
    @@single_recoherencer.validate_last_node_of_path_by_recoherence(
      @oriented_trail, recoherence_kmer, sequence_hash
    )
  end

  # The trail's string form followed by the distance in parentheses
  def to_s
    "#{@oriented_trail.to_s}(#{@distance})"
  end
end
261
+ end
@@ -0,0 +1,441 @@
1
+ require 'ds'
2
+ require 'set'
3
+ require 'ruby-progressbar'
4
+
5
+ class Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
6
+ include Bio::FinishM::Logging
7
+
8
+ DEFAULT_MAX_TIP_LENGTH = 200
9
+ DEFAULT_MIN_CONTIG_SIZE = 500
10
+ DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS = 2
11
+
12
+ attr_accessor :graph
13
+
14
+ ASSEMBLY_OPTIONS = [
15
+ :max_tip_length,
16
+ :recoherence_kmer,
17
+ :min_confirming_recoherence_kmer_reads,
18
+ :sequences,
19
+ :leash_length,
20
+ :min_contig_size,
21
+ :max_coverage_at_fork,
22
+ ]
23
+ attr_accessor :assembly_options
24
+
25
+ # Create a new assembler given a velvet graph and velvet Sequences object
26
+ #
27
+ # Assembly options:
28
+ # :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path. Default 200
29
+ # :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer (requires :sequences)
30
+ # :sequences: the sequences of the actual reads, probably a Bio::Velvet::Underground::BinarySequenceStore object
31
+ # :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
32
+ # :min_coverage_of_start_nodes: only start exploring from nodes with this much coverage
33
+ # :min_contig_size: don't bother returning contigs shorter than this (default 500bp)
34
+ # :progressbar_io: given an IO object e.g. $stdout, write progress information
35
# Remember the graph and the options hash, filling in a default for each of
# :max_tip_length, :min_contig_size and
# :min_confirming_recoherence_kmer_reads that is missing (or nil/false).
# Note the defaults are written into the hash that was passed in.
def initialize(graph, assembly_options={})
  @graph = graph
  @assembly_options = assembly_options
  {
    :max_tip_length => DEFAULT_MAX_TIP_LENGTH,
    :min_contig_size => DEFAULT_MIN_CONTIG_SIZE,
    :min_confirming_recoherence_kmer_reads => DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS,
  }.each do |option, fallback|
    @assembly_options[option] ||= fallback
  end
end
42
+
43
+ # Assemble everything in the graph into OrientedNodeTrail objects.
44
+ # Yields an OrientedNodeTrail if a block is
45
+ # given, otherwise returns an array of found paths. Options for
46
+ # assembly are specified in assembly_options
47
# Try to assemble a path from every starting node (see
# gather_starting_nodes), skipping nodes already covered by an earlier
# path and paths shorter than @assembly_options[:min_contig_size].
#
# Yields each accepted path when a block is given; otherwise the accepted
# paths are collected. Returns the Array of collected paths (empty when a
# block was given).
def assemble
  paths = []

  # Gather a list of nodes to try starting from
  starting_nodes = gather_starting_nodes
  log.info "Found #{starting_nodes.length} nodes to attempt assembly from"

  seen_nodes = Set.new
  progress = setup_progressbar starting_nodes.length

  starting_nodes.each do |start_node|
    log.debug "Trying to assemble from #{start_node.node_id}" if log.debug?

    # If we've already covered this node (in either orientation), don't try it again
    if seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) or
      seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
      log.debug "Already seen this node, not inspecting further" if log.debug?
      next
    end

    # First go as far as possible in one direction and reverse that path,
    # then continue from it until no further progress can be made
    reversed_path_forward = find_beginning_trail_from_node(start_node, seen_nodes)
    if reversed_path_forward.nil?
      log.debug "Could not find forward path from this node, giving up" if log.debug?
      next
    end
    # Have we already seen this path before?
    #TODO: add in recoherence logic here
    if seen_last_in_path?(reversed_path_forward, seen_nodes)
      log.debug "Already seen the last node of the reversed path forward: #{reversed_path_forward.trail[-1].to_shorthand}, giving up" if log.debug?
      next
    end
    # Assemble ahead again
    log.debug "reversed_path_forward: #{reversed_path_forward.to_shorthand}" if log.debug?
    path, just_visited_onodes = assemble_from(reversed_path_forward)

    # Remove nodes that have already been seen to prevent duplication
    log.debug "Before removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, seen_nodes)
    log.debug "After removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?

    # Record which nodes have now been visited, so they aren't visited again
    seen_nodes.merge just_visited_onodes

    unless progress.nil?
      if @assembly_options[:min_coverage_of_start_nodes]
        # TODO: this could be better by progress += (starting_nodes_just_visited.length)
        progress.increment
      else
        # The number of visited oriented nodes may exceed the number of
        # nodes left on the bar, so never advance past the bar's total
        progress.progress = [progress.progress + just_visited_onodes.length, starting_nodes.length].min
      end
    end

    if path.length_in_bp < @assembly_options[:min_contig_size]
      log.debug "Path length (#{path.length_in_bp}) less than min_contig_size (#{@assembly_options[:min_contig_size]}), not recording it" if log.debug?
      next
    end
    log.debug "Found a seemingly legitimate path #{path.to_shorthand}" if log.debug?
    if block_given?
      yield path
    else
      paths.push path
    end
  end
  progress.finish unless progress.nil?

  return paths
end
122
+
123
# True when the settable form of the path's final element is a member of
# seen_nodes.
def seen_last_in_path?(path, seen_nodes)
  final_onode = path[-1]
  seen_nodes.include?(final_onode.to_settable)
end
126
+
127
# Choose the nodes assembly will be attempted from.
#
# When neither :min_coverage_of_start_nodes nor :min_length_of_start_nodes
# is set, graph.nodes is returned as is. Otherwise an Array is returned
# holding the nodes whose coverage and length_alone reach whichever of the
# two thresholds were given.
def gather_starting_nodes
  min_coverage = @assembly_options[:min_coverage_of_start_nodes]
  min_length = @assembly_options[:min_length_of_start_nodes]
  return graph.nodes unless min_coverage or min_length

  eligible = []
  graph.nodes.each do |node|
    next unless min_coverage.nil? or node.coverage >= min_coverage
    next unless min_length.nil? or node.length_alone >= min_length
    eligible.push node
  end
  return eligible
end
144
+
145
# Create the assembly progress bar, writing to
# @assembly_options[:progressbar_io] with num_nodes as its total.
# Returns nil when no :progressbar_io was given.
def setup_progressbar(num_nodes)
  io = @assembly_options[:progressbar_io]
  return nil unless io

  return ProgressBar.create(
    :title => "Assembly",
    :format => '%a %bᗧ%i %p%% %E %t',
    :progress_mark => ' ',
    :remainder_mark => '・',
    :total => num_nodes,
    :output => io
  )
end
159
+
160
+ # Given a node, return a path that does not include any short tips, or nil if none is
161
+ # connected to this node.
162
+ # With this path, you can explore forwards. This isn't very clear commenting, but
163
+ # I'm just making this stuff up
164
# Find a trail to start assembling from, given a single node.
#
# The node is first oriented END_IS_FIRST; if that orientation is not a
# short tip, assembly runs from it, already seen nodes are removed from
# the end, the path is reversed, and short tips are pruned off its new
# end. If that yields nothing, the same is tried with the node reversed.
#
# node:: the velvet node to start from
# previously_seen_nodes:: settable oriented nodes already used by other paths
#
# Returns the reversed, pruned path, or nil when no usable path exists.
def find_beginning_trail_from_node(node, previously_seen_nodes)
  onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
  onode.node = node
  onode.first_side = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST #go backwards first, because the path will later be reversed
  seed_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  seed_trail.trail = [onode]

  trail_clear_of_short_tips = lambda do |start_trail|
    # go all the way forwards
    path, _visited = assemble_from(start_trail)

    # Nodes already included in other paths are removed from the end of
    # the trail, otherwise they show up as duplicated contig stretches
    log.debug "Before removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, previously_seen_nodes)
    log.debug "After removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?

    path.reverse!

    # Peel nodes off the end until the end is no longer a short tip (the
    # lost nodes might be re-added later on). A single remaining node that
    # is still a short tip means there is nothing to return.
    log.debug "Before pruning back, trail is #{path.to_shorthand}" if log.debug?
    while is_short_tip?(path[-1])[0]
      return nil if path.length == 1
      path.delete_at(path.length-1)
      log.debug "After pruning back, trail is now #{path.to_shorthand}" if log.debug?
    end
    path
  end

  log.debug "Finding nearest find_connected_node_on_a_path #{node.node_id}" if log.debug?
  unless is_short_tip?(onode)[0]
    log.debug "fwd direction not a short tip, going with that" if log.debug?
    forward_result = trail_clear_of_short_tips.call(seed_trail)
    return forward_result unless forward_result.nil?
  end

  log.debug "rev direction is short tip, now testing reverse" if log.debug?
  # onode is the object held in seed_trail, so this flips the seed trail too
  onode.reverse!
  if is_short_tip?(onode)[0]
    log.debug "short tip in both directions, there is no good neighbour" if log.debug?
    #short tip in both directions, so not a real contig
    return nil
  end

  log.debug "reverse direction not a short tip, going with that" if log.debug?
  return trail_clear_of_short_tips.call(seed_trail)
end
226
+
227
# Delete nodes from the end of path for as long as the last node is in
# seen_nodes in either orientation. Stops at the first unseen node or when
# the trail is empty.
#
# path:: trail to trim; it is modified in place
# seen_nodes:: collection of [node_id, first_side] pairs
#
# Returns the same path object.
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of size #{seen_nodes.length}" if log.debug?
  until path.trail.empty?
    tail_index = path.length-1
    tail = path[tail_index]

    already_seen =
      seen_nodes.include?([tail.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) ||
      seen_nodes.include?([tail.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
    # Last node is not previously seen, chop no further.
    break unless already_seen

    path.trail.delete_at(tail_index)
  end
  return path
end
243
+
244
+ # Assemble considering all reads as single ended. Options:
245
+ # :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path. Default 200
246
+ # :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer
247
+ # :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
248
# Extend a copy of initial_path node by node until a dead end, a
# circularisation, the leash length or an unresolvable fork is reached.
#
# At a fork, short tips are clipped first (see remove_tips). If more than
# one neighbour remains, recoherence (when :recoherence_kmer is set) and
# then :max_coverage_at_fork (when set) are used to narrow the choice; the
# path is only extended when exactly one neighbour is left.
#
# initial_path:: trail to start from; it is copied, not modified
# visited_onodes:: Set of settable oriented nodes; it is added to in place
#
# Returns [path, visited_onodes].
def assemble_from(initial_path, visited_onodes=Set.new)
  options = @assembly_options

  recoherencer = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new

  path = initial_path.copy
  # All but the last node count as visited up front; the last node is
  # registered by the first pass through the loop below
  initial_path[0...-1].each do |onode|
    visited_onodes << onode.to_settable
  end

  # Reused scratch trail for testing each candidate extension
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  while true
    log.debug "Now assembling from #{path[-1].to_shorthand}" if log.debug?
    if visited_onodes.include?(path[-1].to_settable)
      log.debug "Found circularisation in path, going no further" if log.debug?
      break
    end
    visited_onodes << path[-1].to_settable

    if options[:leash_length] and path.length_in_bp - @graph.hash_length > options[:leash_length]
      log.debug "Beyond leash length, going no further with assembly" if log.debug?
      break
    end

    oneighbours = path.neighbours_of_last_node(@graph)
    if oneighbours.length == 0
      log.debug "Found a dead end, last node is #{path[-1].to_shorthand}" if log.debug?
      break

    elsif oneighbours.length == 1
      to_add = oneighbours[0]
      log.debug "Only one way to go, so going there, to #{to_add.to_shorthand}" if log.debug?
      path.add_oriented_node to_add

    else
      # Reached a fork (or 3 or 4-fork), which way to go?

      # Remove neighbours that are short tips, remembering what they covered
      oneighbours, visiteds = remove_tips(oneighbours, @assembly_options[:max_tip_length])
      visiteds.each do |onode_settable|
        visited_onodes << onode_settable
      end

      if oneighbours.length == 0
        log.debug "Found a dead end at a fork, last node is #{path[-1].to_shorthand}" if log.debug?
        break
      elsif oneighbours.length == 1
        log.debug "Clipped short tip(s) off, and then there was only one way to go" if log.debug?
        path.add_oriented_node oneighbours[0]
      elsif options[:recoherence_kmer].nil?
        if log.debug?
          neighbours_string = oneighbours.collect do |oneigh|
            oneigh.to_shorthand
          end.join(' or ')
          log.debug "Came across what appears to be a legitimate fork to nodes #{neighbours_string} and no recoherence kmer given, so giving up"
        end
        break
      else
        log.debug "Attempting to resolve fork by recoherence" if log.debug?
        oneighbours.select! do |oneigh|
          dummy_trail.trail = path.trail+[oneigh]
          recoherencer.validate_last_node_of_path_by_recoherence(
            dummy_trail,
            options[:recoherence_kmer],
            options[:sequences],
            options[:min_confirming_recoherence_kmer_reads]
          )
        end

        if oneighbours.length == 0
          log.debug "no neighbours passed recoherence, giving up" if log.debug?
          break
        elsif oneighbours.length == 1
          log.debug "After recoherence there's only one way to go, going there" if log.debug?
          path.add_oriented_node oneighbours[0]
        elsif options[:max_coverage_at_fork]
          oneighbours.select! do |oneigh|
            oneigh.node.coverage <= options[:max_coverage_at_fork]
          end
          log.debug "Found #{oneighbours.length} neighbours after removing nodes over max coverage" if log.debug?

          if oneighbours.length == 1
            log.debug "After removing too much coverage neighbours there's only one way to go, going there" if log.debug?
            path.add_oriented_node oneighbours[0]
          else
            log.debug "After removing max coverage nodes, #{oneighbours.length} neighbours found (#{oneighbours.collect{|o| o.to_shorthand}.join(",")}), giving up" if log.debug?
            break
          end
        else
          log.debug "Still forked after recoherence (to #{oneighbours.collect{|on| on.to_shorthand}.join(' & ')}), so seems to be a legitimate fork, giving up" if log.debug?
          break
        end
      end
    end
  end

  # Make sure the final node is recorded whichever way the loop ended
  visited_onodes << path[-1].to_settable

  return path, visited_onodes
end
354
+
355
+ # Given a list of possibilities for neighbours of a node, return the
356
+ # neighbour(s) that are not short tips, or the longest of the short tips
357
+ # if all are tips. Also return an enumerable of nodes visited from the cut off
358
+ # short tips
359
# Filter the candidate neighbours at a fork, discarding short tips.
#
# oriented_neighbours:: candidate oriented nodes
# tip_distance:: maximum length passed on to find_tip_distance
#
# Returns two values:
# * the neighbours that are not short tips, or, when every neighbour is a
#   short tip, a one-element Array holding the tip with the greatest
#   distance reported by find_tip_distance
# * a Set of settable oriented nodes covered by the discarded tips (each
#   discarded tip itself plus the nodes find_tip_distance visited from it).
#   For empty input both return values are empty Arrays.
def remove_tips(oriented_neighbours, tip_distance)
  return [], [] if oriented_neighbours.empty?

  # Pair every neighbour with its [is_tip, max_distance, visited] triple
  neighbours_and_triples = oriented_neighbours.collect do |oneigh|
    [oneigh, find_tip_distance(oneigh, tip_distance)]
  end
  non_tips, tips = neighbours_and_triples.partition{|nt| nt[1][0] == false}

  visiteds = Set.new
  record_discarded = lambda do |tip|
    visiteds << tip[0].to_settable
    tip[1][2].each {|v| visiteds << v}
  end

  if non_tips.length > 0
    tips.each {|tip| record_discarded.call tip}
    return non_tips.collect{|t| t[0]}, visiteds
  end

  # No long paths here, so go with the longest of the tips. max_by is
  # required: max with a one-argument block treats the block's value as a
  # comparison result and does not select by distance.
  best_tip = tips.max_by{|nt| nt[1][1]}
  tips.each do |tip|
    record_discarded.call tip unless tip.equal?(best_tip)
  end
  return [best_tip[0]], visiteds
end
392
+
393
+ # Returns false iff there is a path longer than max_tip_length
394
+ # starting at the given oriented_node. Currently works as a depth
395
+ # first search, which may or may not be optimal
396
# Run find_tip_distance from oriented_node using
# @assembly_options[:max_tip_length] as the limit.
#
# Returns two values: whether the node was judged a short tip, and the
# visited oriented nodes reported by find_tip_distance.
def is_short_tip?(oriented_node)
  verdict, _max_distance, visited = find_tip_distance(
    oriented_node, @assembly_options[:max_tip_length]
  )
  return verdict, visited
end
401
+
402
+ # The workhorse function of is_short_tip?
403
+ #
404
+ #
405
# Stack-based search onwards from oriented_node, accumulating length_alone
# along each route, to see whether any route grows longer than
# max_tip_length.
#
# Returns three values:
# * false, the offending distance and an empty Array as soon as a popped
#   entry's distance exceeds max_tip_length
# * otherwise true, the largest distance encountered, and the settable
#   oriented nodes reached (the starting node itself is not included)
def find_tip_distance(oriented_node, max_tip_length)
  origin = MaxDistancedOrientedNode.new
  origin.onode = oriented_node
  origin.distance = oriented_node.node.length_alone

  pending = DS::Stack.new
  pending.push origin

  # settable oriented node => greatest distance at which it has been queued
  furthest_seen = {}
  max_dist = origin.distance

  while current = pending.pop
    return false, current.distance, [] if current.distance > max_tip_length

    max_dist = current.distance if current.distance > max_dist

    current.onode.next_neighbours(@graph).each do |oneigh|
      key = oneigh.to_settable
      neighbour_distance = current.distance + oneigh.node.length_alone
      # Only revisit a node when it is reached by a strictly longer route
      known = furthest_seen[key]
      next if known and known >= neighbour_distance

      queued = MaxDistancedOrientedNode.new
      queued.onode = oneigh
      queued.distance = neighbour_distance
      log.debug "The distance of #{queued.onode.node_id} is at least #{neighbour_distance}" if log.debug?
      furthest_seen[key] = neighbour_distance
      pending.push queued
    end
  end

  log.debug "Found insufficient max tip length #{max_dist} for #{oriented_node}" if log.debug?
  return true, max_dist, furthest_seen.keys
end
437
+
438
# Plain holder pairing an oriented node with a distance value; both
# attributes are readable and writable.
class MaxDistancedOrientedNode
  attr_reader :onode, :distance
  attr_writer :onode, :distance
end
441
+ end