finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,261 @@
1
+ class Bio::AssemblyGraphAlgorithms::SingleCoherentWanderer
2
+ include Bio::FinishM::Logging
3
+
4
# Like AcyclicConnectionFinder#depth_first_search_with_leash except use
# single read recoherence. The algorithm used is a generalisation of Dijkstra's
# shortest path algorithm, where instead of keeping track of the minimum
# distance to each node, the algorithm keeps track of the distance to a
# set of nodes long enough to invoke a recoherence kmer.
#
# Parameters:
# finishm_graph: object providing probe_nodes, probe_node_directions,
#   initial_path_from_probe and graph (as called below)
# leash_length: trails whose distance exceeds this are not extended further
# recoherence_kmer: kmer length handed to the recoherence check at forks
# sequence_hash: read sequences handed to the recoherence check
#
# Options:
# :max_explore_nodes: maximum number of nodes to explore from each node. If max is reached, don't make any connections (default: no maximum)
#
# Returns a hash of [probe_index1, probe_index2] => minimum distance found,
# where probe_index1 <= probe_index2.
def wander(finishm_graph, leash_length, recoherence_kmer, sequence_hash, options={})
  to_return = {}

  # Take the probes and make them all into finishing nodes
  finishing_nodes = []
  finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|
    direction = finishm_graph.probe_node_directions[probe_node_index]
    if direction == true
      finishing_nodes.push [probe_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST]
    else
      finishing_nodes.push [probe_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]
    end
  end

  # Search from each probed node in the graph
  # TODO: is there a better way to implement this by somehow searching with
  # all probe nodes at once, rather than starting fresh with each probe?
  finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|

    # Don't explore from the last node, as no new connections are established
    next if probe_node_index == finishm_graph.probe_nodes.length - 1

    # Go all the way to the leash length,
    # and then search to see if any of the other nodes have been come across
    log.debug "Exploring from probe node \##{probe_node_index+1} (node #{probe_node.node_id}/#{finishm_graph.probe_node_directions[probe_node_index] })" if log.debug?
    pqueue = DS::AnyPriorityQueue.new {|a,b| a < b}
    initial = finishm_graph.initial_path_from_probe(probe_node_index)
    if initial.nil?
      log.warn "Unable to start searching from probe \##{probe_node_index+1}, because it was not found in the graph. Skipping."
      next
    end
    initial_distanced = DistancedOrientedNodeSet.new
    initial_distanced.oriented_trail = initial
    initial_distanced.distance = 0

    # The minimum distance found to get to the head nodes
    minimum_head_nodes_distances = {}
    # Which head node sets is each node connected to?
    node_to_head_node_sets = {}
    # for logging
    last_logged_node_count = 0
    maxed_out = false

    pqueue.enqueue initial_distanced, 0
    # While there are more node sets in the queue
    while distanced_head_nodes = pqueue.dequeue
      log.debug "Dequeued #{distanced_head_nodes}" if log.debug?
      if options[:max_explore_nodes] and node_to_head_node_sets.length > options[:max_explore_nodes]
        log.warn "Hit maximum number of nodes (#{options[:max_explore_nodes] }) while exploring from probe \##{probe_node_index+1}"
        maxed_out = true
        break
      end
      if log.info? and node_to_head_node_sets.length % 1024 == 0 and node_to_head_node_sets.length > last_logged_node_count
        if last_logged_node_count == 0
          log.info "While exploring from probe \##{probe_node_index+1}.."
        end
        log.info "So far worked with #{node_to_head_node_sets.length} distinct nodes in the assembly graph, at min distance #{distanced_head_nodes.distance}"
        last_logged_node_count = node_to_head_node_sets.length
      end

      # Computed once and reused as hash key and set member below
      settable = distanced_head_nodes.to_settable
      if minimum_head_nodes_distances.key?(settable) and
        distanced_head_nodes.distance >= minimum_head_nodes_distances[settable].distance
        # This node has already been explored, and no shorter path has been found here. Go no further.
        next
      end
      minimum_head_nodes_distances[settable] = distanced_head_nodes
      last_settable = distanced_head_nodes.oriented_trail.last.to_settable
      node_to_head_node_sets[last_settable] ||= Set.new
      node_to_head_node_sets[last_settable] << settable

      if distanced_head_nodes.distance <= leash_length
        # Still within the leash. Push into the queue all the current node's neighbours in the graph
        last = distanced_head_nodes.oriented_trail.last
        neighbour_onodes = finishm_graph.graph.neighbours_of(last.node, last.first_side)
        log.debug "Found #{neighbour_onodes.length} neighbours" if log.debug?
        if neighbour_onodes.length > 1
          # Fork detected. Apply recoherence, and only enqueue those that pass
          log.debug "Multiple neighbours found" if log.debug?
          neighbour_onodes.each do |neighbour|
            candidate = distanced_head_nodes.add_oriented_node_and_copy(neighbour, recoherence_kmer)
            log.debug "Testing recoherence in candidate #{candidate.oriented_trail.to_s}" if log.debug?
            if candidate.last_node_recoherent?(recoherence_kmer, sequence_hash)
              log.debug "Candidate survived recoherence: #{candidate.to_s}" if log.debug?
              pqueue.enqueue candidate, candidate.distance
            elsif log.debug?
              log.debug "Candidate did not survive recoherence #{candidate.oriented_trail.to_s}"
            end
          end
        else
          # One or none neighbours found. Enqueue if there is one
          neighbour_onodes.each do |neighbour|
            candidate = distanced_head_nodes.add_oriented_node_and_copy(neighbour, recoherence_kmer)
            pqueue.enqueue candidate, candidate.distance
          end
        end
      else
        # we are beyond the leash, go no further
      end
    end

    if maxed_out
      log.debug "Maxed out, exiting loop early" if log.debug?
      next
    end

    # Now have a hash of minimum distances. Now need to go through those and determine
    # which other nodes the current probe node is connected to
    finishm_graph.probe_nodes.each_with_index do |_node, i|
      next if i < probe_node_index # only return the 'upper triangle' of the distance matrices

      finish = finishing_nodes[i]
      heads = node_to_head_node_sets[finish]
      next if heads.nil? #no connection found

      # The single-node sanity check depends only on the pair of probes, not on
      # which head set is being considered, so it is evaluated once per pair.
      if probes_on_single_node_ok?(finishm_graph, probe_node_index, i)
        log.debug "Verified that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
      else
        #TODO: Possibly ok if contigs to be scaffolded are all on the same node. Unlikely in practice due to short tips, but still theoretically possible
        log.debug "Failed to verify that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
        next #no connection found - the only connection was a fake one
      end

      # There might be many head_sets that include the finishing node.
      # Take the least distance among them.
      min_distance = heads.collect do |head_set|
        minimum_head_nodes_distances[head_set].distance
      end.min
      next if min_distance.nil?

      log.debug "Found a connection between probes #{probe_node_index+1} and #{i+1}, distance: #{min_distance}" if log.debug?
      to_return[[probe_node_index, i]] = min_distance
    end
  end
  return to_return
end
156
+
157
# Check for position and orientation if start and finish nodes are
# on the same velvet node. Return true if OK as below or if the nodes
# are different
# --> <--- OK
# <-- --> not ok (unless the node is circular)
# <-- <-- not ok
# --> --> not ok
#
# start_node_index and end_node_index are indices into the probe arrays
# of finishm_graph (probe_nodes, probe_node_directions, probe_node_reads).
def probes_on_single_node_ok?(finishm_graph, start_node_index, end_node_index)
  node1 = finishm_graph.probe_nodes[start_node_index]
  node2 = finishm_graph.probe_nodes[end_node_index]
  return true if node1.node_id != node2.node_id

  node1_direction = finishm_graph.probe_node_directions[start_node_index]
  node2_direction = finishm_graph.probe_node_directions[end_node_index]
  node1_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node1, finishm_graph.probe_node_reads[start_node_index])
  node2_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node2, finishm_graph.probe_node_reads[end_node_index])
  log.debug "Validating for 1 node problems #{start_node_index}/#{end_node_index} #{node1_direction}/#{node2_direction} offsets #{node1_offset}/#{node2_offset}" if log.debug?

  if node1_direction == true and node2_direction == false
    # true/false and probe1 left of probe2 is the most intuitive valid case
    return true if node1_offset < node2_offset

    # Otherwise the pair is only acceptable when the node links back to
    # itself. neighbours holds node IDs, so compare against the node's ID
    # (comparing against the node object itself can never match).
    onode = finishm_graph.velvet_oriented_node(start_node_index)
    neighbour_ids = finishm_graph.graph.neighbours_of(onode.node, onode.first_side).collect{|n| n.node_id}
    return true if neighbour_ids.include?(node1.node_id)
  elsif node1_direction == false and node2_direction == true
    # false/true and probe1 right of probe2 is also valid
    return true if node1_offset > node2_offset
  end

  return false
end
196
+
197
+ private
198
# Offset of a noded read measured from the start of the node, regardless of
# the direction the read is recorded in. For a read with direction true the
# stored offset is returned as is; for direction false the stored offset is
# subtracted from the node's corresponding_contig_length. Any other direction
# value raises.
def direction_independent_offset_of_noded_read_from_start_of_node(velvet_node, velvet_noded_read)
  case velvet_noded_read.direction
  when true
    velvet_noded_read.offset_from_start_of_node
  when false
    velvet_node.corresponding_contig_length - velvet_noded_read.offset_from_start_of_node
  else
    raise "programming error - velvet_noded_read does not have valid direction"
  end
end
207
+
208
# A trail of oriented nodes together with its distance from the origin of
# exploration
class DistancedOrientedNodeSet
  attr_accessor :oriented_trail, :distance

  # Flat array of node_id / first_side pairs for every oriented node in the
  # trail. Two distinct objects holding equivalent trails give equal arrays,
  # which lets them be treated as the same member of a Set or key of a Hash.
  def to_settable
    flattened = []
    @oriented_trail.each do |onode|
      flattened.push(onode.node_id, onode.first_side)
    end
    flattened
  end

  # Return a new DistancedOrientedNodeSet whose trail is this trail plus the
  # given oriented_node, with nodes dropped from the rear once they can no
  # longer contribute to recoherence. The new distance is this distance plus
  # the length of the added node. This object itself is left unchanged.
  def add_oriented_node_and_copy(oriented_node, recoherence_kmer)
    extended = @oriented_trail.trail + [oriented_node]

    # Walk backwards from the newest node, accumulating node lengths, until
    # recoherence_kmer is covered or the trail runs out. Everything from
    # keep_from onwards is retained.
    keep_from = extended.length
    covered = 0
    while keep_from > 0 and covered < recoherence_kmer
      keep_from -= 1
      covered += extended[keep_from].node.length_alone
    end

    copy = DistancedOrientedNodeSet.new
    copy.oriented_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
    copy.oriented_trail.trail = extended[keep_from..-1]
    copy.distance = @distance + oriented_node.node.length_alone
    copy
  end

  # Ask the shared SingleCoherentPathsBetweenNodesFinder whether the last node
  # of this trail is validated by recoherence, and return its answer.
  def last_node_recoherent?(recoherence_kmer, sequence_hash)
    @@single_recoherencer ||= Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new
    @@single_recoherencer.validate_last_node_of_path_by_recoherence(
      @oriented_trail, recoherence_kmer, sequence_hash
    )
  end

  # The trail's string form followed by the distance in brackets
  def to_s
    "#{@oriented_trail}(#{@distance})"
  end
end
261
+ end
@@ -0,0 +1,441 @@
1
+ require 'ds'
2
+ require 'set'
3
+ require 'ruby-progressbar'
4
+
5
+ class Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
6
+ include Bio::FinishM::Logging
7
+
8
+ DEFAULT_MAX_TIP_LENGTH = 200
9
+ DEFAULT_MIN_CONTIG_SIZE = 500
10
+ DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS = 2
11
+
12
+ attr_accessor :graph
13
+
14
+ ASSEMBLY_OPTIONS = [
15
+ :max_tip_length,
16
+ :recoherence_kmer,
17
+ :min_confirming_recoherence_kmer_reads,
18
+ :sequences,
19
+ :leash_length,
20
+ :min_contig_size,
21
+ :max_coverage_at_fork,
22
+ ]
23
+ attr_accessor :assembly_options
24
+
25
# Create a new assembler given a velvet graph and a hash of assembly options.
# Defaults for :max_tip_length, :min_contig_size and
# :min_confirming_recoherence_kmer_reads are written into the given hash when
# those keys are unset.
#
# Assembly options:
# :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path. Default DEFAULT_MAX_TIP_LENGTH
# :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer (requires :sequences)
# :min_confirming_recoherence_kmer_reads: passed to the recoherence check at forks. Default DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS
# :sequences: the sequences of the actual reads, probably a Bio::Velvet::Underground::BinarySequenceStore object
# :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
# :min_coverage_of_start_nodes: only start exploring from nodes with this much coverage
# :min_contig_size: don't bother returning contigs shorter than this. Default DEFAULT_MIN_CONTIG_SIZE
# :progressbar_io: given an IO object e.g. $stdout, write progress information
def initialize(graph, assembly_options={})
  @graph = graph
  @assembly_options = assembly_options
  {
    :max_tip_length => DEFAULT_MAX_TIP_LENGTH,
    :min_contig_size => DEFAULT_MIN_CONTIG_SIZE,
    :min_confirming_recoherence_kmer_reads => DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS,
  }.each do |option, default_value|
    @assembly_options[option] ||= default_value
  end
end
42
+
43
# Assemble everything in the graph into OrientedNodeTrail objects.
# Yields each OrientedNodeTrail if a block is given (the returned array is
# then empty), otherwise returns an array of found paths. Options for
# assembly are specified in assembly_options
def assemble
  paths = []

  # Gather a list of nodes to try starting from
  starting_nodes = gather_starting_nodes
  log.info "Found #{starting_nodes.length} nodes to attempt assembly from"

  seen_nodes = Set.new
  progress = setup_progressbar starting_nodes.length

  # For each starting node, start the assembly process
  starting_nodes.each do |start_node|
    log.debug "Trying to assemble from #{start_node.node_id}" if log.debug?

    # If we've already covered this node, don't try it again
    if seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) or
      seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
      log.debug "Already seen this node, not inspecting further" if log.debug?
      next
    end

    # first attempt to go forward as far as possible, then reverse the path
    # and continue until cannot go farther
    reversed_path_forward = find_beginning_trail_from_node(start_node, seen_nodes)
    if reversed_path_forward.nil?
      log.debug "Could not find forward path from this node, giving up" if log.debug?
      next
    end
    # Have we already seen this path before?
    #TODO: add in recoherence logic here
    if seen_last_in_path?(reversed_path_forward, seen_nodes)
      log.debug "Already seen the last node of the reversed path forward: #{reversed_path_forward.trail[-1].to_shorthand}, giving up" if log.debug?
      next
    end
    # Assemble ahead again
    log.debug "reversed_path_forward: #{reversed_path_forward.to_shorthand}" if log.debug?
    path, just_visited_onodes = assemble_from(reversed_path_forward)

    # Remove nodes that have already been seen to prevent duplication
    log.debug "Before removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, seen_nodes)
    log.debug "After removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?

    # Record which nodes have already been visited, so they aren't visited again
    seen_nodes.merge just_visited_onodes
    unless progress.nil?
      if @assembly_options[:min_coverage_of_start_nodes]
        # TODO: this could be better by progress += (starting_nodes_just_visited.length)
        progress.increment
      else
        progress.progress += just_visited_onodes.length
      end
    end

    if path.length_in_bp < @assembly_options[:min_contig_size]
      log.debug "Path length (#{path.length_in_bp}) less than min_contig_size (#{@assembly_options[:min_contig_size] }), not recording it" if log.debug?
      next
    end
    log.debug "Found a seemingly legitimate path #{path.to_shorthand}" if log.debug?
    if block_given?
      yield path
    else
      paths.push path
    end
  end
  progress.finish unless progress.nil?

  return paths
end
122
+
123
# True if the settable form of the final oriented node of path is a member
# of seen_nodes.
def seen_last_in_path?(path, seen_nodes)
  final_onode = path[-1]
  seen_nodes.include?(final_onode.to_settable)
end
126
+
127
# Nodes that assembly should be attempted from. With neither
# :min_coverage_of_start_nodes nor :min_length_of_start_nodes set, this is
# graph.nodes itself. Otherwise it is an array of the nodes that satisfy
# every threshold that has been set.
def gather_starting_nodes
  min_coverage = @assembly_options[:min_coverage_of_start_nodes]
  min_length = @assembly_options[:min_length_of_start_nodes]
  return graph.nodes unless min_coverage || min_length

  starting_nodes = []
  graph.nodes.each do |node|
    next unless min_coverage.nil? || node.coverage >= min_coverage
    next unless min_length.nil? || node.length_alone >= min_length

    starting_nodes.push node
  end
  starting_nodes
end
144
+
145
# Create a ProgressBar with num_nodes as its total, writing to the IO given
# as the :progressbar_io assembly option. Returns nil when that option is
# unset.
def setup_progressbar(num_nodes)
  output_io = @assembly_options[:progressbar_io]
  return nil unless output_io

  ProgressBar.create(
    :title => "Assembly",
    :format => '%a %bᗧ%i %p%% %E %t',
    :progress_mark => ' ',
    :remainder_mark => '・',
    :total => num_nodes,
    :output => output_io
  )
end
159
+
160
# Given a node, return a path from which assembly can be continued forwards,
# or nil if the node is a short tip in both directions (or if pruning short
# tips leaves only a single node).
#
# The node is first oriented END_IS_FIRST. If that orientation is not a short
# tip, the trail is assembled forwards, already-seen nodes are removed from
# its end, it is reversed, and short-tip nodes are peeled off its new end. If
# that gives nothing, the same is tried with the node reversed.
def find_beginning_trail_from_node(node, previously_seen_nodes)
  onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
  onode.node = node
  onode.first_side = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST #go backwards first, because the path will later be reversed
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  dummy_trail.trail = [onode]

  find_node_from_non_short_tip = lambda do |seed_trail|
    # go all the way forwards
    path, _visited = assemble_from(seed_trail)

    # Already seen nodes at the end of the trail belong to other paths;
    # keeping them would duplicate contig stretches.
    log.debug "Before removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, previously_seen_nodes)
    log.debug "After removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?

    path.reverse!

    # Peel back until the last node is not a short tip (these lost nodes
    # might be re-added later on). A single remaining node that is still a
    # short tip means there is nothing to return.
    log.debug "Before pruning back, trail is #{path.to_shorthand}" if log.debug?
    while is_short_tip?(path[-1])[0]
      return nil if path.length == 1 # returns from the lambda only

      path.delete_at(path.length-1)
      log.debug "After pruning back, trail is now #{path.to_shorthand}" if log.debug?
    end

    path
  end

  log.debug "Finding nearest find_connected_node_on_a_path #{node.node_id}" if log.debug?
  unless is_short_tip?(onode)[0]
    log.debug "fwd direction not a short tip, going with that" if log.debug?
    forward_result = find_node_from_non_short_tip.call(dummy_trail)
    return forward_result unless forward_result.nil?
  end

  log.debug "rev direction is short tip, now testing reverse" if log.debug?
  # onode is the object held in dummy_trail, so this flips the trail too
  onode.reverse!
  if is_short_tip?(onode)[0]
    log.debug "short tip in both directions, there is no good neighbour" if log.debug?
    #short tip in both directions, so not a real contig
    return nil
  end

  log.debug "reverse direction not a short tip, going with that" if log.debug?
  find_node_from_non_short_tip.call(dummy_trail)
end
226
+
227
# Delete nodes from the end of path, in place, for as long as the last node
# is in seen_nodes in either orientation. Stops at the first node from the
# end that has not been seen. Returns the same path object.
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of size #{seen_nodes.length}" if log.debug?
  start_first = Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
  end_first = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST

  until path.trail.empty?
    tail_index = path.length - 1
    tail_id = path[tail_index].node_id
    already_seen = seen_nodes.include?([tail_id, start_first]) ||
      seen_nodes.include?([tail_id, end_first])
    # Last node is not previously seen, chop no further.
    break unless already_seen

    path.trail.delete_at(tail_index)
  end
  path
end
243
+
244
# Extend initial_path forwards through the graph, treating all reads as
# single ended, until a dead end, a circularisation, the leash length or an
# unresolvable fork is reached.
#
# visited_onodes collects the settable form of every oriented node visited,
# including nodes on short tips that were clipped at forks.
#
# Returns [path, visited_onodes], where path is an extended copy of
# initial_path.
#
# Options read from @assembly_options:
# :max_tip_length: tip length (bp) handed to remove_tips when clipping neighbours at a fork
# :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer
# :sequences, :min_confirming_recoherence_kmer_reads: passed to the recoherence check
# :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
# :max_coverage_at_fork: at a fork that recoherence cannot resolve, ignore neighbours with coverage above this
def assemble_from(initial_path, visited_onodes=Set.new)
  options = @assembly_options

  recoherencer = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new

  path = initial_path.copy
  initial_path[0...-1].each do |onode| #Add all except the last node to already seen nodes list
    visited_onodes << onode.to_settable
  end

  # Scratch trail reused for each recoherence check
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  while true
    log.debug "Now assembling from #{path[-1].to_shorthand}" if log.debug?
    if visited_onodes.include?(path[-1].to_settable)
      log.debug "Found circularisation in path, going no further" if log.debug?
      break
    else
      visited_onodes << path[-1].to_settable
    end

    if options[:leash_length] and path.length_in_bp-@graph.hash_length > options[:leash_length]
      log.debug "Beyond leash length, going no further with assembly" if log.debug?
      break
    end

    oneighbours = path.neighbours_of_last_node(@graph)
    if oneighbours.length == 0
      log.debug "Found a dead end, last node is #{path[-1].to_shorthand}" if log.debug?
      break

    elsif oneighbours.length == 1
      to_add = oneighbours[0]
      log.debug "Only one way to go, so going there, to #{to_add.to_shorthand}" if log.debug?
      path.add_oriented_node to_add

    else
      # Reached a fork (or 3 or 4-fork), which way to go?

      # Remove neighbours that are short tips
      oneighbours, visiteds = remove_tips(oneighbours, @assembly_options[:max_tip_length])
      visiteds.each do |onode_settable|
        visited_onodes << onode_settable
      end

      if oneighbours.length == 0
        log.debug "Found a dead end at a fork, last node is #{path[-1].to_shorthand}" if log.debug?
        break
      elsif oneighbours.length == 1
        log.debug "Clipped short tip(s) off, and then there was only one way to go" if log.debug?
        path.add_oriented_node oneighbours[0]
      elsif options[:recoherence_kmer].nil?
        if log.debug?
          neighbours_string = oneighbours.collect do |oneigh|
            oneigh.to_shorthand
          end.join(' or ')
          log.debug "Came across what appears to be a legitimate fork to nodes #{neighbours_string} and no recoherence kmer given, so giving up"
        end
        break
      else
        # A recoherence kmer is available: keep only the neighbours that
        # the reads support as a continuation of the current path.
        log.debug "Attempting to resolve fork by recoherence" if log.debug?
        oneighbours.select! do |oneigh|
          dummy_trail.trail = path.trail+[oneigh]
          recoherencer.validate_last_node_of_path_by_recoherence(
            dummy_trail,
            options[:recoherence_kmer],
            options[:sequences],
            options[:min_confirming_recoherence_kmer_reads]
          )
        end

        if oneighbours.length == 0
          log.debug "no neighbours passed recoherence, giving up" if log.debug?
          break
        elsif oneighbours.length == 1
          log.debug "After recoherence there's only one way to go, going there" if log.debug?
          path.add_oriented_node oneighbours[0]
        elsif options[:max_coverage_at_fork]
          oneighbours.select! do |oneigh|
            oneigh.node.coverage <= options[:max_coverage_at_fork]
          end
          log.debug "Found #{oneighbours.length} neighbours after removing nodes over max coverage" if log.debug?

          if oneighbours.length == 1
            log.debug "After removing too much coverage neighbours there's only one way to go, going there" if log.debug?
            path.add_oriented_node oneighbours[0]
          else
            log.debug "After removing max coverage nodes, #{oneighbours.length} neighbours found (#{oneighbours.collect{|o| o.to_shorthand}.join(",") }), giving up" if log.debug?
            break
          end
        else
          log.debug "Still forked after recoherence (to #{oneighbours.collect{|on| on.to_shorthand}.join(' & ') }), so seems to be a legitimate fork, giving up" if log.debug?
          break
        end
      end
    end
  end

  # The final node counts as visited whichever way the loop ended
  visited_onodes << path[-1].to_settable

  return path, visited_onodes
end
354
+
355
# Given a list of possibilities for neighbours of a node, return the
# neighbour(s) that are not short tips, or the longest of the short tips
# if all are tips. Also return an enumerable of nodes visited from the cut off
# short tips
#
# oriented_neighbours: candidate oriented nodes
# tip_distance: maximum tip length handed to find_tip_distance
#
# Returns [kept_neighbours, visiteds]; both are empty when there are no
# candidates.
def remove_tips(oriented_neighbours, tip_distance)
  return [], [] if oriented_neighbours.empty?

  # Pair each neighbour with its [is_tip, max_distance, visited] triple
  neighbours_and_triples = oriented_neighbours.collect do |oneigh|
    [
      oneigh,
      find_tip_distance(oneigh, tip_distance)
    ]
  end
  non_tips, tips = neighbours_and_triples.partition{|nt| nt[1][0] == false}

  visiteds = Set.new
  # Record a discarded tip, and everything reached through it, as visited
  process_tip = lambda do |tip|
    visiteds << tip[0].to_settable
    tip[1][2].each {|v| visiteds << v}
  end

  if non_tips.length > 0
    tips.each do |tip|
      process_tip.call tip
    end
    return non_tips.collect{|t| t[0]}, visiteds
  else
    # no long distances here. Just go with the longest path.
    # max_by compares the tips by their distance; a one-argument block given
    # to max would be treated as a comparator instead.
    best_tip = tips.max_by{|nt| nt[1][1]}
    tips.each do |tip|
      unless tip == best_tip
        process_tip.call tip
      end
    end
    return [best_tip[0]], visiteds
  end
end
392
+
393
# Runs find_tip_distance from the given oriented_node with the configured
# :max_tip_length and returns [is_tip, visited_onodes], i.e. the first and
# third elements of its result. is_tip is false iff there is a path longer
# than max_tip_length starting at the given oriented_node.
def is_short_tip?(oriented_node)
  tip_flag, _furthest, visited = find_tip_distance(
    oriented_node, @assembly_options[:max_tip_length]
  )
  [tip_flag, visited]
end
401
+
402
# The workhorse function of is_short_tip?
#
# Explores onwards from oriented_node using a stack, adding up node lengths
# (starting with the length of oriented_node itself).
#
# Returns three values:
# * [false, distance, []] as soon as a node is popped whose accumulated
#   distance exceeds max_tip_length
# * [true, max_distance, visited] once the stack is exhausted, where
#   max_distance is the largest distance popped and visited holds the
#   settable form of every neighbour that was pushed
def find_tip_distance(oriented_node, max_tip_length)
  origin = MaxDistancedOrientedNode.new
  origin.onode = oriented_node
  origin.distance = oriented_node.node.length_alone

  pending = DS::Stack.new
  pending.push origin

  # Largest distance recorded so far for each neighbour, keyed by settable
  best_distances = {}
  furthest = origin.distance

  while (current = pending.pop)
    if current.distance > max_tip_length
      return false, current.distance, []
    end

    furthest = current.distance if current.distance > furthest

    current.onode.next_neighbours(@graph).each do |oneigh|
      reach = current.distance + oneigh.node.length_alone
      key = oneigh.to_settable
      known = best_distances[key]
      # Skip neighbours already recorded at this distance or further
      next if known and known >= reach

      step = MaxDistancedOrientedNode.new
      step.onode = oneigh
      step.distance = reach
      log.debug "The distance of #{step.onode.node_id} is at least #{reach}" if log.debug?
      best_distances[key] = reach
      pending.push step
    end
  end

  log.debug "Found insufficient max tip length #{furthest} for #{oriented_node}" if log.debug?
  return true, furthest, best_distances.keys
end
437
+
438
# Pairs an oriented node (onode) with the distance accumulated on reaching
# it; used as the stack entry in find_tip_distance.
class MaxDistancedOrientedNode
  attr_accessor :onode
  attr_accessor :distance
end
441
+ end