finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,46 @@
1
+ require 'yaml'
2
+
3
# Collects assembled metapaths and serialises the whole collection as YAML.
class Bio::FinishM::BadFormatWriter
  def initialize
    # One hash per contig, in the order add_metapath was called
    @to_yamlify = []
  end

  # Record the given metapath under the given contig name.
  #
  # Each element of the metapath that is an OrientedNode becomes a 'regular'
  # entry (shorthand plus the coverage of its node). Anything else is treated
  # as a bubble and becomes a 'bubble' entry listing the shorthand and
  # coverage of each path yielded by its each_path.
  #
  # Note that the graph is stored as an already-serialised YAML string, so in
  # the final document it appears as a string scalar, not as a nested structure.
  def add_metapath(name, metapath)
    entries = []
    metapath.each do |element|
      if element.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode)
        entries << {
          'type' => 'regular',
          'node' => element.to_shorthand,
          'coverage' => element.node.coverage
        }
      else
        # bubble
        bubble_paths = []
        element.each_path do |path|
          bubble_paths << {
            'nodes' => path.to_shorthand,
            'coverage' => path.coverage
          }
        end
        entries << {'type' => 'bubble', 'paths' => bubble_paths}
      end
    end

    @to_yamlify << {'contig_name' => name, 'graph' => entries.to_yaml}
  end

  # The YAML text for everything added so far
  def yaml
    @to_yamlify.to_yaml
  end

  # Print the YAML text to the given IO
  def write(output_io)
    output_io.print(yaml)
  end
end
@@ -0,0 +1,48 @@
1
+ require 'bio-samtools'
2
+
3
module Bio
  module AssemblyGraphAlgorithms
    # Intended to pick 'probe' reads out of a BAM file so contig ends can be
    # located in a velvet graph. Both methods are currently unfinished stubs.
    class BamProbeReadSelector
      include Bio::FinishM::Logging

      # Given an indexed bam file of reads mapped onto contigs,
      # an array of one or more [contig_name, position, direction] entries (i.e. places in the contigs to locate reads for),
      # a kmer (the match has to be at least one perfect kmer overlapping the position) and a
      # path to a CnyUnifiedSeq.names file, return an Array of read_IDs of reads that can be used to locate the contig
      # ends in the velvet graph.
      #
      # This assumes that velvet hasn't done anything to clean up the graph as cleaning might remove reads
      # of interest
      #
      # Not implemented yet: the body is empty, so this currently returns nil.
      def find_probes(indexed_bam_file, contig_names_positions_directions, kmer, path_to_cny_unified_seq_names_file)
        # need to check the sequence of the aligned read is the same as what is in the cny_unified_seq_names_file
      end

      # Given a contig name and a side, together with a path to an indexed bam file,
      # pick out a read that can be used to 'locate'
      # the contig end in the assembly, and return a Bio::DB::Alignment object of it
      #
      # Not finished: alignments overlapping the position are iterated but
      # nothing is selected, so the value returned is whatever
      # Bio::DB::Sam#each_alignment returns.
      def find_probe_read_alignment_from_contig_end(indexed_bam_file, contig_name, direction, position, kmer)
        # Search for all reads that overlap the overhang base, and are in the correct direction
        sam = Bio::DB::Sam.new(:bam => indexed_bam_file)

        # The probes must overlap the position, to one back from
        # the contig end
        window_start, window_stop = direction ? [position-1, position] : [position, position+1]
        region = {:chr => contig_name, :start => window_start, :stop => window_stop}

        sam.each_alignment(region) do |alignment|
          # Reject reads that do not have matching stretches of DNA that are at least kmer length long
          # as these will not be included in the assembly.
          # If it passes, then return the alignment

        end

        # Return the 'best' read's name and sequence.
      end
    end
  end
end
@@ -0,0 +1,842 @@
1
+ require 'set'
2
+ require 'ds'
3
+
4
# Like DS::PriorityQueue except that the caller defines how priority is given
class DS::AnyPriorityQueue < DS::PriorityQueue
  # Create a new priority queue. Internally uses a heap to store elements.
  # The block given here is called with the keys of a parent entry and a
  # child entry, and its result is handed to the heap as the ordering test.
  def initialize
    @store = DS::BinaryHeap.new do |parent, child|
      yield parent.key, child.key
    end
  end

  # Yield the value of each stored entry, in the order of the heap's to_a
  def each
    @store.to_a.each { |pair| yield pair.value }
  end
end
17
+
18
+
19
+
20
+ class Bio::AssemblyGraphAlgorithms::BubblyAssembler < Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
21
+ include Bio::FinishM::Logging
22
+
23
+ DEFAULT_MAX_BUBBLE_LENGTH = 500
24
+ DEFAULT_BUBBLE_NODE_COUNT_LIMIT = 20 #so, so very 'un-educated' guess
25
+ DEFAULT_BUBBLE_FORK_LIMIT = 20
26
+ DEFAULT_MAX_CYCLES = 1
27
+
28
# Build an assembler over the given graph. Any of the bubble-related options
# that are missing (or nil/false) in assembly_options are filled in with the
# class defaults before being passed to the superclass.
#
# Note that the defaults are written into the hash supplied by the caller.
def initialize(graph, assembly_options={})
  {
    :max_bubble_length => DEFAULT_MAX_BUBBLE_LENGTH,
    :bubble_node_count_limit => DEFAULT_BUBBLE_NODE_COUNT_LIMIT,
    :bubble_fork_limit => DEFAULT_BUBBLE_FORK_LIMIT,
    :max_cycles => DEFAULT_MAX_CYCLES,
  }.each do |option, default|
    assembly_options[option] ||= default
  end
  super graph, assembly_options
end
36
+
37
# Starting at a node within a graph, walk through the graph
# accepting forks, so long as the fork paths converge within some finite
# length in the graph (the leash length, measured in number of base pairs).
#
# starting_path:: the elements to start from. Each is appended to the
#   returned metapath and marked as visited before the walk begins.
# visited_oriented_node_settables:: set of already visited oriented node
#   settables. It is modified in place, and is also returned.
#
# Returns [metapath, visited_oriented_node_settables]. The metapath is a
# MetaPath of oriented nodes and converged Bubble objects, and its fate
# records why the walk stopped:
# * DEAD_END_FATE - no neighbours remain, or the bubble queue ran dry
# * CIRCUIT_FATE - the walk ran into an already visited node, or hit a fork
#   where every neighbour had already been visited
# * DIVERGES_FATE - a bubble did not converge within the leash length
# * NODE_COUNT_LIMIT_REACHED - a bubble exceeded the node count or fork limits
def assemble_from(starting_path, visited_oriented_node_settables=Set.new)
  leash_length = @assembly_options[:max_bubble_length]
  if log.info? and starting_path.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail)
    log.info "Assembling from: #{starting_path.to_shorthand}"
  end

  # Strip short tips from a list of neighbours, recording whatever
  # remove_tips reports as visited so it is not walked later.
  # NOTE(review): these are added as returned by remove_tips, without calling
  # to_settable - confirm remove_tips already returns settables.
  filter_neighbours = lambda do |neighbours|
    kept, visiteds = remove_tips(neighbours, @assembly_options[:max_tip_length])
    visiteds.each do |onode|
      log.debug "Adding #{onode} to list of visited nodes" if log.debug?
      visited_oriented_node_settables << onode
    end
    kept
  end

  filter_visited = lambda do |oneigh|
    visited_oriented_node_settables.include? oneigh.to_settable
  end

  # set up basic dynamic programming problem
  base_problem = lambda do |oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = 0
    new_path = Bio::Velvet::Graph::OrientedNodeTrail.new
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new
    new_problem.visited_oriented_nodes << oneigh.to_settable
    new_problem
  end

  # extend dynamic programming problem
  extended_problem = lambda do |problem, oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = problem.distance + problem.path[-1].node.length_alone
    new_path = problem.path.copy
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new problem.ubiquitous_oriented_nodes
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new problem.visited_oriented_nodes
    new_problem.visited_oriented_nodes << oneigh.to_settable
    # Stepping onto a node the parent problem has already visited is a cycle
    new_problem.circular_path_detected = true if problem.visited_oriented_nodes.include? oneigh.to_settable
    new_problem
  end

  current_bubble = nil
  metapath = MetaPath.new
  starting_path.each do |oriented_node|
    log.debug "adding onode at the start: #{oriented_node.to_shorthand}" if log.debug?
    metapath << oriented_node
  end

  # Keep track of nodes visited in this trajectory already so circuits can be avoided
  starting_path.each do |e|
    if e.kind_of?(Bubble)
      e.oriented_nodes do |onode|
        visited_oriented_node_settables << onode.to_settable
      end
    else
      visited_oriented_node_settables << e.to_settable
    end
  end

  current_mode = :linear # :linear, :bubble, or :finished

  while current_mode != :finished
    if current_mode == :linear
      log.debug "Starting a non-bubble from #{metapath.to_shorthand}" if log.debug?
      while true
        oriented_neighbours = metapath.last_oriented_node.next_neighbours(@graph)
        log.debug "Found oriented neighbours #{oriented_neighbours.collect{|onode| onode.to_shorthand} }" if log.debug?

        legit_neighbours = nil
        # Cut off tips unless it is the only way
        if oriented_neighbours.length == 1
          legit_neighbours = oriented_neighbours
        else
          legit_neighbours = filter_neighbours.call(oriented_neighbours)
        end

        if legit_neighbours.empty?
          # This is just a straight out dead end, and we can go no further.
          log.debug "Dead end reached" if log.debug?
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          break
        elsif legit_neighbours.length == 1
          # Linear thing here, just keep moving forward
          neighbour = legit_neighbours[0]

          # Always stop if a circuit is detected in linear mode
          if visited_oriented_node_settables.include?(neighbour.to_settable)
            log.debug "Detected circuit in linear mode by running into #{neighbour.to_settable}" if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            break
          else
            visited_oriented_node_settables << neighbour.to_settable
            metapath << neighbour
          end

        else
          # Reached a fork in the graph here, the point of this algorithm, really.
          if legit_neighbours.all?(&filter_visited)
            # Every way forward has been walked before, so stop here. This
            # must leave the loop straight away, otherwise the mode would be
            # switched to :bubble below and the walk would carry on.
            log.debug "Detected fork in linear mode where all neighbours have been previously traversed. This is effectively a dead end." if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            break
          end

          current_bubble = Bubble.new metapath.last_oriented_node
          log.debug "Starting a bubble forking from metapath #{metapath.to_shorthand}" if log.debug?

          legit_neighbours.each do |oneigh|
            new_problem = base_problem.call oneigh
            log.debug "Adding problem to bubble: #{new_problem}" if log.debug?

            current_bubble.enqueue new_problem
          end
          current_mode = :bubble
          break
        end
      end


    elsif current_mode == :bubble
      # We are in a bubble. Go get some.
      log.debug "entering bubble mode" if log.debug?

      # next problem = queue.shift. while distance of next problem is not beyond the leash length
      while current_mode == :bubble
        problem = current_bubble.shift

        if problem.nil?
          # Getting here seems improbable if not impossible.
          # The current bubble doesn't converge and just has short tips at the end, don't add it to the metapath
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          log.debug "Reached a dead end, ignoring this path" if log.debug?
          break
        end

        log.debug "Dequeued #{problem.to_shorthand}" if log.debug?
        if !leash_length.nil? and problem.distance > leash_length
          # The current bubble doesn't converge, don't add it to the metapath
          metapath.fate = MetaPath::DIVERGES_FATE
          current_mode = :finished
          log.debug "Bubble is past the leash length of #{leash_length}, giving up" if log.debug?
          break
        elsif current_bubble.convergent_on?(problem)
          log.debug "Bubble #{current_bubble.to_shorthand} convergent on #{problem.to_shorthand}" if log.debug?
          current_bubble.converge_on problem
          # Bubble ended in a convergent fashion

          metapath << current_bubble
          # Add the nodes in the bubble to the list of visited nodes
          current_bubble.oriented_nodes do |onode|
            visited_oriented_node_settables << onode.to_settable
          end

          current_bubble = nil
          current_mode = :linear
          break
        else
          # otherwise we must search on in the bubble
          # get all neighbours that are not short tips
          log.debug "Bubble not convergent on #{problem.to_shorthand}" if log.debug?

          neighbours = problem.path.neighbours_of_last_node(@graph)

          # If there is only 1 way to go, go there
          if neighbours.length == 1
            log.debug "Only one way to go from this node, going there" if log.debug?

            oneigh = neighbours[0]
            new_problem = extended_problem.call problem, oneigh
            current_bubble.enqueue new_problem
            log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

            # check to make sure we aren't going overboard in the bubbly-ness
            if !@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit]
              log.debug "Too complex a bubble detected, giving up" if log.debug?
              metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
              current_mode = :finished
              break
            end
          else
            legit_neighbours = filter_neighbours.call(neighbours)

            if legit_neighbours.length == 0
              # this is a kind of 'long' tip, possibly unlikely to happen much.
              # Forget about it and progress to the next problem having effectively
              # removed it from the bubble
              log.debug "Found no neighbours to re-enqueue" if log.debug?
            else
              # Increment complexity counter if this is a real fork
              if legit_neighbours.length > 1
                current_bubble.num_legit_forks += 1
              end

              legit_neighbours.each do |oneigh|
                new_problem = extended_problem.call problem, oneigh
                current_bubble.enqueue new_problem
                log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

                # check to make sure we aren't going overboard in the bubbly-ness
                if (!@assembly_options[:bubble_fork_limit].nil? and current_bubble.num_legit_forks > @assembly_options[:bubble_fork_limit]) or
                  (!@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit])
                  log.debug "Too complex a bubble detected, giving up" if log.debug?
                  metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
                  current_mode = :finished
                  # Only leaves the neighbour iteration; the enclosing while
                  # then stops because the mode is no longer :bubble
                  break
                end
              end
            end
          end
        end
      end
    else
      raise "Programming error: Unexpected mode: #{current_mode}"
    end

    log.debug "Reached end of main loop in mode #{current_mode}" if log.debug?
  end

  return metapath, visited_oriented_node_settables
end
276
+
277
# Has the last element of the path been seen before?
#
# path:: anything answering [-1] with an oriented node or a Bubble
# seen_nodes:: collection of oriented node settables ([node_id, first side])
#
# For an oriented node, true if its settable is in seen_nodes. For a Bubble,
# true if any oriented node within the bubble has been seen in either
# orientation, i.e. if remove_seen_nodes_from_end_of_path would chop the
# bubble off.
#
# The path is never modified. Previously the bubble case trimmed the path
# passed in and then compared its length with itself, so it always returned
# false.
def seen_last_in_path?(path, seen_nodes)
  last = path[-1]
  return seen_nodes.include?(last.to_settable) unless last.kind_of?(Bubble)

  orientations = [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
  ]
  last.oriented_nodes.any? do |onode|
    orientations.any? { |side| seen_nodes.include?([onode.node_id, side]) }
  end
end
285
+
286
+
287
# Chop elements off the end of the path for as long as the last element has
# been seen before, in either orientation. A Bubble counts as seen when any
# of the oriented nodes it yields has been seen.
#
# The path is modified in place, and the same object is returned.
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of length #{seen_nodes.length}" if log.debug?

  already_seen = lambda do |oriented_node|
    seen_nodes.include?([oriented_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) or
      seen_nodes.include?([oriented_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
  end

  until path.empty?
    tail_index = path.length-1
    tail = path[tail_index]

    if tail.kind_of?(Bubble)
      chop = false
      tail.oriented_nodes do |onode|
        if already_seen.call(onode)
          chop = true
          break
        end
      end
    else
      chop = already_seen.call(tail)
    end

    # Last node is not previously seen, chop no further.
    break unless chop

    path.delete_at tail_index
  end

  path
end
321
+
322
+
323
# An ordered list of oriented nodes and Bubble objects, together with a note
# (the fate) of how the path ended.
class MetaPath
  include Enumerable

  DIVERGES_FATE = 'diverges'
  DEAD_END_FATE = 'dead end'
  CIRCUIT_FATE = 'circuit'
  NODE_COUNT_LIMIT_REACHED = 'too many nodes in bubble'
  #CIRCUIT_WITHIN_BUBBLE_FATE = 'circuit within bubble' # shouldn't end metapath

  # How does this metapath end?
  attr_accessor :fate

  def initialize
    @internal_array = []
  end

  # Yield each element (oriented node or bubble) in order
  def each
    @internal_array.each { |element| yield element }
  end

  def [](index)
    @internal_array[index]
  end

  def delete_at(index)
    @internal_array.delete_at index
  end

  def empty?
    @internal_array.empty?
  end

  # The last oriented node of the path: the last element itself when it is an
  # oriented node, otherwise (a bubble) the node that the bubble converges on.
  def last_oriented_node
    tail = @internal_array[-1]
    return tail if tail.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode)

    # it is a bubble
    tail.converging_oriented_node
  end

  # Append an oriented node or bubble to the end of the path
  def <<(oriented_node_or_bubble)
    @internal_array << oriented_node_or_bubble
  end
  alias_method :push, :<<

  # Comma separated shorthand of every element
  def to_shorthand
    @internal_array.collect { |element| element.to_shorthand }.join(',')
  end

  # Reverse the order of the elements in place, and then reverse! each
  # element itself. Returns nil.
  def reverse!
    @internal_array.reverse!
    @internal_array.each { |element| element.reverse! }
    return nil
  end

  # Number of elements, where a bubble counts as one
  def length
    @internal_array.length
  end

  # Yield all oriented nodes anywhere in the regular or bubble
  # bits.
  def each_oriented_node
    @internal_array.each do |element|
      if element.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        element.oriented_nodes.each { |onode| yield onode }
      else
        yield element
      end
    end
  end

  # Total length in base pairs, using the reference trail of each bubble and
  # length_alone of each regular node
  def length_in_bp
    total = 0
    each do |element|
      total +=
        if element.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
          element.reference_trail.length_in_bp_within_path
        else
          element.node.length_alone
        end
    end
    total
  end

  # A single OrientedNodeTrail through the whole metapath, where each bubble
  # is replaced by the nodes of its reference trail
  def reference_trail
    pieces = collect do |element|
      if element.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        element.reference_trail.trail
      else
        element
      end
    end

    trail = Bio::Velvet::Graph::OrientedNodeTrail.new
    trail.trail = pieces.flatten
    trail
  end

  # Sequence of the reference trail
  def sequence
    reference_trail.sequence
  end

  # Length-weighted average coverage over all elements of the path
  def coverage
    coverages = []
    lengths = []
    each do |onode_or_bubble|
      if onode_or_bubble.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        # Length isn't obvious, so go with the reference path length as that is easy
        lengths.push onode_or_bubble.reference_trail.length_in_bp_within_path

        # Coverage of a bubble is the coverage of each node in the bubble
        # each weighted by their length
        coverages.push onode_or_bubble.coverage
      else
        # regular node, so simple average coverage
        coverages.push onode_or_bubble.node.coverage
        lengths.push onode_or_bubble.node.length_alone
      end
    end

    # Then a simple weighted average
    total_length = lengths.reduce(:+)
    weighted = 0.0
    coverages.each_with_index do |cov, i|
      weighted = weighted + (cov * lengths[i].to_f / total_length)
    end
    weighted
  end
end
466
+
467
+
468
+ # Tim - use 'waiting train' algorithm (made up by me).
469
+ # Problems collect the nodes they visit, adding them to hashes of 'ubiquitous' and 'visited' nodes
470
+ # (metaphor: 'train' (problem) visiting 'stations' (nodes)).
471
+ # Each time a problem is dequeued, new problems are enqueued for all neighbours to the problem node
472
+ # (metaphor: 'trains' (problems) magically duplicate for each path to a new 'station' (node) (metaphor
473
+ # breaks a bit here)).
474
+ # At each step the algorithm dequeues a problem, prioritising problems by shortest distance of any path
475
+ # to the problem node, meaning if a problem is enqueued for a node that is already known, then that
476
+ # problem is prioritised (metaphor: when a train leaves a station (problem is dequeued) other 'trains' will
477
+ # wait in case it catches up, or otherwise reaches a more distant station).
478
+ # If a new problem is enqueued for a problem node that is currently enqueued, the new problem is added
479
+ # to known problems removed from queue, and when a problem is dequeued, its ubiquitous and visited nodes
480
+ # are set to the ubiquitous and visited nodes of all known problems for the node (metaphor: the carriages
481
+ # of all trains at a station are merged into one train).
482
+ # Cycles occur when a problem reaches a node that is in its visited nodes hash (metaphor: a station that
483
+ # one of the train carriages has previously visited).
484
+ # Queued cyclic problems are added to known problems and then dropped.
485
+ # Bubble is converged when all current problems have a ubiquitous node in common (metaphor: all carriages
486
+ # of all current trains have visited a station).
487
+ class Bubble
488
+ include Bio::FinishM::Logging
489
+
490
+ # The DynamicProgrammingProblem this bubble converges on
491
+ attr_reader :converging_oriented_node_settable, :is_reverse, :root
492
+
493
+ # how many legit forks have been explored
494
+ attr_accessor :num_legit_forks
495
+
496
# Start a new, unconverged bubble.
#
# bubble_root:: the oriented node that the bubble forks from
# options:: :max_cycles overrides DEFAULT_MAX_CYCLES
def initialize(bubble_root, options = {})
  @root = bubble_root
  @max_cycles = options[:max_cycles] || DEFAULT_MAX_CYCLES
  @num_legit_forks = 0
  # settable => Array of every problem that has been enqueued for that node
  @known_problems = {}
  # settables of the problems currently sitting in the queue
  @current_problems = Set.new
  @queue = DS::AnyPriorityQueue.new {|a,b| a<=b}
end
504
+
505
# Return the next closest dynamic programming problem,
# removing it from the bubble. Before it is returned, its ubiquitous and
# visited node sets are replaced with those combined over all the problems
# known for the same node. Returns nil when the queue has run out.
def shift
  prob = @queue.shift
  return prob if prob.nil?

  prob.ubiquitous_oriented_nodes = ubiquitous_oriented_nodes(prob)
  prob.visited_oriented_nodes = visited_oriented_nodes(prob)
  @current_problems.delete prob.to_settable
  prob
end
516
+
517
# All nodes visited by any of the problems known for the node of the given
# problem: the union of their visited node sets, as a new Set.
#
# This used to union the ubiquitous node sets instead, which forgot nodes
# that lie on only some of the routes to this node, so that a cycle back
# through such a node was not recognised.
#
# A problem that has no visited set recorded falls back to its ubiquitous set.
def visited_oriented_nodes(prob)
  visited_of = lambda do |problem|
    problem.visited_oriented_nodes || problem.ubiquitous_oriented_nodes
  end

  @known_problems[prob.to_settable].reduce(visited_of.call(prob)) do |memo, problem|
    memo + visited_of.call(problem)
  end
end
523
+
524
# Only the nodes that are ubiquitous for every problem known for the node of
# the given problem, i.e. the intersection of their ubiquitous node sets.
def ubiquitous_oriented_nodes(prob)
  shared = prob.ubiquitous_oriented_nodes
  @known_problems[prob.to_settable].each do |known|
    shared = shared & known.ubiquitous_oriented_nodes
  end
  shared
end
530
+
531
# The shortest distance of any problem known for the node of the given
# problem. Used as the priority when a problem is queued.
def shortest_problem_distance(prob)
  @known_problems[prob.to_settable].map(&:distance).min
end
535
+
536
# Add a problem to the bubble. The problem is always recorded as known for
# its node, but it is only queued when it is not flagged as circular and
# there is not already a problem queued for the same node.
def enqueue(dynamic_programming_problem)
  settable = dynamic_programming_problem.to_settable

  (@known_problems[settable] ||= []).push dynamic_programming_problem

  # don't requeue current problem or circular problem
  return if dynamic_programming_problem.circular_path_detected == true or @current_problems.include? settable

  @queue.enqueue dynamic_programming_problem, shortest_problem_distance(dynamic_programming_problem)
  @current_problems << settable
end
549
+
550
+
551
# return true if the given problem converges the bubble, else false. That
# is, true when the node of the given problem is ubiquitous for every
# problem still in the queue (and so also when the queue is empty).
def convergent_on?(dynamic_programming_problem)
  candidate = dynamic_programming_problem.to_settable

  # convergent until shown otherwise
  @queue.each do |queued|
    return false if !ubiquitous_oriented_nodes(queued).include?(candidate)
  end
  true
end
560
+
561
# Finish off the bubble, assuming convergent_on? the given problem == true.
# The queue and the set of current problems are dropped to free some
# memory, so nothing more can be queued or shifted afterwards.
def converge_on(dynamic_programming_problem)
  @converging_oriented_node_settable = dynamic_programming_problem.to_settable
  @queue = @current_problems = nil
end
568
+
569
# yield or failing that return an Array of the list of oriented_nodes found
# in at least one path in this (presumed converged) bubble.
#
# Nodes are found by walking backwards from the converging node through the
# known problems. With a block each node is yielded (reversed if the bubble
# has been reversed) and nil is returned. Raises if the bubble has not
# converged.
def oriented_nodes
  raise unless converged?

  found = {}
  pending = DS::Stack.new
  converging_onode = @known_problems[@converging_oriented_node_settable][0].path[-1]
  pending.push converging_onode

  while current = pending.pop
    key = current.to_settable
    next if found.key?(key)

    yield(@is_reverse ? current.reverse : current) if block_given?

    found[key] = current

    # queue neighbours for paths that don't contain the converging onode
    @known_problems[key].each do |dpp|
      next if dpp.path.length < 2 or dpp.path[0...-1].include? converging_onode
      pending.push dpp.path[-2]
    end
  end

  block_given? ? nil : found.values
end
602
+
603
# Number of distinct nodes for which at least one problem has been enqueued
def num_known_problems
  @known_problems.size
end
606
+
607
+
608
# Iterate over the paths returning each as an OrientedNodeTrail.
# Assumes the path is convergent (raises otherwise).
#
# Paths are built backwards from the converging node by following the known
# problems, so every yielded trail ends at the converging node. At each step
# the candidates are pushed on a stack in ascending order of coverage, so the
# highest coverage candidate is followed first.
#
# options:: :max_cycles is the number of cycles tolerated within a path,
#   defaulting to the value the bubble was created with. Whenever a cycle is
#   noticed the bubble is flagged as circuitous.
def each_path(options = {})
  raise unless converged?
  max_cycles = options[:max_cycles] || @max_cycles

  # Metric used to prioritise each_path
  comparator = lambda do |problem1, problem2|
    onode1, onode2 =
      if problem1.path.length == 1 and problem2.path.length > 1
        # Here the comparison cannot be made on 2nd last node coverages
        # since one of the paths goes straight from the initial to the terminal
        # node. Choose instead based on if the second last node has higher or lower
        # coverage than the final node
        [problem1.path[-1], problem2.path[-2]]
      elsif problem2.path.length == 1 and problem1.path.length > 1
        [problem1.path[-2], problem2.path[-1]]
      else
        [problem1.path[-2], problem2.path[-2]]
      end

    if onode1.node.coverage == onode2.node.coverage
      # Equal coverage, so order on node_id instead (inverted)
      -(onode1.node.node_id <=> onode2.node.node_id)
    else
      onode1.node.coverage <=> onode2.node.coverage
    end
  end

  log.debug "Iterating through each path of bubble" if log.debug?

  # use stack and push paths with lowest coverage first
  stack = DS::Stack.new
  counter = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder::CycleCounter.new max_cycles
  initial_solution = @known_problems[@converging_oriented_node_settable][0]
  stack.push [initial_solution.path, []]
  converging_onode = converging_oriented_node

  while path_parts = stack.pop
    # First part: the trail still to be walked back along.
    # Second part: the nodes already settled, in forward order.
    direct_node_trail = path_parts[0]
    second_part = path_parts[1]

    if direct_node_trail.trail.length == 0
      # Nothing left to walk, so the second part is a complete path

      # check for cycles through bubble root
      if second_part.include? @root
        @circuitous ||= true
        if max_cycles == 0 or max_cycles < counter.path_cycle_count([@root]+second_part)
          # Not finishing cyclic path with too many cycles
          next
        end
      end

      yield_path = Bio::Velvet::Graph::OrientedNodeTrail.new
      yield_path.trail = second_part
      yield_path = yield_path.reverse if @is_reverse
      log.debug "Yielded #{yield_path.to_shorthand}" if log.debug?
      yield yield_path
    else
      # go down the path, looking for other paths
      head_onode = direct_node_trail.trail[-1]
      new_second_part = [head_onode]+second_part

      # Ignore a path with a cycle through the converged node
      next if second_part.length > 1 and head_onode == converging_onode

      if second_part.include? head_onode
        @circuitous ||= true
        if max_cycles == 0 or max_cycles < counter.path_cycle_count(new_second_part)
          # Not finishing cyclic path with too many cycles
          next
        end
      end

      new_problems = @known_problems[head_onode.to_settable]

      # Only enqueue paths where the second-to-head onode is not already queued
      problem_leads = Set.new
      filtered_problems = new_problems.reject do |new_problem|
        if new_problem.path.length < 2
          false
        else
          # add? returns nil when the lead was already there, i.e. a duplicate
          problem_leads.add?(new_problem.path[-2].to_settable).nil?
        end
      end

      filtered_problems.sort(&comparator).each do |new_problem|
        # TODO: deal with circuits
        new_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
        new_trail.trail = new_problem.path[0...-1]
        stack.push [new_trail, new_second_part]
      end
    end
  end
end
721
+
722
# An Array of every path yielded by each_path, in the order yielded
def paths
  [].tap do |collected|
    each_path { |path| collected.push path }
  end
end
729
+
730
# Has converge_on been called, i.e. is the converging node known?
def converged?
  @converging_oriented_node_settable.nil? ? false : true
end
733
+
734
# Return the OrientedNode that converges this bubble, behaviour
# undefined if bubble is not converged. It is taken from the end of the
# path of the first problem known for the converging node.
def converging_oriented_node
  solutions = @known_problems[@converging_oriented_node_settable]
  solutions[0].path[-1]
end
739
+
740
# Shorthand of the bubble: the alternatives separated by '|' within braces.
# For a converged bubble these are the shorthands of all its paths, sorted;
# otherwise they are the shorthands of the problems still queued.
def to_shorthand
  shorts =
    if converged?
      paths.collect { |path| path.to_shorthand }.sort
    else
      queued = []
      @queue.each { |problem| queued.push problem.to_shorthand }
      queued
    end
  "{#{shorts.join('|') }}"
end
751
+
752
# Flip the orientation flag of this bubble (unset counts as not reversed).
# Returns the new value of the flag.
def reverse!
  @is_reverse = !@is_reverse
end
756
+
757
# This doesn't make sense unless this is a converged bubble and the index == -1
# because otherwise there are multiple answers. Raises for any other index.
#
# Returns a new OrientedNode built from the two parts of the converging
# settable.
def [](index)
  raise if index != -1

  settable = @converging_oriented_node_settable
  Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(settable[0], settable[1])
end
766
+
767
# Return one trail that exemplifies the paths through this bubble.
# Current method of path selection is simply greedy: the first path that
# each_path yields is used.
#
# max_cycles:: number of cycles tolerated within the path, defaulting to the
#   value the bubble was created with. It is handed on to each_path
#   (previously it was accepted but ignored).
#
# Returns nil when each_path yields nothing. Raises if the bubble has not
# converged.
def reference_trail(max_cycles = @max_cycles)
  raise unless converged?

  if log.debug?
    log.debug "Finding reference trail from node #{converging_oriented_node.node.node_id}"
  end

  each_path(:max_cycles => max_cycles) do |path|
    # stop as soon as the first path is found
    return path
  end
  nil
end
785
+
786
# Does this (converged) bubble contain any circuits?
#
# The answer is worked out on the first call by iterating the paths with
# cycles disallowed, stopping as soon as a circuit has been flagged, and is
# remembered afterwards. Raises if the bubble has not converged.
def circuitous?
  raise unless converged?
  return @circuitous unless @circuitous.nil?

  each_path({:max_cycles => 0}) { |_path| break if @circuitous }
  @circuitous ||= false
end
795
+
796
# Coverage of a bubble is the coverage of each node in the bubble
# each weighted by their length (length_alone), over every oriented node
# found in the bubble.
def coverage
  weighted_sum = 0.0
  total_length = 0
  oriented_nodes do |onode|
    node = onode.node
    span = node.length_alone
    weighted_sum += node.coverage * span
    total_length += span
  end
  weighted_sum / total_length
end
808
+ end
809
+
810
# One partial route through a bubble: the path walked so far, the distance
# covered, and the node sets used to detect convergence and cycles.
class DynamicProgrammingProblem
  attr_accessor :path, :ubiquitous_oriented_nodes, :visited_oriented_nodes, :distance, :circular_path_detected

  def initialize
    @path = []
    @ubiquitous_oriented_nodes = Set.new
  end

  # A problem is identified by the settable of the last node in its path
  def to_settable
    @path[-1].to_settable
  end

  def to_s
    return "DPP #{self.object_id}: #{@path.to_shorthand}/#{ubiquitous_node_labels.join(',') }/#{distance}"
  end

  # path shorthand / ubiquitous nodes / distance
  def to_shorthand
    "#{@path.to_shorthand}/#{ubiquitous_node_labels.join(',') }/#{distance}"
  end

  private

  # Each ubiquitous node as its id followed by 's' when the start is first,
  # or 'e' otherwise
  def ubiquitous_node_labels
    @ubiquitous_oriented_nodes.collect do |settabled|
      side = settabled[1] == Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST ? 's' : 'e'
      "#{settabled[0] }#{side}"
    end
  end
end
836
+
837
# An Array that also mixes in Comparable, so that the comparison operators
# (<, <=, >, >=, between?) work on top of Array's <=>.
class ComparableArray < Array
  include Comparable
end
840
+
841
# Error for when a circuitous path is detected. This is a StandardError
# (rather than a direct subclass of Exception) so that it is handled by an
# ordinary rescue like any other application error.
class CircuitousPathDetected < StandardError; end
842
+ end