finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,126 @@
1
+ require 'bio'
2
+
3
+ class Bio::FinishM::ScaffoldBreaker
4
+ include Bio::FinishM::Logging
5
+
6
# A contig obtained by splitting a scaffold at its runs of N characters.
class UnscaffoldedContig
  # Start and end positions of this contig within its scaffold. The length
  # calculation below treats both ends as inclusive.
  attr_accessor :scaffold_position_start, :scaffold_position_end

  # The Scaffold to which this contig once belonged
  attr_accessor :scaffold

  # The actual nucleotide sequence of this contig, from scaffold start position to
  # end (not revcom)
  attr_accessor :sequence

  # Number of positions spanned by this contig on the scaffold, counting
  # both the start and the end position.
  def length
    @scaffold_position_end - @scaffold_position_start + 1
  end

  # Build a name of the form "<scaffold name>_<N>of<total>_<start>to<end>",
  # where N is the 1-based position of this contig in scaffold.contigs.
  #
  # Raises a RuntimeError if this contig is not found in scaffold.contigs.
  def name
    # Look the index up before adding 1 to it, so that a missing contig is
    # reported with the intended error instead of a NoMethodError on nil.
    index = scaffold.contigs.find_index(self)
    if index.nil?
      raise "A contig finds itself unexpectedly not in the scaffold it is supposed to belong to"
    end
    contig_number = index + 1
    "#{scaffold.name}_#{contig_number}of#{scaffold.contigs.length}_#{scaffold_position_start}to#{scaffold_position_end}"
  end
end
28
+
29
# A named scaffold together with the contigs it was broken into.
class Scaffold
  # unscaffolded contigs from this scaffold, as an array in sorted order.
  attr_accessor :contigs

  # Name of sequence found in the fasta file
  attr_accessor :name

  # Return an array of Gap objects, one for each pair of neighbouring
  # contigs. Each gap runs from the position after the upstream contig's end
  # to the position before the downstream contig's start, and gaps are
  # numbered from 0.
  def gaps
    found = []
    @contigs.each_cons(2).each_with_index do |(upstream, downstream), gap_number|
      gap = Bio::FinishM::ScaffoldBreaker::Gap.new
      gap.scaffold = self
      gap.start = upstream.scaffold_position_end + 1
      gap.stop = downstream.scaffold_position_start - 1
      gap.number = gap_number
      found.push gap
    end
    found
  end

  # Rebuild the scaffold sequence: the contig sequences in order, with each
  # gap between neighbouring contigs filled by N characters.
  def sequence
    pieces = []
    previous = nil
    @contigs.each do |contig|
      unless previous.nil?
        # Positions strictly between the end of the previous contig and the
        # start of this one
        pieces.push 'N' * (contig.scaffold_position_start - previous.scaffold_position_end - 1)
      end
      pieces.push contig.sequence
      previous = contig
    end
    pieces.join
  end

  # Which contig number is this, in the scaffold? Returns the 0-based index
  # of the given contig, or raises a RuntimeError if it is not present.
  def contig_number(contig)
    position = @contigs.find_index { |candidate| contig == candidate }
    raise "Contig not found in scaffold" if position.nil?
    position
  end
end
79
+
80
# A gap between two neighbouring contigs of a scaffold.
class Gap
  attr_accessor :scaffold, :start, :stop, :number

  # A "<scaffold name>:<start+1>-<stop>" description of this gap.
  def coords
    @scaffold.name + ":#{@start + 1}-#{@stop}"
  end

  #i.e. the number of N characters that would represent this gap
  def length
    1 + (@stop - @start)
  end
end
92
+
93
# Given a path to a scaffold fasta file, read in the scaffolds, and break them apart
# into constituent contigs (the maximal runs of non-N characters, matched
# case-insensitively). Then return an array of Scaffold objects containing the
# contig information therein.
#
# Contig coordinates are 1-based and inclusive. A warning is logged for
# sequences containing characters other than A, T, G, C and N, and for
# sequences that are empty. Raises a RuntimeError when a sequence consists
# solely of N characters.
def break_scaffolds(contigs_filename)
  scaffolds = []
  Bio::FlatFile.foreach(Bio::FastaFormat, contigs_filename) do |seq|
    scaffold = Scaffold.new
    scaffold.name = seq.definition
    # Start from an empty list so that a record yielding no contigs never
    # leaves contigs as nil (which would break the summary log line below).
    scaffold.contigs = []
    sequence = seq.seq

    unless sequence.match(/^[ATGCN]+$/i)
      unexpected = sequence.match(/([^ATGCN])/i)
      if unexpected.nil?
        # Nothing unexpected was found, so the check above failed because
        # there was nothing to match at all.
        log.warn "Found no sequence for #{seq.definition}, so no contigs will be reported for it"
      else
        log.warn "Found unexpected characters in the sequence #{seq.definition} e.g. #{unexpected[1]}, continuing optimistically, but not quite sure what will happen.. good luck"
      end
    end

    if sequence.match(/^N+$/i)
      raise "Found a scaffold that contains all N characters, ignoring this (perhaps your input is mangled?): #{scaffold.name}"
    end

    # Each run of non-N characters is one contig
    sequence.scan(/([^N]+)/i) do
      match = Regexp.last_match
      contig = UnscaffoldedContig.new
      contig.scaffold = scaffold
      contig.scaffold_position_start = match.begin(0) + 1 #Convert to 1-based indices in line with bioruby
      contig.scaffold_position_end = match.end(0)
      contig.sequence = match.to_s
      scaffold.contigs.push contig
    end
    scaffolds.push scaffold
  end

  # Seed the total with 0 so that an input without any scaffolds reports 0
  num_contigs = scaffolds.reduce(0) { |total, s| total + s.contigs.length }
  log.info "Detected #{scaffolds.length} scaffolds, containing #{num_contigs} different contigs"
  return scaffolds
end
126
+ end
@@ -0,0 +1,71 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ class Bio::AssemblyGraphAlgorithms::SequenceHasher
5
+ include Bio::FinishM::Logging
6
+
7
# Starting from oriented_onode, follow neighbouring nodes depth-first until
# each path has accumulated a distance of at least overlap.
#
# The starting node itself contributes no distance. Each added node
# contributes its node.length_alone, plus its own distance when a
# neighbour finder is in use and that distance is set.
#
# Options:
# :neighbour_finder: if given, neighbours are taken from its
# neighbours(oriented_node) method instead of from the graph.
#
# Returns an array of DistancedOrientedNodeTrail objects, one for each path
# that reached the required distance. Paths that run out of neighbours
# before then are not returned.
def extend_overlap(graph, oriented_onode, overlap, options={})
  trails = []

  initial_path = DistancedOrientedNodeTrail.new
  initial_path.add_oriented_node oriented_onode
  initial_path.distance = 0

  stack = DS::Stack.new
  stack.push initial_path

  # While there is more on the stack
  while current_path = stack.pop
    current_distance = current_path.distance

    if current_distance >= overlap
      # Found all the sequence we need
      trails.push current_path
      next
    end

    # Find nodes neighbouring the end of the path being extended (not the
    # node the search started from, otherwise the search never advances).
    head = current_path.trail.last
    neighbours = nil
    if options[:neighbour_finder]
      neighbours = options[:neighbour_finder].neighbours(head)
    else
      neighbours = head.next_neighbours(graph)
    end

    neighbours.each do |onode|
      new_distance = current_distance
      if options[:neighbour_finder] and onode.distance
        new_distance += onode.distance
      end
      new_distance += onode.node.length_alone

      new_path = current_path.copy
      new_path.add_oriented_node onode
      new_path.distance = new_distance
      stack.push new_path
    end
  end

  return trails
end
55
+
56
# An OrientedNodeTrail that also carries a distance value set by its user.
class DistancedOrientedNodeTrail < Bio::Velvet::Graph::OrientedNodeTrail
  attr_accessor :distance

  # Return a new trail holding a copy of each oriented node in this trail,
  # and the same distance.
  def copy
    duplicate = DistancedOrientedNodeTrail.new
    duplicate.trail = @trail.map { |onode| onode.copy }
    duplicate.distance = @distance
    duplicate
  end

  # Description including the object id, the trail shorthand and the distance.
  def to_s
    "DistancedOrientedTrail: #{object_id}: #{to_shorthand} distance=#{@distance}"
  end
end
70
+
71
+ end
@@ -0,0 +1,533 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ class Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder
5
+ include Bio::FinishM::Logging
6
+
7
# Complement of each single upper-case nucleotide
SINGLE_BASE_REVCOM = { 'A' => 'T', 'T' => 'A', 'G' => 'C', 'C' => 'G' }
13
+
14
# Find all paths between the initial and terminal node in the graph.
# Don't search in the graph when the distance in base pairs exceeds the leash length.
# Recohere reads (single-ended only) in an attempt to remove bubbles.
#
# This runs find_all_problems and then hands its result, together with
# recoherence_kmer and options, to find_paths_from_problems.
#
# Options:
# * max_gapfill_paths: the maximum number of paths to return. If this maximum is exceeded, an empty solution set is returned
def find_all_connections_between_two_nodes(graph, initial_path, terminal_oriented_node,
    leash_length, recoherence_kmer, sequence_hash, options={})
  find_paths_from_problems(
    find_all_problems(graph, initial_path, terminal_oriented_node, leash_length, recoherence_kmer, sequence_hash, options),
    recoherence_kmer,
    options
  )
end
28
+
29
# Explore paths leading on from initial_path, in the order they are handed
# out by a ProblemTrailFinder, and record a DynamicProgrammingProblem for
# each distinct set key (see path_to_settable) that is reached.
#
# Paths that fail recoherence validation are dropped. Paths ending at
# terminal_node are recorded and their keys added to terminal_node_keys of
# the returned set. A previously unseen, non-terminal path longer than
# leash_length (when that is not nil) is abandoned.
#
# Options:
#
# :max_explore_nodes: only explore this many nodes, not further. When this
# is exceeded a warning is logged and an empty ProblemSet is returned.
#
# Returns the ProblemSet.
def find_all_problems(graph, initial_path, terminal_node, leash_length, recoherence_kmer, sequence_hash, options={})
  # dynamic programming cache, and the queue of paths still to consider
  problems = ProblemSet.new
  finder = ProblemTrailFinder.new(graph, initial_path)

  while current_path = finder.dequeue
    path_length = current_path.length_in_bp
    log.debug "considering #{current_path}, path length: #{path_length}" if log.debug?

    # The key under which this path's problem is (or will be) stored
    set_key = path_to_settable current_path, recoherence_kmer
    log.debug "Set key is #{set_key}" if log.debug?

    # Unless the path validates, forget it. Without a recoherence_kmer the
    # path is assumed to validate.
    unless recoherence_kmer.nil?
      unless validate_last_node_of_path_by_recoherence(current_path, recoherence_kmer, sequence_hash)
        log.debug "Path did not validate, skipping" if log.debug?
        next
      end
      log.debug "Path validates" if log.debug?
    end

    if current_path.last == terminal_node
      log.debug "last is terminal" if log.debug?
      terminal_problem = (problems[set_key] ||= DynamicProgrammingProblem.new)
      terminal_problem.known_paths ||= []
      terminal_problem.known_paths.push current_path

      problems.terminal_node_keys ||= Set.new
      problems.terminal_node_keys << set_key

    elsif (prob = problems[set_key])
      # Solved before, so add this path to that solved problem
      log.debug "Already seen this problem" if log.debug?
      prob.known_paths.push current_path

      # If a lesser min distance is found, then we need to start exploring from the
      # current place again
      if path_length < prob.min_distance
        log.debug "Found a node with min_distance greater than path length.." if log.debug?
        prob.min_distance = path_length
        finder.push_next_neighbours current_path
      end

    elsif !leash_length.nil? && path_length > leash_length
      # we are past the leash length, give up
      log.debug "Past leash length, giving up" if log.debug?

    else
      log.debug "New dynamic problem being solved" if log.debug?
      fresh_problem = DynamicProgrammingProblem.new
      fresh_problem.min_distance = path_length
      fresh_problem.known_paths.push current_path.copy
      problems[set_key] = fresh_problem

      num_done = problems.length
      if num_done > 0 && num_done % 512 == 0
        log.info "So far worked with #{num_done} head node sets, up to distance #{path_length}" if log.info?
      end
      if options[:max_explore_nodes] && num_done > options[:max_explore_nodes]
        log.warn "Explored too many nodes (#{num_done}), giving up.."
        problems = ProblemSet.new
        break
      end

      # explore the forward neighbours
      finder.push_next_neighbours current_path
    end
    log.debug "Priority queue size: #{finder.length}" if log.debug?
  end

  problems
end
110
+
111
# Convert a path into its set key by passing the path's trail to
# array_trail_to_settable.
def path_to_settable(path, recoherence_kmer)
  log.debug "Making settable a path: #{path}" if log.debug?
  array_trail_to_settable(path.trail, recoherence_kmer)
end
115
+
116
# Build a set key from an array of oriented nodes.
#
# Without a recoherence_kmer the key is the to_settable of the last node
# only. Otherwise nodes are taken from the end of the trail, working
# backwards, until their summed node.length_alone is at least
# recoherence_kmer (or the trail is used up), and the key is the flattened
# to_settable values of those nodes in trail order.
def array_trail_to_settable(trail, recoherence_kmer)
  return trail.last.to_settable if recoherence_kmer.nil?

  # Index of the first node to include in the key
  first_index = trail.length
  bases_covered = 0
  while first_index > 0 && bases_covered < recoherence_kmer
    first_index -= 1
    bases_covered += trail[first_index].node.length_alone
  end

  settable = trail[first_index..-1].collect { |onode| onode.to_settable }.flatten
  log.debug "'Returning' settable version of path: #{settable}" if log.debug?
  settable
end
131
+
132
# Given an OrientedNodeTrail, check that the last node of the path is
# supported by reads that are also found in the nodes preceding it.
#
# Nodes are collected walking backwards from the 2nd last node until
# their length in bp (plus 1) reaches recoherence_kmer. The path validates
# when at least min_concurring_reads read IDs occur in the last node and
# in every collected node. When too few such reads are found, the answer
# is delegated to sub_kmer_sequence_overlap?.
#
# Returns true for paths of fewer than 3 nodes, and when fewer than 2
# nodes were collected.
def validate_last_node_of_path_by_recoherence(path, recoherence_kmer, sequence_hash, min_concurring_reads=1)
  #not possible to fail on a 1 or 2 node path, by debruijn graph definition.
  #TODO: that ain't true! If one of the two nodes is sufficiently long, reads may not agree.
  return true if path.length < 3

  # Walk backwards along the path from the 2nd last node, keeping a running
  # total of the collected length: hash_length-1 of the first collected
  # node's graph, plus length_alone of each collected node.
  collected_nodes = []
  collected_bp = nil
  index = path.length - 2
  while index >= 0
    onode = path.trail[index]
    collected_nodes.push onode
    index -= 1
    collected_bp ||= collected_nodes[0].node.parent_graph.hash_length - 1
    collected_bp += onode.node.length_alone
    # stop collecting once the recoherence_kmer is covered
    break if collected_bp + 1 >= recoherence_kmer
  end
  log.debug "validate: Collected nodes: #{collected_nodes}" if log.debug?
  if collected_nodes.length < 2
    log.debug "Only #{collected_nodes.length+1} nodes being tested for validation, so returning validated" if log.debug?
    return true
  end

  # There should be at least 1 read that spans the collected nodes and the last node
  # The trail validates if the above statement is true.
  #TODO: there's a possible 'bug' here in that there's no guarantee that the read overlays the
  # nodes in a consecutive and gapless manner. But I suspect that is unlikely to be a problem in practice.
  final_node = path.trail[-1].node
  possible_reads = final_node.short_reads.collect { |nr| nr.read_id }
  log.debug "validate starting from #{final_node.node_id}: Initial short reads: #{possible_reads.join(',') }" if log.debug?
  collected_nodes.each do |node|
    log.debug "Validating node #{node}" if log.debug?
    reads_in_node = Set.new(node.node.short_reads.collect { |nr| nr.read_id })
    possible_reads = possible_reads.select { |read_id| reads_in_node.include? read_id }
    if possible_reads.length < min_concurring_reads
      log.debug "First line validation failed, now detecting sub-kmer sequence overlap" if log.debug?
      # Everything from the earliest collected node to the end of the path
      trail_to_validate = path.trail[index+1..-1]
      return sub_kmer_sequence_overlap?(trail_to_validate, sequence_hash, min_concurring_reads)
    end
  end
  log.debug "Found #{possible_reads.length} reads that concurred with validation e.g. #{possible_reads[0]}" if log.debug?
  true
end
186
+
187
# Is there overlap across the given nodes, even if the overlap
# does not include an entire kmer?
# nodes: an OrientedNodeTrail. To validate, there must be at least 1 read that spans all of these nodes
# sequence_hash: Bio::Velvet::Sequence object with the sequences from the reads in the nodes
#
# Returns true once min_concurring_reads reads from the second last node
# have been found whose flanking bases agree with the first and last nodes,
# and false otherwise. Raises if fewer than 3 nodes are given, or if
# sequence_hash has no sequence for a read that is examined.
def sub_kmer_sequence_overlap?(nodes, sequence_hash, min_concurring_reads=1)
  raise if nodes.length < 3 #should not get here - this is taken care of above
  log.debug "validating by sub-kmer sequence overlap with min #{min_concurring_reads}: #{nodes}" if log.debug?

  # Only reads that are in the second last node are possible, by de-bruijn graph definition.
  anchor = nodes[-2]
  middle_nodes_length = nodes[1..-2].inject(nodes[0].node.parent_graph.hash_length - 1) do |total, onode|
    total + onode.node.length
  end
  log.debug "Found middle nodes length #{middle_nodes_length}" if log.debug?

  num_confirming_reads = 0

  anchor.node.short_reads.each do |read|
    log.debug "Considering read #{read.inspect}" if log.debug?
    # Ignore reads that don't come in at the start of the node
    unless read.offset_from_start_of_node == 0
      log.debug "Read doesn't start at beginning of node, skipping" if log.debug?
      next
    end

    seq = sequence_hash[read.read_id]
    raise "No sequence stored for #{read.read_id}, programming fail." if seq.nil?

    # The read needs at least one base on either side of the middle nodes
    if read.start_coord == 0
      log.debug "start_coord Insufficient length of read" if log.debug?
      next
    end
    if seq.length - read.start_coord - middle_nodes_length < 1
      log.debug "other_side Insufficient length of read" if log.debug?
      next
    end

    # The base just before the read's start coordinate (complemented), and
    # the base just beyond the middle nodes. Which of them is compared with
    # the first node and which with the last depends on whether the read
    # direction agrees with the orientation of the second last node.
    base_before = SINGLE_BASE_REVCOM[seq[read.start_coord - 1]]
    base_after = seq[read.start_coord + middle_nodes_length]
    if !(read.direction ^ anchor.starts_at_start?)
      left_base, right_base = base_before, base_after
    else
      left_base, right_base = base_after, base_before
    end

    # left base, the base from the first node
    first_node = nodes[0].node
    left_comparison_base = if nodes[0].starts_at_start?
      first_node.ends_of_kmers_of_twin_node[0]
    else
      first_node.ends_of_kmers_of_node[0]
    end
    if left_base != left_comparison_base
      log.debug "left comparison base mismatch, this is not a validating read" if log.debug?
      next
    end

    # right base, overlapping the last node
    last_node = nodes[-1].node
    right_comparison_base = if nodes[-1].starts_at_start?
      last_node.ends_of_kmers_of_node[0]
    else
      last_node.ends_of_kmers_of_twin_node[0]
    end
    if right_base != right_comparison_base
      log.debug "right comparison base mismatch, this is not a validating read" if log.debug?
      next
    end

    log.debug "Read validates path"
    num_confirming_reads += 1
    #gauntlet passed, this is enough confirmatory reads, and so the path is validated.
    return true if num_confirming_reads >= min_concurring_reads
  end

  false #no candidate reads pass
end
257
+
258
+
259
# Backtrack through the problems found by find_all_problems to enumerate
# the paths ending at the terminal node.
#
# Separate stacks are kept for valid paths and for paths which exceed the
# maximum allowed cycle count. Each backtrack spawns a set of new paths,
# which are cycle counted. If any cycle is repeated more than max_cycles,
# the new path is pushed to the secondary stack, otherwise it is pushed to
# the main stack. Main stack paths are prioritised. The secondary stack
# paths must be tracked until cycle repeats in second_part exceed
# max_cycles, as they can spawn valid paths with backtracking.
#
# Options:
# :max_gapfill_paths: give up (returning no trails, with
# max_path_limit_exceeded set) once the main stack plus the solutions found
# exceed this number. Defaults to 2196.
# :max_cycles: the number of times a cycle may be repeated. Defaults to 1.
#
# Returns a Bio::AssemblyGraphAlgorithms::TrailSet.
def find_paths_from_problems(problems, recoherence_kmer, options={})
  max_num_paths = options[:max_gapfill_paths] || 2196
  max_cycles = options[:max_cycles] || 1

  # Formatting helpers, used for debug output only
  node_ids = lambda { |onodes| onodes.collect { |onode| onode.node.node_id }.join(',') }
  describe_parts = lambda { |parts| parts.collect { |part| node_ids.call(part) }.join(' and ') }

  counter = CycleCounter.new(max_cycles)
  # true sends a path to the secondary stack, false to the main stack
  decide_stack = lambda do |to_push|
    too_many_cycles = max_cycles < counter.path_cycle_count(to_push.flatten)
    if too_many_cycles
      log.debug "Pushing #{describe_parts.call(to_push)} to secondary stack" if log.debug?
    else
      log.debug "Pushing #{describe_parts.call(to_push)} to main stack" if log.debug?
    end
    too_many_cycles
  end

  stack = DualStack.new(&decide_stack)
  to_return = Bio::AssemblyGraphAlgorithms::TrailSet.new

  # if there is no solutions to the overall problem then there is no solution at all
  terminal_keys = problems.terminal_node_keys
  if terminal_keys.nil? || terminal_keys.empty?
    to_return.trails = []
    return to_return
  end

  # push all solutions to the "ending in the final node" solutions to the stack
  terminal_keys.each do |key|
    stack.push [problems[key].known_paths[0].to_a, []]
  end

  all_paths_hash = {}
  while path_parts = stack.pop
    log.debug describe_parts.call(path_parts) if log.debug?
    first_part = path_parts[0]
    second_part = path_parts[1]

    if first_part.length == 0
      # Tracked all the way to the beginning, so there's no need to track
      # further. Solutions are keyed by their hash so each is kept once.
      log.debug "Found solution: #{node_ids.call(second_part)}." if log.debug?
      all_paths_hash[second_part.hash] ||= second_part
    else
      last = first_part.last

      if second_part.include? last
        log.debug "Cycle at node #{last.node_id} detected in previous path #{node_ids.call(second_part)}." if log.debug?
        to_return.circular_paths_detected = true
        if max_cycles == 0 || max_cycles < counter.path_cycle_count([last, second_part].flatten)
          log.debug "Not finishing cyclic path with too many repeated cycles." if log.debug?
          next
        end
      end

      # Move the last node across, then continue from every known way of
      # getting to it
      problems[array_trail_to_settable(first_part, recoherence_kmer)].known_paths.each do |path|
        stack.push [path[0...(path.length-1)], [last, second_part].flatten]
      end
    end

    # max_num_paths parachute
    # The parachute can kill the search once the main stack exceeds max_gapfill_paths,
    # since all paths on it are valid.
    if !max_num_paths.nil? && (stack.sizes[0] + all_paths_hash.length) > max_num_paths
      log.info "Exceeded the maximum number of allowable paths in this gapfill" if log.info?
      to_return.max_path_limit_exceeded = true
      all_paths_hash = {}
      break
    end
  end

  to_return.trails = all_paths_hash.values
  to_return
end
345
+
346
# A pair of stacks. The block given at construction decides, for each
# pushed item, whether it goes on the secondary stack (block returns a true
# value) or the main stack. Items are popped from the main stack for as
# long as it yields something, and from the secondary stack after that.
class DualStack
  def initialize(&block)
    @checker = block
    @stack = DS::Stack.new
    @dual_stack = DS::Stack.new
  end

  def push(to_push)
    destination = @checker.call(to_push) ? @dual_stack : @stack
    destination.push to_push
  end

  def pop
    @stack.pop || @dual_stack.pop
  end

  # Sizes of the main and the secondary stack, in that order
  def sizes
    [@stack.size, @dual_stack.size]
  end
end
369
+
370
# Count occurrences of cycles in paths through an assembly graph. Works by building a hash of paths and
# the frequency of the modal cycle in that path (up to the cut-off max_cycles). For an unknown path, looks
# for a subset of the path in hash by removing nodes from start (or end if :forward option is set), and
# then extends the subset by iteratively re-adding a single node and adding to the hash of paths the larger
# of the subset count or the frequency for the modal cycle beginning with the re-added node.
class CycleCounter
  include Bio::FinishM::Logging

  # max_cycles: the cut-off beyond which cycles are no longer counted
  # options[:forward]: by default the cache is built assuming backtracking
  # from the end of the path. This flag will reverse path direction and
  # build the cache assuming moving forwards.
  def initialize(max_cycles, options = {})
    @max_cycles = max_cycles
    @path_cache = {} # Cached counts for previously seen paths
    @forward = options[:forward] || false
  end

  # Iterate through the nodes of path and find the maximal cycle count,
  # caching the count of every sub-path examined along the way.
  def path_cycle_count(path)
    log.debug "Finding cycles in path #{path.collect{|onode| onode.node.node_id}.join(',')}." if log.debug?
    stripped = []
    remainder = @forward ? path.reverse : path
    count = nil

    # Strip nodes from the front of the path until what remains is found in
    # the cache, remembering the stripped nodes.
    until remainder.empty?
      key = settable_key(remainder)
      if @path_cache.has_key? key
        count = @path_cache[key]
        break
      end
      stripped = [stripped, remainder.first].flatten
      remainder = remainder[1..-1]
    end
    # Nothing was cached for any part of the path
    count = 0 if remainder.empty?

    # The max cycle count for a path is the largest of:
    # I. Cycle count for initial node of path in remaining path (without initial
    # node), or
    # II. Max cycle count of remaining path.
    #
    # Put the stripped nodes back one at a time, most recently stripped
    # first. Until the count exceeds max_cycles, cycles are counted for each
    # node against the remaining path. The count is cached for every
    # remaining path that is rebuilt.
    reached_max_cycles = false
    until stripped.empty?
      node = stripped.last
      unless reached_max_cycles
        node_count = path_cycle_count_for_node(node, remainder, @max_cycles)
        count = [count, node_count].max
        reached_max_cycles = count > @max_cycles
      end

      remainder = [node, remainder].flatten
      stripped = stripped[0...-1]
      @path_cache[settable_key(remainder)] = count
    end

    if log.debug?
      if reached_max_cycles
        log.debug "Most repeated cycle in path occured #{count} or more times."
      else
        log.debug "Most repeated cycle in path occured #{count} times."
      end
    end
    count
  end

  # For an initial node, find and count unique 'simple' cycles in a path that begin at the initial
  # node. Returns the count of the most repeated cycle, returning early with
  # the count of the first cycle found to repeat more than max_cycles times.
  def path_cycle_count_for_node(node, path, max_cycles=1)
    remaining = @forward ? path.reverse : path
    cycles = Hash.new(0)

    # Each occurrence of node closes a cycle made of the nodes up to and
    # including that occurrence
    while (position = remaining.index(node))
      cycle = remaining[0..position]
      remaining = remaining[(position+1)..-1]

      repeats = (cycles[settable_key(cycle)] += 1)
      return repeats if repeats > max_cycles
    end

    cycles.empty? ? 0 : cycles.values.max
  end

  private

  # Flattened to_settable values of the given oriented nodes, used as a hash key
  def settable_key(onodes)
    onodes.collect { |onode| onode.to_settable }.flatten
  end
end
486
+
487
# What has been recorded about one problem (one set key) during the search.
class DynamicProgrammingProblem
  # The smallest path length recorded for this problem; not set on creation
  attr_accessor :min_distance

  # The paths recorded for this problem; starts out empty
  attr_accessor :known_paths

  def initialize
    @known_paths = Array.new
  end
end
494
+
495
# Like a Hash, but also contains a collection of keys that end in the
# terminal node
class ProblemSet < Hash
  # Keys to this hash that end in the terminal onode. Not set until
  # assigned by the user of this class.
  attr_reader :terminal_node_keys
  attr_writer :terminal_node_keys
end
501
+
502
# Hands out paths to explore from a priority queue, in which each path is
# enqueued with its length in bp as its priority.
class ProblemTrailFinder
  include Bio::FinishM::Logging

  # Start the queue off with a copy of initial_path, enqueued with priority 0.
  def initialize(graph, initial_path)
    @graph = graph
    @pqueue = DS::AnyPriorityQueue.new { |a, b| a < b }
    @pqueue.enqueue initial_path.copy, 0
  end

  # The next path from the queue
  def dequeue
    @pqueue.dequeue
  end

  # Number of paths currently queued
  def length
    @pqueue.length
  end

  # For each neighbour of the last node of current_path, enqueue a copy of
  # current_path extended by that neighbour.
  def push_next_neighbours(current_path)
    neighbours = current_path.neighbours_of_last_node(@graph)
    log.debug "Pushing #{neighbours.length} new neighbours of #{current_path.last}" if log.debug?
    #TODO: not neccessary to copy all paths, can just continue one of them
    neighbours.each do |neighbour|
      log.debug "Pushing neighbour to stack: #{neighbour}" if log.debug?
      extended = current_path.copy
      extended.add_oriented_node neighbour
      @pqueue.enqueue extended, extended.length_in_bp
    end
  end
end
531
+ end
532
+
533
+