finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,85 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
module Bio
  module AssemblyGraphAlgorithms
    # Enumerates the paths leading away from a starting trail in an assembly
    # graph, stopping each path when it runs out of neighbours, revisits a node
    # it already contains, or exceeds an optional length limit.
    class GraphExplorer
      # Return all paths that emanate from a given node, in the graph.
      #
      # graph:: handed unchanged to +neighbours_of_last_node+ of the trail
      # initial_path:: the trail to start from. It must respond to +collect+,
      #                +copy+, +add_oriented_node+, +length_in_bp+ and
      #                +neighbours_of_last_node+. The trail is extended in
      #                place (it is reused for one branch of the search).
      # leash_length:: maximum +length_in_bp+ a path may reach before it is
      #                cut off, or nil for no limit
      #
      # Returns an Array of ExplorationPath objects, each with its
      # +termination_type+ set to 'Leashed', 'Dead end / coverage' or 'Loop'.
      def explore_from_node(graph, initial_path, leash_length)
        # Do a simple depth first search, forking at each node. Vanilla graph traversal.
        depth_first_search_stack = DS::Stack.new
        depth_first_search_stack.push ExplorationPath.new(initial_path)
        found_paths = []

        # While there's more paths to explore
        while (current_path = depth_first_search_stack.pop)
          if !leash_length.nil? && current_path.path.length_in_bp > leash_length
            current_path.termination_type = 'Leashed'
            found_paths.push current_path
            next
          end

          neighbours = current_path.path.neighbours_of_last_node(graph)
          if neighbours.empty?
            current_path.termination_type = 'Dead end / coverage'
            found_paths.push current_path
            next
          end

          neighbours_to_add = []
          neighbours.each do |oriented_neighbour|
            # Test for loops, I'm only interested in acyclic paths for the moment
            if current_path.include?(oriented_neighbour)
              # Loop found: record a terminated copy that ends on the repeated node
              new_path = current_path.copy
              new_path.add_node oriented_neighbour
              new_path.termination_type = 'Loop'
              found_paths.push new_path
            else
              neighbours_to_add.push oriented_neighbour
            end
          end

          last_index = neighbours_to_add.length - 1
          neighbours_to_add.each_with_index do |oriented_neighbour, i|
            # The final branch reuses the current path rather than copying it.
            # Earlier branches were copied before this mutation happens.
            next_path = i == last_index ? current_path : current_path.copy
            next_path.add_node oriented_neighbour
            depth_first_search_stack.push next_path
          end
        end

        found_paths
      end

      # A trail under exploration together with a Set of the settable forms of
      # its nodes (for membership tests) and the reason exploration stopped.
      class ExplorationPath
        attr_accessor :path, :set_of_nodes, :termination_type

        # path:: a trail whose elements respond to +to_settable+
        def initialize(path)
          @path = path
          @set_of_nodes = Set.new path.collect { |n| n.to_settable }
        end

        # True if the settable form of oriented_node is already in this path.
        def include?(oriented_node)
          @set_of_nodes.include?(oriented_node.to_settable)
        end

        # Append onode to the underlying trail and to the membership set.
        def add_node(onode)
          path.add_oriented_node onode
          @set_of_nodes << onode.to_settable
        end

        # A new ExplorationPath wrapping a copy of the trail. The
        # termination_type is not carried over.
        def copy
          ExplorationPath.new @path.copy
        end

        # Comma-separated node IDs of the trail.
        def to_s
          @path.collect { |on| on.node_id }.join(',')
        end
      end
    end
  end
end
@@ -0,0 +1,315 @@
1
+ require 'bio-velvet'
2
+ require 'bio'
3
+ require 'pry'
4
+
5
+ class Bio::FinishM::GraphGenerator
6
+ include Bio::FinishM::Logging
7
+
8
# Assembly settings used when the caller has not supplied a value.
DEFAULT_OPTIONS = {
  :velvet_kmer_size => 51,
  :assembly_coverage_cutoff => 3.5,
}

# Register the graph-generation command line switches on option_parser and
# seed options with DEFAULT_OPTIONS.
#
# option_parser:: object responding to +on(flag, description, &block)+
# options:: Hash that receives the defaults and, when a switch is parsed,
#           the parsed value. It is modified in place.
#
# Defaults are only written for keys that are missing or nil, so values the
# caller put into options beforehand are kept (and are the ones shown in the
# "[default: ...]" help text).
def add_options(option_parser, options)
  DEFAULT_OPTIONS.each do |key, value|
    options[key] = value if options[key].nil?
  end

  option_parser.on("--assembly-kmer NUMBER", "when assembling, use this kmer length [default: #{options[:velvet_kmer_size]}]") do |arg|
    options[:velvet_kmer_size] = arg.to_i
  end
  option_parser.on("--assembly-coverage-cutoff NUMBER", "Require this much coverage in each node, all other nodes are removed [default: #{options[:assembly_coverage_cutoff]}]") do |arg|
    options[:assembly_coverage_cutoff] = arg.to_f
  end
  option_parser.on("--post-assembly-coverage-cutoff NUMBER", "Require this much coverage in each node, implemented after assembly [default: not used]") do |arg|
    options[:post_assembly_coverage_cutoff] = arg.to_f
  end
  option_parser.on("--velvet-directory PATH", "Output assembly intermediate files to this directory [default: use temporary directory, delete afterwards]") do |arg|
    options[:output_assembly_path] = arg
  end
  option_parser.on("--already-assembled-velvet-directory PATH", "If an assembly directory has been specified previously with --velvet-directory, re-use this assembly rather than re-doing the assembly [default: off]") do |arg|
    options[:previous_assembly] = arg
  end
end
31
+
32
# Generate a ProbedGraph object, given one or more 'probe sequences'
# and metagenomic reads. This is a rather large method, but seems to
# be approximately repeated in different applications of FinishM, so
# creating it for DRY purposes.
#
# probe_sequences: DNA sequences (as String objects whose direction points to the outsides of contigs)
# read_inputs: a ReadInput object, containing the information to feed to velveth.
#   When a fresh assembly is run, its fasta_singles is reassigned so that the
#   temporary probe file comes first.
#
# options:
# :probe_reads: a list of sequence numbers (numbering as per velvet Sequence file)
# :probe_read_names: a list of sequence names (not IDs) that are probes (convert the names to IDs using the CnyUnifiedSeqNames file). There may not be a one to one correspondence of these read names and the probe reads returned in the ProbedGraph since reads can map to multiple sequence IDs.
# :velvet_kmer_size: kmer
# :assembly_coverage_cutoff: coverage cutoff for nodes
# :post_assembly_coverage_cutoff: apply this coverage cutoff to nodes after parsing assembly
# :output_assembly_path: write assembly to this directory
# :previous_assembly: a velvet directory from a previous run of the same probe sequences and reads. (Don't re-assemble)
# :use_textual_sequence_file: by default, a binary sequence file is used. Set this true to get velvet to generate the Sequences file
# :remove_unconnected_nodes: delete nodes from the graph that are not connected to the probe nodes
# :graph_search_leash_length: when :remove_unconnected_nodes'ing, use this leash length
# :dont_parse_noded_reads: if true, skip parsing noded reads (ie the positions of the reads in the graph)
# :dont_parse_reads: if true, skip parsing reads (ie the sequences of the reads themselves)
#
# Returns the populated ProbedGraph. Raises a RuntimeError when the velvet
# binary has the wrong version, a required assembly option is missing, the
# probe sequences differ from those in a re-used assembly, a named probe
# cannot be found (or is found more than twice), or the graph reports that
# it is not completely probed.
def generate_graph(probe_sequences, read_inputs, options={})
  # NOTE(review): this writes into the caller's hash, but the key is not read
  # anywhere in this method - kept in case other code relies on it.
  options[:parse_sequence_file] ||= true
  graph = nil
  read_probing_graph = nil
  finishm_graph = Bio::FinishM::ProbedGraph.new

  log.debug "Options for generate_graph: #{options}" if log.debug?

  velvet_binary_folder = File.join(File.dirname(__FILE__),'..','..','ext','src')
  log.debug "Using velvet binary folder #{velvet_binary_folder}" if log.debug?

  velvet_result = nil

  # Without explicit probe read IDs, the probes are sequences 1..N
  probe_read_ids = options[:probe_reads] || Set.new(1..probe_sequences.length)

  if options[:previous_assembly].nil? #If assembly has not already been carried out
    Tempfile.open('probes.fa') do |tempfile|
      50.times do # Do 50 times to make sure that velvet doesn't throw out parts of the graph that contain this contig
        probe_sequences.each_with_index do |probe, i|
          tempfile.puts ">probe#{i}"
          tempfile.puts probe
        end
      end
      tempfile.close

      # Put the probe file ahead of any single-ended fasta inputs
      singles = read_inputs.fasta_singles
      if singles && !singles.empty?
        read_inputs.fasta_singles = [tempfile.path, singles].flatten
      else
        read_inputs.fasta_singles = [tempfile.path]
      end
      # File.read closes the file itself, unlike File.open(...).read
      log.debug "Inputting probes into the assembly:\n#{File.read(tempfile.path)}" if log.debug?

      runner = Bio::Velvet::Runner.new
      velveth_path = File.join(velvet_binary_folder, 'velveth')
      velvetg_path = File.join(velvet_binary_folder, 'velvetg')
      required_version = '1.2.10-wwood_finishm'
      found_version = runner.binary_version(velveth_path)
      if found_version != required_version
        raise "Detected velvet version incompatible with FinishM: #{found_version}, expected #{required_version} which is available from https://github.com/wwood/velvet (on branch less_clipping)"
      end

      log.info "Assembling sampled reads with velvet"
      raise "Need to specify -cov_cutoff" if options[:assembly_coverage_cutoff].nil?
      raise "Need to specify a kmer size" if options[:velvet_kmer_size].nil?
      # Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
      use_binary = options[:use_textual_sequence_file] ? '' : '-create_binary'
      velvet_result = runner.velvet(
        options[:velvet_kmer_size],
        "#{read_inputs.velvet_read_arguments} #{use_binary}",
        "-read_trkg yes -cov_cutoff #{options[:assembly_coverage_cutoff]} -tour_bus no -read_to_node_binary yes",
        :output_assembly_path => options[:output_assembly_path],
        :velveth_path => velveth_path,
        :velvetg_path => velvetg_path
      )
      if log.debug?
        log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
        log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
        log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
        log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
      end
      log.info "Finished running assembly"
      finishm_graph.velvet_result_directory = velvet_result.result_directory
    end
  else
    log.info "Using previous assembly stored in #{options[:previous_assembly]}"
    velvet_result = Bio::Velvet::Result.new
    velvet_result.result_directory = options[:previous_assembly]
    finishm_graph.velvet_result_directory = velvet_result.result_directory
  end

  # Check that the probe reads given are present in the assembly passed here
  unless options[:dont_parse_reads]
    sequence_store = parse_velvet_binary_reads(velvet_result.result_directory)
    finishm_graph.velvet_sequences = sequence_store
    if !check_probe_sequences(probe_sequences, sequence_store)
      raise "Probe sequences changed since previous velvet assembly!"
    end
  end

  log.info "Parsing the graph output from velvet"
  last_graph_path = File.join(velvet_result.result_directory, 'LastGraph')
  opts = {
    # noded reads are parsed in via C, if they are wanted at all
    :dont_parse_noded_reads => true
  }
  bio_velvet_graph = Bio::Velvet::Graph.parse_from_file(last_graph_path, opts)
  log.info "Finished parsing graph: found #{bio_velvet_graph.nodes.length} nodes and #{bio_velvet_graph.arcs.length} arcs"

  if options[:dont_parse_noded_reads]
    graph = bio_velvet_graph
  else
    log.info "Beginning parse of graph using velvet's parsing C code.."
    read_probing_graph = Bio::Velvet::Underground::Graph.parse_from_file last_graph_path
    log.info "Completed velvet code parsing velvet graph"

    # Make the two graphs into a hybrid one
    graph = Bio::FinishM::HybridGraph.new(bio_velvet_graph, read_probing_graph)
  end
  finishm_graph.graph = graph

  # Find the anchor nodes again
  anchor_sequence_ids = probe_read_ids.to_a.sort
  endings = []
  unless probe_read_ids.empty? && options[:probe_read_names].nil? #don't bother trying to find probes if none exists
    # Convert read names to read IDs if required
    if options[:probe_read_names]
      # Probe reads are given as names, not IDs. What are the corresponding probes then?
      entries = Bio::Velvet::CnyUnifiedSeqNamesFile.extract_entries_using_grep_hack(
        File.join(velvet_result.result_directory, 'CnyUnifiedSeq.names'),
        options[:probe_read_names]
      )
      anchor_sequence_ids = []
      double_counts = 0
      options[:probe_read_names].each do |name| #maintain order of them as they are specified in the original array parameter
        found = entries[name]
        # A name with no entry at all is reported the same way as an empty one
        if found.nil? || found.empty?
          raise "Unable to find probe `#{name}' in the probe reads file - was it included in the assembly?"
        elsif found.length > 2
          raise "Found >2 sequences named #{name} in the assembly, being conservative and not continuing"
        else
          found.each do |res|
            anchor_sequence_ids.push res.read_id
          end
          if found.length == 2
            double_counts += 1
            log.debug "Found 2 sequences for #{name}" if log.debug?
          end
        end
      end
      if double_counts > 0
        log.info "#{double_counts} reads were found twice (likely as pairs), including both as probes"
      end
      log.info "Recovered #{anchor_sequence_ids.length} sequences using their names" if log.info?
    end

    # Parse the read to node structure
    log.info "Reading ReadToNode.bin file.." if log.info?
    finishm_graph.read_to_nodes = Bio::FinishM::ReadToNode.new(File.join(velvet_result.result_directory, 'ReadToNode.bin'))

    finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
    log.info "Finding probe nodes in the assembly"
    c_graph_endings = finder.find_probes_from_read_to_node(finishm_graph.graph, finishm_graph.read_to_nodes, anchor_sequence_ids)
    log.debug "Converting probe nodes found in C graph to Ruby analogues and adding to Ruby-parsed graph"
    endings = c_graph_endings.collect do |node_direction_read|
      if node_direction_read.empty?
        # No probe found
        []
      else #found a node.
        #equivalent node
        node = graph.nodes[node_direction_read[0].node_id]
        #equivalent direction
        direction = node_direction_read[1]
        #equivalent noded read
        cnr = node_direction_read[2]
        nr = Bio::Velvet::Graph::NodedRead.new
        nr.read_id = cnr.read_id
        nr.offset_from_start_of_node = cnr.offset_from_start_of_node
        nr.start_coord = cnr.start_coord
        nr.direction = direction
        # collect
        [node, direction, nr]
      end
    end
  end
  # An empty ending (probe not found) yields nil in each of these lists
  finishm_graph.probe_nodes = endings.collect{|array| array[0]}
  finishm_graph.probe_node_directions = endings.collect{|array| array[1]}
  finishm_graph.probe_node_reads = endings.collect{|array| array[2]}

  # Check to make sure the probe sequences map to nodes in the graph
  if finishm_graph.completely_probed?
    if log.info?
      found_all = true
      num_found = 0
      finishm_graph.probe_nodes.each_with_index do |probe,i|
        if probe.nil?
          found_all = false
          log.debug "Unable to recover probe ##{i+1}, perhaps this will cause problems, but proceding optimistically"
        else
          num_found += 1
        end
      end
      if found_all
        if finishm_graph.probe_nodes.empty?
          log.debug "No probes specified, so didn't find any"
        else
          log.info "Found all anchoring nodes in the graph."
        end
      else
        log.info "Found #{num_found} of #{finishm_graph.probe_nodes.length} anchoring nodes in the graph, ignoring the rest"
      end
    end
  else
    raise "Unable to find all anchor reads from the assembly, cannot continue. This is probably an error with this script, not you. Probes not found: #{finishm_graph.missing_probe_indices.inspect}"
  end

  if options[:post_assembly_coverage_cutoff]
    log.info "Removing nodes with coverage < #{options[:post_assembly_coverage_cutoff]} from graph.."
    original_num_nodes = graph.nodes.length
    original_num_arcs = graph.arcs.length
    filter = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
    filter.remove_low_coverage_nodes(graph,
      options[:post_assembly_coverage_cutoff],
      :whitelisted_sequences => Set.new(anchor_sequence_ids)
    )
    log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
  end

  if options[:remove_unconnected_nodes]
    if options[:graph_search_leash_length]
      log.info "Removing nodes unconnected to probe nodes from the graph using leash #{options[:graph_search_leash_length]}.."
    else
      log.info "Removing nodes unconnected to probe nodes from the graph without using a leash.."
    end
    original_num_nodes = graph.nodes.length
    original_num_arcs = graph.arcs.length
    filter = Bio::AssemblyGraphAlgorithms::ConnectivityBasedGraphFilter.new
    filter.remove_unconnected_nodes(
      graph,
      finishm_graph.probe_nodes.reject{|n| n.nil?},
      :leash_length => options[:graph_search_leash_length]
    )
    log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
  end

  return finishm_graph
end
287
+
288
# Open the 'CnyUnifiedSeq' file inside velvet_result_directory as a
# BinarySequenceStore, logging the path before and the sequence count after.
#
# Returns the BinarySequenceStore.
def parse_velvet_binary_reads(velvet_result_directory)
  sequences_file_path = File.join(velvet_result_directory, 'CnyUnifiedSeq')
  log.info "Reading in the actual sequences of all reads from #{sequences_file_path}"
  Bio::Velvet::Underground::BinarySequenceStore.new(sequences_file_path).tap do |store|
    log.info "Read in #{store.length} sequences"
  end
end
296
+
297
# When re-using an assembly, sometimes need to make
# sure that the probe sequences used previously are the same
# as what is given this time. Given an Array of probe sequences
# and a sequence store, check that the probe sequences are consistent.
#
# probe_sequences:: Array of String probes, or nil to skip the check
# sequence_store:: object indexed by 1-based sequence number; probe i
#                  (0-based) is compared with entry i+1, ignoring case
#
# Returns true when every probe matches (or probe_sequences is nil). Returns
# false, after logging an error, at the first probe that differs or that has
# no entry in the store.
def check_probe_sequences(probe_sequences, sequence_store)
  return true if probe_sequences.nil?

  probe_sequences.each_with_index do |probe, i|
    sequence_number = i + 1
    log.debug "Checking probe sequence \##{sequence_number}" if log.debug?
    stored = sequence_store[sequence_number]
    # A missing entry (nil) counts as a changed probe rather than crashing
    if stored.nil? || stored.upcase != probe.upcase
      log.error "Probe sequence \##{sequence_number} has changed - perhaps the wrong velvet assembly directory was specified, or a fresh assembly is required?"
      return false
    end
  end
  log.debug "Presence of #{probe_sequences.length} probe sequences verified"
  true
end
315
+ end
@@ -0,0 +1,355 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ class Bio::AssemblyGraphAlgorithms::HeightFinder
5
+ include Bio::FinishM::Logging
6
+
7
# Visit nodes in range and determine heights.
#
# Runs a stack-driven depth-first walk from initial_nodes. A node with no
# neighbours gets height 0. Any other node is pushed back onto the stack in
# a "retrace" phase, its neighbours are visited, and when it is popped again
# its height becomes one more than the largest known neighbour height. A
# node is left without a height if none of its neighbours has one by then.
#
# graph:: passed to +next_neighbours+ of each oriented node
# initial_nodes:: oriented nodes to start from
# options:
# :range:: if given, only nodes equal to a member of this collection are visited
# :reverse:: if true, start from the reverse of each initial node
#
# Returns [by_height, cycles]. by_height is an Array indexed by height whose
# entries are Arrays of traversal nodes. cycles is an Array of the oriented
# node lists of each distinct closed cycle found.
def traverse(graph, initial_nodes, options={})
  by_height = []
  traversal_nodes = {}
  cycles = {}
  nodes_in_retrace_phase = Set.new

  # depth-first so stack
  stack = DS::Stack.new
  initial_nodes.each do |onode|
    next if options[:range] && options[:range].none?{|other| other == onode.node }
    traversal_node = CyclicTraversalNode.new
    traversal_node.onode = options[:reverse] ? onode.reverse : onode
    traversal_node.nodes_in = []
    traversal_nodes[traversal_node.onode.to_settable] = traversal_node
    stack.push traversal_node
  end

  while (traversal_node = stack.pop)
    settable = traversal_node.onode.to_settable

    log.debug "visiting #{traversal_node.describe}." if log.debug?

    # Consider node solved if height is known.
    unless traversal_node.height.nil?
      log.debug "Height of #{traversal_node.describe} is known. Skip." if log.debug?
      next
    end

    # find neighbours (cached on the traversal node after the first visit)
    neighbours = traversal_node.nodes_out
    if neighbours.nil?
      neighbours = traversal_node.onode.next_neighbours(graph)
      if options[:range]
        neighbours.reject!{|onode| options[:range].none?{|other| other == onode.node}} #not in defined range
      end

      # Get or create traversal version of node
      neighbours = neighbours.collect do |onode|
        nbr_settable = onode.to_settable
        traversal_nbr = traversal_nodes[nbr_settable]
        if traversal_nbr.nil?
          traversal_nbr = CyclicTraversalNode.new
          traversal_nbr.onode = onode
          traversal_nbr.nodes_in = []
          traversal_nodes[nbr_settable] = traversal_nbr
        end
        traversal_nbr
      end

      #remember neighbours
      traversal_node.nodes_out = neighbours
    end

    # Can we solve the node?
    if neighbours.empty? #check for a tip
      log.debug "#{traversal_node.describe} is a tip." if log.debug?
      traversal_node.height = 0
      (by_height[0] ||= []).push(traversal_node)
      log.debug "Found height '0' for #{traversal_node.describe}." if log.debug?
      next
    end

    if nodes_in_retrace_phase.include? settable
      log.debug "Retracing back to #{traversal_node.describe}." if log.debug?

      # Neighbours should have been explored
      # Are neighbours involved in cycles?
      cyclic_neighbours = neighbours.reject{|node| node.cycles.nil?}
      unless cyclic_neighbours.empty?
        # current node is in a cycle if a neighbour is in an unclosed cycle
        log.debug "Found cyclic neighbours #{cyclic_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        cyclic_neighbours.each do |node|
          node.cycles.each do |cycle|
            log.debug "Merging cycle #{cycle.onodes.collect{|onode| onode.to_shorthand}.join(',')}." if log.debug?
            new_cycle = traversal_node.merge_unclosed_cycle cycle.copy
            next unless !new_cycle.nil? && new_cycle.closed?

            log.debug "Cycle completes at #{traversal_node.describe}." if log.debug?
            new_cycle_key = new_cycle.to_settable
            if cycles.has_key? new_cycle_key
              log.debug "Already seen this cycle." if log.debug?
            else
              cycles[new_cycle_key] = new_cycle.onodes
            end
          end
        end
      end

      # Unsolved neighbours imply a closed cyclic path.
      # Are neighbours unsolved?
      solved_neighbours = neighbours.reject{|node| node.height.nil?}
      unless solved_neighbours.empty?
        log.debug "We know the heights of neighbours #{solved_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        # Compute height from solved neighbours
        height = solved_neighbours.map{|node| node.height}.max + 1
        log.debug "Found height '#{height}' for #{traversal_node.describe}." if log.debug?
        traversal_node.height = height
        (by_height[height] ||= []).push(traversal_node)
      end
      # If no solved neighbours, leave unsolved

      # Move out of retrace phase
      nodes_in_retrace_phase.delete settable
      log.debug "Finished retracing #{traversal_node.describe}." if log.debug?
      next
    end

    # Move current node to retrace phase, before checking for retracing neighbours in case is own neighbour
    nodes_in_retrace_phase << settable

    # Look for currently retracing neighbours and initiate cycles
    retracing_neighbours = neighbours.select{|node| nodes_in_retrace_phase.include? node.onode.to_settable}
    unless retracing_neighbours.empty?
      log.debug "Initiating cycles for neighbours #{retracing_neighbours.collect{|node| node.describe}.join(',')} currently retracing." if log.debug?
      # initiate cycles for each retracing neighbour
      retracing_neighbours.each{|node| traversal_node.initiate_cycle(node.onode)}
    end

    # Return node stack and push neighbours
    stack.push traversal_node
    log.debug "Pushing #{traversal_node.describe} in retrace mode." if log.debug?
    neighbours.each do |node|
      node_settable = node.onode.to_settable

      # Note the current node as a parent of the neighbour unless already known.
      # The comparison is against the current node, not the neighbour itself:
      # otherwise a neighbour with a self-loop would never record later parents.
      nodes_in = node.nodes_in
      unless nodes_in.any?{|parent| parent.onode.to_settable == settable}
        nodes_in.push traversal_node
      end

      if nodes_in_retrace_phase.include? node_settable
        # A currently retracing neighbour implies a cycle, cut it off here
        log.debug "Neighbour #{node.describe} is retracing. Not revisiting." if log.debug?
      else
        log.debug "Pushing neighbour #{node.describe}." if log.debug?
        stack.push node
      end
    end
  end
  return by_height, cycles.values
end
161
+
162
# Bookkeeping record for one oriented node during a traversal: the oriented
# node itself, its height once known, and the traversal nodes leading into
# and out of it.
class TraversalNode
  attr_accessor :onode, :height, :nodes_in, :nodes_out

  # Shorthand label of the wrapped oriented node, used in log messages.
  def describe
    onode.to_shorthand
  end

  # Node ID of the wrapped oriented node.
  def node_id
    onode.node_id
  end
end
173
+
174
# A TraversalNode that additionally records the cycle paths passing through it.
class CyclicTraversalNode < TraversalNode
  attr_accessor :cycles

  # Start a new cycle path ending at onode and merge this node into it.
  # Returns whatever merge_unclosed_cycle returns.
  def initiate_cycle(onode)
    fresh_cycle = CyclePath.new
    fresh_cycle.onodes = [onode]
    merge_unclosed_cycle(fresh_cycle)
  end

  # Add this node to cycle and remember the cycle on this node.
  #
  # If the last oriented node of the cycle equals this node's oriented node,
  # the cycle is marked closed; otherwise this node's oriented node is
  # prepended. Returns the (modified) cycle, or nil without doing anything if
  # the cycle was already closed.
  def merge_unclosed_cycle(cycle)
    return if cycle.closed?

    if cycle.onodes.last == @onode
      cycle.closed = true
    else
      cycle.onodes.unshift @onode
    end

    @cycles = [] if @cycles.nil?
    @cycles.push(cycle)
    cycle
  end

  # An ordered list of oriented nodes forming (part of) a cycle, with a flag
  # set once the cycle has been closed.
  class CyclePath
    attr_accessor :onodes, :closed

    # True only when the closed flag is exactly true.
    def closed?
      @closed == true
    end

    # A new CyclePath with a shallow copy of the node list and the same flag.
    def copy
      duplicate = CyclePath.new
      duplicate.onodes = @onodes.slice(0..-1)
      duplicate.closed = @closed
      duplicate
    end

    # The settable forms of the oriented nodes, sorted by their first element
    # and, on ties, putting START_IS_FIRST entries first. Used as a key that
    # does not depend on where in the cycle the path started.
    def to_settable
      start_is_first = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode::START_IS_FIRST
      settables = @onodes.collect { |onode| onode.to_settable }
      settables.sort do |a, b|
        order = a[0] <=> b[0]
        order = (a[1] == start_is_first ? -1 : 1) if order == 0
        order
      end
    end
  end
end
224
+
225
+
226
# Maximum number of paths through the graph.
#
# by_height:: Array indexed by height of Arrays of traversal nodes, as
#             returned by traverse
#
# Each height-0 node counts as 1. Every other node counts the sum of the
# counts of its nodes_out (neighbours without a count are ignored). The
# result is the sum over the nodes whose nodes_in is empty; it is nil when
# there are no such nodes.
def max_paths_through(by_height)
  max_paths_from = {}
  by_height.each_with_index do |nodes, level|
    log.debug "At height #{level}." if log.debug?
    nodes.each do |node|
      if level == 0 # tips
        log.debug "Counted maximum of 1 path to #{node.describe}." if log.debug?
        max_paths_from[node.onode.to_settable] = 1
      else
        settable = node.onode.to_settable
        max_paths_from_neighbours = node.nodes_out.collect { |nbr| max_paths_from[nbr.onode.to_settable] }.compact
        log.debug "Found neighbours of #{node.describe} with maximum paths #{max_paths_from_neighbours.join(',')}." if log.debug?
        max_paths_from[settable] = max_paths_from_neighbours.reduce(:+)
        log.debug "Counted maximum of #{max_paths_from[settable]} paths to #{node.describe}." if log.debug?
      end
    end
  end

  # Get the graph roots (which are nodes with no parents) and add max_paths_from for each to get graph total
  roots = by_height.flatten.select { |node| node.nodes_in.empty? }
  root_keys = roots.collect { |node| node.onode.to_settable }
  log.debug "Found graph roots #{root_keys.collect{|settable| settable[0]}.join(',')} with maximum paths #{root_keys.collect{|key| max_paths_from[key]}.join(',')}." if log.debug?
  max_paths = root_keys.map { |settable| max_paths_from[settable] }.reduce(:+)
  log.debug "Counted maximum of #{max_paths} through graph." if log.debug?
  max_paths
end
255
+
256
# Minimum number of paths through the graph.
#
# by_height:: Array indexed by height of Arrays of traversal nodes, as
#             returned by traverse
#
# Walks the levels from height 0 upwards keeping a set of "live" nodes:
# nodes at the current level become live, and (above height 0) their
# nodes_out stop being live. Returns the largest size the live set had at
# the end of any level, or 0 when by_height is empty.
def min_paths_through(by_height)
  live_nodes = Set.new
  max_alive_counter = 0

  by_height.each_with_index do |nodes, level|
    log.debug "At height #{level}." if log.debug?

    # nodes at current level become live
    nodes.each do |node|
      log.debug "Setting #{node.describe} as live." if log.debug?
      live_nodes << node.onode.to_settable
    end

    unless level <= 0
      #children of nodes at current level are no longer live
      nodes.each do |node|
        node.nodes_out.each do |nbr|
          log.debug "Setting child #{nbr.describe} of live node #{node.describe} as inactive." if log.debug?
          live_nodes.delete(nbr.onode.to_settable)
        end
      end
    end

    log.debug "There are currently #{live_nodes.length} nodes alive. Max is #{max_alive_counter}." if log.debug?
    next unless live_nodes.length > max_alive_counter

    #track the maximum live nodes at any level
    log.debug "Updating max to #{live_nodes.length}." if log.debug?
    max_alive_counter = live_nodes.length
  end

  max_alive_counter
end
288
+
289
# Find the oriented start and end points of a set of nodes.
#
# graph:: passed to +next_neighbours+; its +nodes+ are used when nodes is nil
# nodes:: the nodes making up the range (default: all nodes of the graph)
#
# An oriented node is "unreached" when no oriented node built from the range
# lists it as a next neighbour. Starting from the unreached nodes, the graph
# is walked depth-first, visiting each node ID at most once and skipping
# nodes outside the range. A visited node that is itself unreached becomes a
# start node; otherwise, if its reverse is unreached, that reverse becomes
# an end node.
#
# Returns [start_onodes, end_onodes].
def find_oriented_edge_of_range(graph, nodes=nil)
  nodes ||= graph.nodes
  log.debug "Looking for oriented start and end points from #{nodes.collect{|n| n.node_id}.join(',')}" if log.debug?
  nodes_all_directions = nodes.collect { |node| [[node, true], [node, false]] }.flatten(1)

  # Find nodes and directions which are not reachable from other nodes within range.
  # A nil value marks an oriented node that something else leads to.
  unreached_nodes = {}
  nodes_all_directions.each do |node, direction|
    onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, direction
    key = onode.to_settable
    unreached_nodes[key] = onode unless unreached_nodes.has_key?(key)
    onode.next_neighbours(graph).each do |oneigh|
      unreached_nodes[oneigh.to_settable] = nil
    end
  end

  entry_points = unreached_nodes.values.compact
  log.debug "Found the following nodes for a particular orientation have no paths connecting to other nodes in range: #{entry_points.collect{|n| n.to_shorthand}.join(',')}" if log.debug?

  # Start from an unreachable node, and trace all paths until the reverse end of other unreachable nodes
  # are reached, which are then defined as 'end' nodes. When finished, choose a remaining non-end unreachable
  # node and repeat, stopping paths if an already seen node is encountered.
  seen_nodes = Set.new
  start_onodes = []
  end_onodes = []
  stack = DS::Stack.new
  # Pushed in reverse so the first entry point is popped first
  entry_points.reverse_each { |onode| stack.push onode }

  while (current_node = stack.pop)
    log.debug "At node #{current_node.to_shorthand}" if log.debug?

    node_id = current_node.node_id
    if seen_nodes.include?(node_id) || !nodes.include?(current_node.node)
      log.debug "Node has been seen or is out of range. Skipping..." if log.debug?
      next
    end
    seen_nodes << node_id

    current_unreached = unreached_nodes[current_node.to_settable]
    log.debug "Is current unreached? #{current_unreached}" if log.debug?
    if current_unreached
      log.debug "Defining starting node #{current_unreached.to_shorthand}" if log.debug?
      # Found start node
      start_onodes.push current_unreached
    else
      reverse_unreached = unreached_nodes[current_node.reverse.to_settable]
      log.debug "Is reverse unreached? #{reverse_unreached}" if log.debug?
      if reverse_unreached
        log.debug "Found ending node #{reverse_unreached.to_shorthand}" if log.debug?
        # Found end node
        end_onodes.push reverse_unreached
      end
    end

    current_node.next_neighbours(graph).each do |onode|
      log.debug "Adding neighbour #{onode.to_shorthand} to stack" if log.debug?
      stack.push onode
    end
  end

  return start_onodes, end_onodes
end
355
+ end