finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,85 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
module Bio
  module AssemblyGraphAlgorithms
    # Enumerates the paths that lead away from the end of a starting trail
    # in an assembly graph.
    class GraphExplorer
      # Return all paths that emanate from the end of a given path, in the graph.
      #
      # graph:: the graph, handed unchanged to initial_path.neighbours_of_last_node
      # initial_path:: the trail to extend. It must respond to
      #                #neighbours_of_last_node, #length_in_bp, #add_oriented_node,
      #                #copy and #collect. It is not copied before use, so it may be
      #                extended in place.
      # leash_length:: a path stops being extended once its length_in_bp
      #                exceeds this value. nil means no leash.
      #
      # Returns an Array of ExplorationPath objects. Each has termination_type
      # set to 'Leashed', 'Dead end / coverage' or 'Loop'.
      def explore_from_node(graph, initial_path, leash_length)
        # Do a simple depth first search, forking at each node. Vanilla graph traversal.
        stack = DS::Stack.new
        stack.push ExplorationPath.new(initial_path)
        found_paths = []

        # While there's more paths to explore
        while (current_path = stack.pop)
          if !leash_length.nil? && current_path.path.length_in_bp > leash_length
            current_path.termination_type = 'Leashed'
            found_paths.push current_path
            next
          end

          neighbours = current_path.path.neighbours_of_last_node(graph)
          if neighbours.empty?
            current_path.termination_type = 'Dead end / coverage'
            found_paths.push current_path
            next
          end

          neighbours_to_add = []
          neighbours.each do |oriented_neighbour|
            # Test for loops, I'm only interested in acyclic paths for the moment
            if current_path.include?(oriented_neighbour)
              # Loop found: record the path including the repeated node, and stop there
              loop_path = current_path.copy
              loop_path.add_node oriented_neighbour
              loop_path.termination_type = 'Loop'
              found_paths.push loop_path
            else
              neighbours_to_add.push oriented_neighbour
            end
          end

          # Fork once per remaining neighbour. The current path object is reused
          # for the last neighbour (all copies have been taken by then), which
          # saves one copy per visited node.
          last_index = neighbours_to_add.length - 1
          neighbours_to_add.each_with_index do |oriented_neighbour, i|
            next_path = i == last_index ? current_path : current_path.copy
            next_path.add_node oriented_neighbour
            stack.push next_path
          end
        end

        found_paths
      end

      # A path under exploration, paired with a Set of the settable forms of
      # its nodes so that loop detection does not need to scan the path.
      class ExplorationPath
        attr_accessor :path, :set_of_nodes, :termination_type

        # path:: the underlying trail. It is stored as given, not copied.
        def initialize(path)
          @path = path
          @set_of_nodes = Set.new path.collect { |n| n.to_settable }
        end

        # Is this oriented node (as judged by its #to_settable) already in the path?
        def include?(oriented_node)
          @set_of_nodes.include?(oriented_node.to_settable)
        end

        # Append an oriented node to the path and record it for loop detection.
        def add_node(onode)
          path.add_oriented_node onode
          @set_of_nodes << onode.to_settable
        end

        # Return a new ExplorationPath wrapping a copy of the underlying path.
        # The termination_type is not carried over.
        def copy
          ExplorationPath.new @path.copy
        end

        # Comma-separated node IDs, in path order.
        def to_s
          @path.collect { |on| on.node_id }.join(',')
        end
      end
    end
  end
end
@@ -0,0 +1,315 @@
1
+ require 'bio-velvet'
2
+ require 'bio'
3
+ require 'pry'
4
+
5
# Runs (or re-uses) a velvet assembly of reads plus probe sequences, parses
# the resulting graph and locates the probes within it.
class Bio::FinishM::GraphGenerator
  include Bio::FinishM::Logging

  DEFAULT_OPTIONS = {
    :velvet_kmer_size => 51,
    :assembly_coverage_cutoff => 3.5,
  }

  # Register the assembly-related command line flags on option_parser.
  #
  # option_parser:: object responding to #on (flag, description, block)
  # options:: Hash that receives the parsed values. DEFAULT_OPTIONS is merged
  #           into it first, overwriting any values already present for
  #           those keys.
  def add_options(option_parser, options)
    options.merge!(DEFAULT_OPTIONS)
    option_parser.on("--assembly-kmer NUMBER", "when assembling, use this kmer length [default: #{options[:velvet_kmer_size] }]") do |arg|
      options[:velvet_kmer_size] = arg.to_i
    end
    option_parser.on("--assembly-coverage-cutoff NUMBER", "Require this much coverage in each node, all other nodes are removed [default: #{options[:assembly_coverage_cutoff] }]") do |arg|
      options[:assembly_coverage_cutoff] = arg.to_f
    end
    option_parser.on("--post-assembly-coverage-cutoff NUMBER", "Require this much coverage in each node, implemented after assembly [default: not used]") do |arg|
      options[:post_assembly_coverage_cutoff] = arg.to_f
    end
    option_parser.on("--velvet-directory PATH", "Output assembly intermediate files to this directory [default: use temporary directory, delete afterwards]") do |arg|
      options[:output_assembly_path] = arg
    end
    option_parser.on("--already-assembled-velvet-directory PATH", "If an assembly directory has been specified previously with --velvet-directory, re-use this assembly rather than re-doing the assembly [default: off]") do |arg|
      options[:previous_assembly] = arg
    end
  end

  # Generate a ProbedGraph object, given one or more 'probe sequences'
  # and metagenomic reads. This is approximately repeated in different
  # applications of FinishM, so it lives here for DRY purposes.
  #
  # probe_sequences: DNA sequences (as String objects whose direction points to the outsides of contigs)
  # read_inputs: a ReadInput object, containing the information to feed to velveth.
  #   When a fresh assembly is run, its fasta_singles is modified so that the
  #   probe file comes first.
  #
  # options:
  # :probe_reads: a list of sequence numbers (numbering as per velvet Sequence file)
  # :probe_read_names: a list of sequence names (not IDs) that are probes (convert the names to IDs using the CnyUnifiedSeqNames file). There may not be a one to one correspondence of these read names and the probe reads returned in the ProbedGraph since reads can map to multiple sequence IDs.
  # :velvet_kmer_size: kmer
  # :assembly_coverage_cutoff: coverage cutoff for nodes
  # :post_assembly_coverage_cutoff: apply this coverage cutoff to nodes after parsing assembly
  # :output_assembly_path: write assembly to this directory
  # :previous_assembly: a velvet directory from a previous run of the same probe sequences and reads. (Don't re-assemble)
  # :use_textual_sequence_file: by default, a binary sequence file is used. Set this true to get velvet to generate the Sequences file
  # :remove_unconnected_nodes: delete nodes from the graph that are not connected to the probe nodes
  # :graph_search_leash_length: when :remove_unconnected_nodes'ing, use this leash length
  # :dont_parse_noded_reads: if true, skip parsing noded reads (ie the positions of the reads in the graph)
  # :dont_parse_reads: if true, skip parsing reads (ie the sequences of the reads themselves)
  #
  # Returns the populated Bio::FinishM::ProbedGraph. Raises a RuntimeError
  # when the velvet binary has the wrong version, when required assembly
  # options are missing, when the probes do not match the stored assembly,
  # or when not all probes can be located in the graph.
  def generate_graph(probe_sequences, read_inputs, options={})
    options[:parse_sequence_file] ||= true
    finishm_graph = Bio::FinishM::ProbedGraph.new

    log.debug "Options for generate_graph: #{options}" if log.debug?

    velvet_binary_folder = File.join(File.dirname(__FILE__),'..','..','ext','src')
    log.debug "Using velvet binary folder #{velvet_binary_folder}" if log.debug?

    # Without explicit probe read IDs, the probes are taken to be the
    # first probe_sequences.length sequences of the assembly (1-based).
    probe_read_ids = options[:probe_reads] || Set.new((1..probe_sequences.length))

    velvet_result = nil
    if options[:previous_assembly].nil? #If assembly has not already been carried out
      velvet_result = assemble_with_velvet(probe_sequences, read_inputs, options, velvet_binary_folder)
    else
      log.info "Using previous assembly stored in #{options[:previous_assembly] }"
      velvet_result = Bio::Velvet::Result.new
      velvet_result.result_directory = options[:previous_assembly]
    end
    finishm_graph.velvet_result_directory = velvet_result.result_directory

    # Check that the probe reads given are present in the assembly passed here
    unless options[:dont_parse_reads]
      sequence_store = parse_velvet_binary_reads(velvet_result.result_directory)
      finishm_graph.velvet_sequences = sequence_store
      unless check_probe_sequences(probe_sequences, sequence_store)
        raise "Probe sequences changed since previous velvet assembly!"
      end
    end

    log.info "Parsing the graph output from velvet"
    last_graph_path = File.join(velvet_result.result_directory, 'LastGraph')
    opts = {
      # noded reads are parsed in via C, if they are wanted at all
      :dont_parse_noded_reads => true
    }
    bio_velvet_graph = Bio::Velvet::Graph.parse_from_file(last_graph_path, opts)
    log.info "Finished parsing graph: found #{bio_velvet_graph.nodes.length} nodes and #{bio_velvet_graph.arcs.length} arcs"

    if options[:dont_parse_noded_reads]
      graph = bio_velvet_graph
    else
      log.info "Beginning parse of graph using velvet's parsing C code.."
      read_probing_graph = Bio::Velvet::Underground::Graph.parse_from_file last_graph_path
      log.info "Completed velvet code parsing velvet graph"

      # Make the two graphs into a hybrid one
      graph = Bio::FinishM::HybridGraph.new(bio_velvet_graph, read_probing_graph)
    end
    finishm_graph.graph = graph

    # Find the anchor nodes again
    anchor_sequence_ids = probe_read_ids.to_a.sort
    endings = []
    unless probe_read_ids.empty? && options[:probe_read_names].nil? #don't bother trying to find probes if none exists
      # Convert read names to read IDs if required
      if options[:probe_read_names]
        anchor_sequence_ids = anchor_sequence_ids_from_names(
          velvet_result.result_directory,
          options[:probe_read_names]
        )
      end

      # Parse the read to node structure
      log.info "Reading ReadToNode.bin file.." if log.info?
      finishm_graph.read_to_nodes = Bio::FinishM::ReadToNode.new(File.join(velvet_result.result_directory, 'ReadToNode.bin'))

      finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
      log.info "Finding probe nodes in the assembly"
      c_graph_endings = finder.find_probes_from_read_to_node(finishm_graph.graph, finishm_graph.read_to_nodes, anchor_sequence_ids)
      log.debug "Converting probe nodes found in C graph to Ruby analogues and adding to Ruby-parsed graph"
      endings = ruby_probe_endings(graph, c_graph_endings)
    end
    finishm_graph.probe_nodes = endings.collect{|array| array[0]}
    finishm_graph.probe_node_directions = endings.collect{|array| array[1]}
    finishm_graph.probe_node_reads = endings.collect{|array| array[2]}

    # Check to make sure the probe sequences map to nodes in the graph
    if finishm_graph.completely_probed?
      log_probe_recovery(finishm_graph.probe_nodes) if log.info?
    else
      raise "Unable to find all anchor reads from the assembly, cannot continue. This is probably an error with this script, not you. Probes not found: #{finishm_graph.missing_probe_indices.inspect}"
    end

    if options[:post_assembly_coverage_cutoff]
      log.info "Removing nodes with coverage < #{options[:post_assembly_coverage_cutoff] } from graph.."
      original_num_nodes = graph.nodes.length
      original_num_arcs = graph.arcs.length
      filter = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
      filter.remove_low_coverage_nodes(graph,
        options[:post_assembly_coverage_cutoff],
        :whitelisted_sequences => Set.new(anchor_sequence_ids)
        )
      log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
    end

    if options[:remove_unconnected_nodes]
      if options[:graph_search_leash_length]
        log.info "Removing nodes unconnected to probe nodes from the graph using leash #{options[:graph_search_leash_length] }.."
      else
        log.info "Removing nodes unconnected to probe nodes from the graph without using a leash.."
      end
      original_num_nodes = graph.nodes.length
      original_num_arcs = graph.arcs.length
      filter = Bio::AssemblyGraphAlgorithms::ConnectivityBasedGraphFilter.new
      filter.remove_unconnected_nodes(
        graph,
        finishm_graph.probe_nodes.reject{|n| n.nil?},
        :leash_length => options[:graph_search_leash_length]
        )
      log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
    end

    return finishm_graph
  end

  # Read in the reads from a velvet result.
  #
  # velvet_result_directory:: directory containing the 'CnyUnifiedSeq' file
  #
  # Returns a Bio::Velvet::Underground::BinarySequenceStore.
  def parse_velvet_binary_reads(velvet_result_directory)
    sequences_file_path = File.join velvet_result_directory, 'CnyUnifiedSeq'
    log.info "Reading in the actual sequences of all reads from #{sequences_file_path}"
    sequences = Bio::Velvet::Underground::BinarySequenceStore.new sequences_file_path
    log.info "Read in #{sequences.length} sequences"
    return sequences
  end

  # When re-using an assembly, sometimes need to make
  # sure that the probe sequences used previously are the same
  # as what is given this time. Given an Array of probe sequences
  # and a sequence store, check that the probe sequences are
  # consistent: probe number i (0-based) is compared, ignoring case,
  # with sequence_store[i+1].
  #
  # Returns true when every probe matches or probe_sequences is nil,
  # false (after logging an error) at the first mismatch.
  def check_probe_sequences(probe_sequences, sequence_store)
    return true if probe_sequences.nil?

    probe_sequences.each_with_index do |probe, i|
      log.debug "Checking probe sequence \##{i+1}" if log.debug?
      stored = sequence_store[i+1]
      # A missing stored sequence is treated as a mismatch rather than
      # crashing on nil.upcase.
      # NOTE(review): assumes the store yields nil for an absent ID - confirm.
      if stored.nil? || stored.upcase != probe.upcase
        log.error "Probe sequence \##{i+1} has changed - perhaps the wrong velvet assembly directory was specified, or a fresh assembly is required?"
        return false
      end
    end
    log.debug "Presence of #{probe_sequences.length} probe sequences verified"
    return true
  end

  private

  # Write the probes to a temporary FASTA file, put that file first in
  # read_inputs.fasta_singles and run velveth/velvetg from
  # velvet_binary_folder. Returns the result object from the velvet runner.
  def assemble_with_velvet(probe_sequences, read_inputs, options, velvet_binary_folder)
    velvet_result = nil
    Tempfile.open('probes.fa') do |tempfile|
      50.times do # Do 50 times to make sure that velvet doesn't throw out parts of the graph that contain this contig
        probe_sequences.each_with_index do |probe, i|
          tempfile.puts ">probe#{i}"
          tempfile.puts probe
        end
      end
      tempfile.close

      singles = read_inputs.fasta_singles
      if singles && !singles.empty?
        read_inputs.fasta_singles = [tempfile.path, singles].flatten
      else
        read_inputs.fasta_singles = [tempfile.path]
      end
      # File.read closes the file itself, so no handle is left open
      log.debug "Inputting probes into the assembly:\n#{File.read(tempfile.path)}" if log.debug?

      runner = Bio::Velvet::Runner.new
      required_version = '1.2.10-wwood_finishm'
      found_version = runner.binary_version(File.join(velvet_binary_folder, 'velveth'))
      if found_version != required_version
        raise "Detected velvet version incompatible with FinishM: #{found_version}, expected #{required_version} which is available from https://github.com/wwood/velvet (on branch less_clipping)"
      end

      log.info "Assembling sampled reads with velvet"
      raise "Need to specify -cov_cutoff" if options[:assembly_coverage_cutoff].nil?
      raise "Need to specify a kmer size" if options[:velvet_kmer_size].nil?
      # Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
      use_binary = options[:use_textual_sequence_file] ? '' : '-create_binary'
      velvet_result = runner.velvet(
        options[:velvet_kmer_size],
        "#{read_inputs.velvet_read_arguments} #{use_binary}",
        "-read_trkg yes -cov_cutoff #{options[:assembly_coverage_cutoff] } -tour_bus no -read_to_node_binary yes",
        :output_assembly_path => options[:output_assembly_path],
        :velveth_path => File.join(velvet_binary_folder, 'velveth'),
        :velvetg_path => File.join(velvet_binary_folder, 'velvetg'),
        )
      if log.debug?
        log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
        log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
        log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
        log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
      end
      log.info "Finished running assembly"
    end
    velvet_result
  end

  # Probe reads are given as names, not IDs. Look the names up in the
  # 'CnyUnifiedSeq.names' file of the assembly and return the matching read
  # IDs, in the order the names were given. Raises if a name is absent or
  # matches more than two sequences.
  def anchor_sequence_ids_from_names(velvet_result_directory, probe_read_names)
    entries = Bio::Velvet::CnyUnifiedSeqNamesFile.extract_entries_using_grep_hack(
      File.join(velvet_result_directory, 'CnyUnifiedSeq.names'),
      probe_read_names
      )
    anchor_sequence_ids = []
    double_counts = 0
    probe_read_names.each do |name| #maintain order of them as they are specified in the original array parameter
      if entries[name].empty?
        raise "Unable to find probe `#{name}' in the probe reads file - was it included in the assembly?"
      elsif entries[name].length > 2
        raise "Found >2 sequences named #{name} in the assembly, being conservative and not continuing"
      else
        entries[name].each do |res|
          anchor_sequence_ids.push res.read_id
        end
        if entries[name].length == 2
          double_counts += 1
          log.debug "Found 2 sequences for #{name}" if log.debug?
        end
      end
    end
    if double_counts > 0
      log.info "#{double_counts} reads were found twice (likely as pairs), including both as probes"
    end
    log.info "Recovered #{anchor_sequence_ids.length} sequences using their names" if log.info?
    anchor_sequence_ids
  end

  # Translate each [node, direction, noded_read] triple found in the C graph
  # into [node of +graph+ with the same node_id, direction, new NodedRead].
  # A probe that was not found (an empty entry) becomes an empty Array.
  def ruby_probe_endings(graph, c_graph_endings)
    c_graph_endings.collect do |node_direction_read|
      if node_direction_read.empty?
        # No probe found
        []
      else
        # equivalent node
        node = graph.nodes[node_direction_read[0].node_id]
        # equivalent direction
        direction = node_direction_read[1]
        # equivalent noded read
        cnr = node_direction_read[2]
        nr = Bio::Velvet::Graph::NodedRead.new
        nr.read_id = cnr.read_id
        nr.offset_from_start_of_node = cnr.offset_from_start_of_node
        nr.start_coord = cnr.start_coord
        nr.direction = direction

        [node, direction, nr]
      end
    end
  end

  # Log how many of the probe nodes were located in the graph.
  def log_probe_recovery(probe_nodes)
    found_all = true
    num_found = 0
    probe_nodes.each_with_index do |probe, i|
      if probe.nil?
        found_all = false
        log.debug "Unable to recover probe ##{i+1}, perhaps this will cause problems, but proceding optimistically"
      else
        num_found += 1
      end
    end

    if !found_all
      log.info "Found #{num_found} of #{probe_nodes.length} anchoring nodes in the graph, ignoring the rest"
    elsif probe_nodes.empty?
      log.debug "No probes specified, so didn't find any"
    else
      log.info "Found all anchoring nodes in the graph."
    end
  end
end
@@ -0,0 +1,355 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
# Assigns 'heights' to the oriented nodes reachable from a set of starting
# nodes (tips have height 0), records the cycles met along the way, and
# derives path-count bounds from the result.
class Bio::AssemblyGraphAlgorithms::HeightFinder
  include Bio::FinishM::Logging

  # Visit nodes in range and determine heights.
  #
  # graph:: the graph, handed to OrientedNode#next_neighbours
  # initial_nodes:: oriented nodes from which the depth-first search starts
  # options:
  # :range:: if given, only nodes equal (==) to a member of this collection are visited
  # :reverse:: if true, start from the reverse of each initial node
  #
  # A node with no neighbours has height 0. Otherwise the height is one more
  # than the greatest known height among its neighbours, computed when the
  # search retraces to the node. A node none of whose neighbours has a known
  # height at that time is left without a height.
  #
  # Returns two values: an Array indexed by height, each entry being the
  # Array of traversal nodes found at that height, and an Array of closed
  # cycles, each an Array of oriented nodes.
  def traverse(graph, initial_nodes, options={})
    by_height = []
    traversal_nodes = {}
    cycles = {}
    nodes_in_retrace_phase = Set.new

    # depth-first so stack
    stack = DS::Stack.new
    initial_nodes.each do |onode|
      next if options[:range] && options[:range].none?{|other| other == onode.node }
      traversal_node = CyclicTraversalNode.new
      traversal_node.onode = options[:reverse] ? onode.reverse : onode
      traversal_node.nodes_in = []
      traversal_nodes[traversal_node.onode.to_settable] = traversal_node
      stack.push traversal_node
    end

    while (traversal_node = stack.pop)
      settable = traversal_node.onode.to_settable

      log.debug "visiting #{traversal_node.describe}." if log.debug?

      # Consider node solved if height is known.
      unless traversal_node.height.nil?
        log.debug "Height of #{traversal_node.describe} is known. Skip." if log.debug?
        next
      end

      # find neighbours
      neighbours = traversal_node.nodes_out
      if neighbours.nil?
        neighbours = traversal_node.onode.next_neighbours(graph)
        if options[:range]
          neighbours.reject!{|onode| options[:range].none?{|other| other == onode.node}} #not in defined range
        end

        # Get or create traversal version of node
        neighbours = neighbours.collect do |onode|
          nbr_settable = onode.to_settable
          traversal_nbr = traversal_nodes[nbr_settable]
          if traversal_nbr.nil?
            traversal_nbr = CyclicTraversalNode.new
            traversal_nbr.onode = onode
            traversal_nbr.nodes_in = []
            traversal_nodes[nbr_settable] = traversal_nbr
          end
          traversal_nbr
        end

        #remember neighbours
        traversal_node.nodes_out = neighbours
      end

      # Can we solve the node?
      if neighbours.empty? #check for a tip
        log.debug "#{traversal_node.describe} is a tip." if log.debug?
        traversal_node.height = 0
        (by_height[0] ||= []).push(traversal_node)
        log.debug "Found height '0' for #{traversal_node.describe}." if log.debug?
        next
      end

      if nodes_in_retrace_phase.include? settable
        log.debug "Retracing back to #{traversal_node.describe}." if log.debug?

        # Neighbours should have been explored
        # Are neighbours involved in cycles?
        cyclic_neighbours = neighbours.reject{|node| node.cycles.nil?}
        unless cyclic_neighbours.empty?
          # current node is in a cycle if a neighbour is in an unclosed cycle
          log.debug "Found cyclic neighbours #{cyclic_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
          cyclic_neighbours.each do |node|
            node.cycles.each do |cycle|
              log.debug "Merging cycle #{cycle.onodes.collect{|onode| onode.to_shorthand}.join(',')}." if log.debug?
              new_cycle = traversal_node.merge_unclosed_cycle cycle.copy
              next if new_cycle.nil? || !new_cycle.closed?

              log.debug "Cycle completes at #{traversal_node.describe}." if log.debug?
              new_cycle_key = new_cycle.to_settable
              if cycles.has_key? new_cycle_key
                log.debug "Already seen this cycle." if log.debug?
              else
                cycles[new_cycle_key] = new_cycle.onodes
              end
            end
          end
        end

        # Unsolved neighbours imply a closed cyclic path.
        # Are neighbours unsolved?
        solved_neighbours = neighbours.reject{|node| node.height.nil?}
        unless solved_neighbours.empty?
          log.debug "We know the heights of neighbours #{solved_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
          # Compute height from solved neighbours
          height = solved_neighbours.map{|node| node.height}.max + 1
          log.debug "Found height '#{height}' for #{traversal_node.describe}." if log.debug?
          traversal_node.height = height
          (by_height[height] ||= []).push(traversal_node)
        end
        # If no solved neighbours, leave unsolved

        # Move out of retrace phase
        nodes_in_retrace_phase.delete settable
        log.debug "Finished retracing #{traversal_node.describe}." if log.debug?
        next
      end

      # Move current node to retrace phase, before checking for retracing neighbours in case is own neighbour
      nodes_in_retrace_phase << settable

      # Look for currently retracing neighbours and initiate cycles
      retracing_neighbours = neighbours.select{|node| nodes_in_retrace_phase.include? node.onode.to_settable}
      unless retracing_neighbours.empty?
        log.debug "Initiating cycles for neighbours #{retracing_neighbours.collect{|node| node.describe}.join(',')} currently retracing." if log.debug?
        # initiate cycles for each retracing neighbour
        retracing_neighbours.each{|node| traversal_node.initiate_cycle(node.onode)}
      end

      # Return node to the stack and push neighbours
      stack.push traversal_node
      log.debug "Pushing #{traversal_node.describe} in retrace mode." if log.debug?
      neighbours.each do |node|
        node_settable = node.onode.to_settable

        # Note the parent of neighbour unless already known. The check is
        # against the parent being recorded (the current node): comparing with
        # the neighbour itself would allow duplicate parents, and would drop
        # real parents of a neighbour that has a self-loop.
        nodes_in = node.nodes_in
        nodes_in.push traversal_node unless nodes_in.include?(traversal_node)

        if nodes_in_retrace_phase.include? node_settable
          # A currently retracing neighbour implies a cycle, cut it off here
          log.debug "Neighbour #{node.describe} is retracing. Not revisiting." if log.debug?
        else
          log.debug "Pushing neighbour #{node.describe}." if log.debug?
          stack.push node
        end
      end
    end
    return by_height, cycles.values
  end

  # Per-oriented-node bookkeeping for a traversal: the node's height, its
  # known parents (nodes_in) and its neighbours (nodes_out).
  class TraversalNode
    attr_accessor :onode, :height, :nodes_in, :nodes_out

    # Shorthand description of the wrapped oriented node, for logging.
    def describe
      @onode.to_shorthand
    end

    def node_id
      @onode.node_id
    end
  end

  # A TraversalNode that also remembers the cycle paths passing through it.
  class CyclicTraversalNode < TraversalNode
    attr_accessor :cycles

    # Start a new cycle path ending at onode, and add this node to it.
    def initiate_cycle(onode)
      cycle = CyclePath.new
      cycle.onodes = [onode]
      merge_unclosed_cycle cycle
    end

    # Add this node to an unclosed cycle and remember the cycle. If this
    # node is the cycle's last node the cycle is marked closed instead of
    # being extended; otherwise this node is prepended.
    #
    # Returns the (modified) cycle, or nil if it was already closed.
    def merge_unclosed_cycle(cycle)
      return if cycle.closed?
      if cycle.onodes.last == @onode
        cycle.closed = true
      else
        cycle.onodes.unshift @onode
      end
      (@cycles ||= []).push(cycle)
      return cycle
    end

    # An ordered list of oriented nodes forming a (possibly still open) cycle.
    class CyclePath
      attr_accessor :onodes, :closed

      def closed?
        return @closed == true
      end

      # Copy with its own onodes Array, so the copy can be extended independently.
      def copy
        cycle = CyclePath.new
        cycle.onodes = @onodes[0..-1]
        cycle.closed = @closed
        cycle
      end

      # Key identifying the cycle regardless of which node it starts from:
      # the settable forms of the nodes, sorted by their first element with
      # ties broken by putting START_IS_FIRST entries first.
      def to_settable
        @onodes.collect{|onode| onode.to_settable}.sort do |a, b|
          result = a[0] <=> b[0]
          if result == 0
            result = a[1] == Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode::START_IS_FIRST ? -1 : 1
          end
          result
        end
      end
    end
  end


  # Maximum number of paths through the graph, computed from the by_height
  # structure returned by #traverse. Each tip (height 0) counts one path; any
  # other node counts the sum over those of its nodes_out that already have
  # a count. The result is the sum over nodes that have no parents
  # (nil if there are none).
  def max_paths_through(by_height)
    max_paths_from = {}
    by_height.each_with_index do |nodes, level|
      log.debug "At height #{level}." if log.debug?
      if level == 0 # tips
        nodes.each do |node|
          log.debug "Counted maximum of 1 path to #{node.describe}." if log.debug?
          max_paths_from[node.onode.to_settable] = 1
        end
        next
      end

      nodes.each do |node|
        settable = node.onode.to_settable
        max_paths_from_neighbours = node.nodes_out.collect{|nbr| max_paths_from[nbr.onode.to_settable]}.compact
        log.debug "Found neighbours of #{node.describe} with maximum paths #{max_paths_from_neighbours.join(',')}." if log.debug?
        max_paths_from[settable] = max_paths_from_neighbours.reduce(:+)
        log.debug "Counted maximum of #{max_paths_from[settable]} paths to #{node.describe}." if log.debug?
      end
    end

    # Get the graph roots (which are nodes with no parents) and add max_paths_from for each to get graph total
    root_keys = by_height.flatten.select{|node| node.nodes_in.empty? }.collect{|node| node.onode.to_settable}
    log.debug "Found graph roots #{root_keys.collect{|settable| settable[0]}.join(',')} with maximum paths #{root_keys.collect{|key| max_paths_from[key]}.join(',')}." if log.debug?
    max_paths = root_keys.map{|settable| max_paths_from[settable]}.reduce(:+)
    log.debug "Counted maximum of #{max_paths} through graph." if log.debug?
    return max_paths
  end

  # Minimum number of paths through the graph, computed from the by_height
  # structure returned by #traverse. Sweeping upward from height 0, each node
  # becomes 'live' at its own level, and above level 0 the nodes_out of the
  # current level's nodes stop being live. The result is the largest number
  # of nodes live at the end of any level.
  def min_paths_through(by_height)
    live_nodes = Set.new
    max_alive_counter = 0
    by_height.each_with_index do |nodes, level|
      log.debug "At height #{level}." if log.debug?
      # nodes at current level become live
      nodes.each do |node|
        log.debug "Setting #{node.describe} as live." if log.debug?
        live_nodes << node.onode.to_settable
      end
      if level > 0
        #children of nodes at current level are no longer live
        nodes.each do |node|
          node.nodes_out.each do |nbr|
            log.debug "Setting child #{nbr.describe} of live node #{node.describe} as inactive." if log.debug?
            live_nodes.delete(nbr.onode.to_settable)
          end
        end
      end

      log.debug "There are currently #{live_nodes.length} nodes alive. Max is #{max_alive_counter}." if log.debug?
      if live_nodes.length > max_alive_counter
        #track the maximum live nodes at any level
        log.debug "Updating max to #{live_nodes.length}." if log.debug?
        max_alive_counter = live_nodes.length
      end
    end
    return max_alive_counter
  end

  # Find oriented start and end points for a set of nodes.
  #
  # graph:: the graph, handed to OrientedNode#next_neighbours
  # nodes:: the nodes in range (default: all nodes of the graph)
  #
  # An oriented node is 'unreached' when no oriented node in range lists it
  # as a next neighbour. Searching depth-first from the unreached nodes, an
  # unreached node that is visited becomes a start; a visited node whose
  # reverse is unreached contributes that reverse as an end. Each node ID is
  # visited at most once, whatever its orientation.
  #
  # Returns two Arrays of oriented nodes: the starts and the ends.
  def find_oriented_edge_of_range(graph, nodes=nil)
    nodes ||= graph.nodes
    log.debug "Looking for oriented start and end points from #{nodes.collect{|n| n.node_id}.join(',')}" if log.debug?

    # Find nodes and directions which are not reachable from other nodes within range
    unreached_nodes = {}
    nodes.each do |node|
      [true, false].each do |direction|
        onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, direction
        unless unreached_nodes.has_key? onode.to_settable
          unreached_nodes[onode.to_settable] = onode
        end
        # Anything that can be stepped onto is, by definition, reached
        onode.next_neighbours(graph).each do |oneigh|
          unreached_nodes[oneigh.to_settable] = nil
        end
      end
    end

    entry_points = unreached_nodes.values.compact
    log.debug "Found the following nodes for a particular orientation have no paths connecting to other nodes in range: #{entry_points.collect{|n| n.to_shorthand}.join(',')}" if log.debug?

    # Start from an unreachable node, and trace all paths until the reverse end of other unreachable nodes
    # are reached, which are then defined as 'end' nodes. When finished, choose a remaining non-end unreachable
    # node and repeat, stopping paths if an already seen node is encountered.
    seen_nodes = Set.new
    start_onodes = []
    end_onodes = []
    stack = DS::Stack.new
    # Pushed in reverse so that the first entry point is popped first
    entry_points.reverse_each do |onode|
      stack.push onode
    end

    while (current_node = stack.pop)
      log.debug "At node #{current_node.to_shorthand}" if log.debug?

      node_id = current_node.node_id
      if seen_nodes.include?(node_id) || !nodes.include?(current_node.node)
        log.debug "Node has been seen or is out of range. Skipping..." if log.debug?
        next
      end
      seen_nodes << node_id

      current_unreached = unreached_nodes[current_node.to_settable]
      log.debug "Is current unreached? #{current_unreached}" if log.debug?
      if current_unreached
        log.debug "Defining starting node #{current_unreached.to_shorthand}" if log.debug?
        # Found start node
        start_onodes.push current_unreached
      else
        reverse_unreached = unreached_nodes[current_node.reverse.to_settable]
        log.debug "Is reverse unreached? #{reverse_unreached}" if log.debug?
        if reverse_unreached
          log.debug "Found ending node #{reverse_unreached.to_shorthand}" if log.debug?
          # Found end node
          end_onodes.push reverse_unreached
        end
      end

      current_node.next_neighbours(graph).each do |onode|
        log.debug "Adding neighbour #{onode.to_shorthand} to stack" if log.debug?
        stack.push onode
      end
    end

    return start_onodes, end_onodes
  end
end