finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,68 @@
1
+ require 'set'
2
+
3
+ module Bio::AssemblyGraphAlgorithms
4
+ class CoverageBasedGraphFilter
5
+ include Bio::FinishM::Logging
6
+
7
# Remove all nodes from the graph that do not have sufficient coverage
# (i.e. possibly are sequencing error artefacts).
#
# graph:: an object responding to #delete_nodes_if, which is expected to
#         yield each node and delete those for which the block is true
# threshold:: nodes whose coverage is strictly below this are removed.
#             Nodes without a coverage (nil/false) are never removed.
#
# Options:
# :whitelisted_sequences: provide an enumerable of sequence IDs, don't remove
#   any nodes that have reads tracked to any of these IDs. IDs are matched
#   through a Set, i.e. by #hash/#eql?.
#
# Returns whatever graph.delete_nodes_if returns (documented upstream as
# nodes_removed, arcs_removed; that cannot be confirmed from this method).
def remove_low_coverage_nodes(graph, threshold, options = {})
  # Index the whitelist once up front, rather than re-collecting the read IDs
  # of a node for every whitelisted sequence.
  whitelist = options[:whitelisted_sequences]
  whitelist = Set.new(whitelist) if whitelist

  graph.delete_nodes_if do |node|
    coverage = node.coverage
    next false unless coverage && coverage < threshold

    reads = node.short_reads
    if whitelist && !reads.nil? && reads.any? { |read| whitelist.include?(read.read_id) }
      # Low coverage, but a whitelisted sequence runs through it: keep it.
      log.debug "Preserving low coverage but whitelisted node: #{node.node_id}" if log.debug?
      next false
    end

    true
  end
end
32
+ end
33
+
34
+ class ConnectivityBasedGraphFilter
35
+ include Bio::FinishM::Logging
36
+
37
# Remove parts of the graph that are unconnected to any whitelisted nodes.
#
# Connectivity is determined with Dijkstra#min_distances run from each
# whitelisted node with :ignore_directions => true, so a node is kept if it
# can be reached from a whitelisted node regardless of arc direction.
#
# graph:: must respond to #nodes (indexable by node ID) and #delete_nodes_if
# whitelisted_nodes:: enumerable of nodes to keep and to explore from
#
# options:
# :leash_length: don't explore more than this length away from each of the
#   whitelisted_nodes. Default nil, no bounds
#
# Returns whatever graph.delete_nodes_if returns.
def remove_unconnected_nodes(graph, whitelisted_nodes, options={})
  # Work on a copy of the whitelist so the caller's collection is untouched
  nodes_to_keep = Set.new whitelisted_nodes

  dijkstra = Bio::AssemblyGraphAlgorithms::Dijkstra.new
  dijkstra_options = {
    :ignore_directions => true,
    :leash_length => options[:leash_length]
  }

  # Explore outwards from each whitelisted node, collecting nodes to keep
  whitelisted_nodes.each do |whitelisted_node|
    onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
    onode.node = whitelisted_node
    # irrelevant which side is first because :ignore_directions => true
    onode.first_side = Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
    log.debug "Testing for connectivity from #{onode.node.node_id}" if log.debug?

    # Keys are [node_id, first_side]; only the node ID matters here
    dijkstra.min_distances(graph, onode, dijkstra_options).each do |key, _distance|
      nodes_to_keep << graph.nodes[key[0]]
    end
  end

  # Delete all nodes that aren't in the set of nodes to keep
  graph.delete_nodes_if { |node| !nodes_to_keep.include?(node) }
end
67
+ end
68
+ end
@@ -0,0 +1,63 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ module Bio
5
+ module Velvet
6
+ class Graph
7
# Do a depth first search starting at this oriented node,
# yielding an OrientedNodeTrail at each new node encountered.
# The new node is the last node in the yielded trail. The result
# of the yield tells this method whether to abandon
# searching further from that point (false or nil)
# or to keep going (anything else).
#
# A node counts as already encountered when its (node_id, first_side)
# pair has been yielded before, so each oriented node is yielded at most
# once, along the first trail that reaches it.
#
# Returns nil once the search is exhausted.
def depth_first_search(oriented_node)
  log = Bio::Log::LoggerPlus['finishm']
  discovered_oriented_nodes = Set.new

  # The search starts from a trail containing only the given node
  initial_path = Bio::Velvet::Graph::OrientedNodeTrail.new
  initial_path.add_oriented_node oriented_node

  stack = DS::Stack.new
  stack.push initial_path

  # While there is more on the stack
  while (current_path = stack.pop)
    log.debug "Perhaps #{current_path.last}?" if log.debug?

    searchable = path_to_searchable(current_path)
    if discovered_oriented_nodes.include?(searchable)
      # Already seen this node, do nothing with it
      log.debug "Skipping #{current_path.last} since that has already been seen" if log.debug?
      next
    end

    # Found a new node for the user to play with
    log.debug "That's a new node, #{current_path.last}" if log.debug?
    discovered_oriented_nodes << searchable

    continue = yield current_path
    next unless continue

    # Push neighbours in descending node ID order, so that they are popped
    # (explored) in ascending order. This simplifies testing.
    next_nodes = current_path.neighbours_of_last_node(self).sort do |n1, n2|
      n2.node.node_id <=> n1.node.node_id
    end
    next_nodes.each do |neighbour|
      path = current_path.copy
      path.add_oriented_node neighbour
      stack.push path
    end
  end
end
53
+
54
+ private
55
# Reduce a path to a key describing its last oriented node: an array of
# [node_id, first_side]. Arrays compare by value, so a Set of these keys
# recognises the same oriented node even when it has been reached twice
# as two independent OrientedNode objects.
def path_to_searchable(path)
  last = path.last
  [last.node.node_id, last.first_side]
end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,216 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ class Bio::AssemblyGraphAlgorithms::Dijkstra
5
+ include Bio::FinishM::Logging
6
+
7
# Find the minimum distance to each oriented node reachable from
# initial_oriented_node (Dijkstra's algorithm). The distance to a neighbour
# is the distance to the current node, plus the length of the current node
# (except when the current node is the starting node), plus the neighbour's
# own #distance when a :neighbour_finder is in use. options:
# :leash_length => max distance explored,
#   can be set to nil to search indefinitely. Nodes queued before the leash
#   was passed are still included in the result.
# :ignore_directions: => true or false (default). If true, explore direction-independently.
#   i.e. if 1s->3s and 2s->3s, then include 2s in the returned set of min_distances
#   and continue exploring from 2s. Return each found node twice, once for each direction
# :neighbour_finder => an object that responds to #neighbours(oriented_node) and
#   returns an array of Bio::FinishM::PairedEndNeighbourFinder::Neighbour objects
#   default: just search using OrientedNode#next_neighbours
# :max_nodes => maximum number of nodes to return, to prevent out of control
#   exploring of the graph. If there are plenty of nodes to explore, then the
#   length of the returned hash is options[:max_nodes]+1 (+1 because the starting
#   node is included). It will probably be longer if :ignore_directions == true, in that
#   case the number of node_ids is constrained. It may also be longer if there are ties
#   at the edges of the constrained exploration.
#
# Returns a Hash of [node_id, first_side] => distance
def min_distances(graph, initial_oriented_node, options={})
  pqueue = DS::AnyPriorityQueue.new { |a, b| a < b }
  first = DistancedOrientedNode.new
  first.node = initial_oriented_node.node
  first.first_side = initial_oriented_node.first_side
  first.distance = 0
  pqueue.push first, first.distance

  to_return = {}
  first_node = true
  found_nodes = Set.new([first.node.node_id])

  while (min_distanced_node = pqueue.shift)
    current_key = min_distanced_node.to_settable
    current_distance = min_distanced_node.distance

    # A node is queued again whenever a shorter route to it is found, so the
    # queue can hold out-of-date entries. Skip those, otherwise the shorter
    # distance already recorded would be overwritten by the longer one.
    known_distance = to_return[current_key]
    next if known_distance && known_distance < current_distance

    # Add the current one (required for the starting node, which is never
    # recorded at queueing time)
    to_return[current_key] = current_distance

    log.debug "Working from #{min_distanced_node.inspect}" if log.debug?

    if options[:leash_length] && current_distance > options[:leash_length]
      # we are past the leash length, and this is the nearest node. So we are finito.
      log.debug "passed the leash length, cutting short our travels" if log.debug?
      break
    end

    if options[:max_nodes] && found_nodes.length > options[:max_nodes]
      log.debug "passed max-nodes threshold and have #{found_nodes.length} nodes" if log.debug?
      # remove extras that may have been queued if we are over the limit
      distances_direction_agnostic = {}
      to_return.each do |key, distance|
        prev = distances_direction_agnostic[key[0]]
        if prev.nil? || prev > distance
          distances_direction_agnostic[key[0]] = distance
        end
      end
      if distances_direction_agnostic.length > options[:max_nodes]
        sorted = distances_direction_agnostic.to_a.sort { |a, b| a[1] <=> b[1] }
        # deal with ties i.e. at the edge there can be multiple neighbours
        last_distance = sorted[options[:max_nodes]][1]

        # only keep those nodes that are sufficiently close
        to_return.select! { |_key, distance| distance <= last_distance }
      end
      break
    end

    # Find neighbouring nodes
    neighbours =
      if options[:neighbour_finder]
        options[:neighbour_finder].neighbours(min_distanced_node)
      else
        min_distanced_node.next_neighbours(graph)
      end

    # explore each neighbour node
    neighbours.each do |onode|
      found_nodes << onode.node.node_id

      new_distance = current_distance
      # Don't use negative distances as this algorithm cannot handle them,
      # and they are impossible anyway: treat them as zero.
      if options[:neighbour_finder] && onode.distance > 0
        new_distance += onode.distance
      end
      # The starting node's own length is not part of any distance
      new_distance += min_distanced_node.node.length_alone unless first_node

      neighbour_key = onode.to_settable
      if to_return[neighbour_key] && to_return[neighbour_key] <= new_distance
        # We already know a shorter path to this neighbour, so ignore it
        log.debug "Already seen this node at the same or shorter distance, going no further" if log.debug?
        next
      end

      log.debug "Queuing new distance for neighbour: #{onode}: #{new_distance}" if log.debug?
      # new shortest distance found. queue it up
      distanced_node = DistancedOrientedNode.new
      distanced_node.node = onode.node
      distanced_node.first_side = onode.first_side
      distanced_node.distance = new_distance
      to_return[neighbour_key] = new_distance
      pqueue.push distanced_node, new_distance

      next unless options[:ignore_directions]

      # Also explore from the other orientation of the neighbour. Record the
      # distance under the reversed node's own key, and only when it is an
      # improvement, so a shorter known distance is never clobbered.
      reverse = DistancedOrientedNode.new
      reverse.node = onode.node
      reverse.first_side = onode.reverse.first_side
      reverse.distance = new_distance
      reverse_key = reverse.to_settable
      if to_return[reverse_key].nil? || to_return[reverse_key] > new_distance
        to_return[reverse_key] = new_distance
        pqueue.push reverse, new_distance
      end
    end

    first_node = false
  end

  # if ignore directions, then fixup the return so that each direction is
  # included, both at the smaller of the two distances found
  if options[:ignore_directions]
    both_directions = [
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
    ]
    new_to_return = {}
    to_return.each do |key, _distance|
      keys = both_directions.collect { |direction| [key[0], direction] }
      new_distance = keys.collect { |k| to_return[k] }.compact.min
      keys.each { |k| new_to_return[k] = new_distance }
    end
    to_return = new_to_return
  end

  return to_return
end
147
+
148
# like #min_distances except explores in both directions, i.e. starting
# from the given node once with START_IS_FIRST and once with END_IS_FIRST.
#
# Returns a Hash of [node_id, first_side] => distance, where each distance
# is the smaller of the distances found by the two explorations.
def min_distances_in_both_directions(graph, node, options={})
  all_min_distances = {}
  [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
  ].each do |direction|
    onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(node, direction)
    min_distances(graph, onode, options).each do |node_direction, distance|
      current = all_min_distances[node_direction]
      # Record when first seen, or when closer than what is already recorded
      if current.nil? || distance < current
        all_min_distances[node_direction] = distance
      end
    end
  end
  return all_min_distances
end
166
+
167
# Run #min_distances from each of the given nodes, in both directions
# (START_IS_FIRST and END_IS_FIRST), and merge the results.
#
# Returns a Hash of [node_id, first_side] => distance, where each distance
# is the smallest found across all of the explorations.
def min_distances_from_many_nodes_in_both_directions(graph, nodes, options={})
  directions = [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
  ]
  all_min_distances = {}
  nodes.each do |node|
    directions.each do |direction|
      onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(node, direction)
      min_distances(graph, onode, options).each do |node_direction, distance|
        current = all_min_distances[node_direction]
        # Record when first seen, or when closer than what is already recorded
        if current.nil? || distance < current
          all_min_distances[node_direction] = distance
        end
      end
    end
  end
  return all_min_distances
end
186
+
187
# An oriented node some distance from the origin of exploration
#
# node:: the underlying graph node (must respond to #node_id)
# first_side:: which side of the node is entered first, one of
#              OrientedNodeTrail::START_IS_FIRST / END_IS_FIRST
# distance:: distance from the origin of exploration
class DistancedOrientedNode
  attr_accessor :node, :first_side, :distance

  # Using Set object, often we want two separate objects to be considered equal even if
  # they are distinct objects. The returned [node_id, first_side] array
  # compares by value, and so can serve as a Set member or Hash key.
  def to_settable
    [@node.node_id, @first_side]
  end

  # Which side of the node is not first?
  def second_side
    if @first_side == Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
    else
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
    end
  end

  # Neighbours found by an OrientedNode with the same node and orientation
  # as this one (delegates to OrientedNode#next_neighbours).
  def next_neighbours(graph)
    onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
    onode.node = @node
    onode.first_side = @first_side
    onode.next_neighbours(graph)
  end

  def inspect
    "DistancedOrientedNode #{object_id}: node=#{@node.node_id} first=#{@first_side} distance=#{@distance}"
  end
  alias_method :to_s, :inspect
end
216
+ end
@@ -0,0 +1,253 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ module Bio
5
+ module AssemblyGraphAlgorithms
6
+ class Fluffer
7
+ include Bio::FinishM::Logging
8
+ BEYOND_LEASH_LENGTH_FATE = :beyond_leash_length
9
+ TERMINAL_NODE_FATE = :terminal_node
10
+ DEAD_END_FATE = :dead_end
11
+
12
+
13
# The outcome of exploring from a single probe (see #fluff_part1).
#
# golden_paths:: complete paths found from the probe
# golden_path_fragments:: paths from the probe that run into a node already
#                         on a golden path
# golden_path_fates:: why each golden path was halted (one of the *_FATE
#                     constants), in the same order as golden_paths
class FlufferHalfResult
  attr_accessor :golden_paths, :golden_path_fragments, :golden_path_fates

  def initialize
    @golden_paths = []
    @golden_path_fragments = []
    # Start empty like the paths it runs parallel to, so that it can be
    # indexed and appended to without a nil check.
    @golden_path_fates = []
  end
end
21
+
22
# Holds an array of paths, but also the fates of each of those paths - why were they halted?
# fates is a plain Array intended to run parallel to the paths; it is not
# kept in sync automatically, so push to both.
class FlufferPathSet < Array
  attr_accessor :fates

  # Accepts the same arguments as Array.new, so the Array part is set up
  # properly. With no arguments this is an empty path set.
  def initialize(*args, &block)
    super
    @fates = []
  end
end
30
+
31
# Find paths leading away from each probe in the graph: explore from each
# probe (#fluff_part1), then expand the findings into complete paths
# (#fluff_part2).
#
# Return an array of arrays of paths, as returned by #fluff_part2.
def fluff(finishm_graph, leash_length, options={})
  log.debug "Fluffing part 1.." if log.debug?
  half_results = fluff_part1(finishm_graph, leash_length, options)

  log.debug "Found fluff half results: #{half_results}" if log.debug?
  log.debug "Fluffing part 2.." if log.debug?
  fluff_part2(half_results)
end
40
+
41
# Explore the graph from each probe, depth first, and record where each
# line of exploration ended up.
#
# finishm_graph:: must respond to #probe_nodes, #graph,
#                 #velvet_oriented_node(probe_index) and
#                 #initial_path_from_probe(probe_index)
# leash_length:: stop extending a path once its length_in_bp reaches this.
#                nil means no limit.
# options:: currently unused
#
# Returns an array with one FlufferHalfResult per probe, in probe order. A
# probe whose node is nil (not found in the graph) gets an empty result.
# For the others:
# * golden_paths are paths that ended at another probe, at a dead end, or
#   beyond the leash; golden_path_fates records which, in the same order
# * golden_path_fragments are paths that ran into a node already on a
#   golden path or fragment
def fluff_part1(finishm_graph, leash_length, options={})
  # Get a set of all probes so that they can be checked against
  log.debug "Found #{finishm_graph.probe_nodes.reject{|node| node.nil?}.length} different probes that were in the final velvet graph" if log.debug?
  half_results = []
  graph = finishm_graph.graph

  # For each probe in the graph
  finishm_graph.probe_nodes.each_with_index do |probe_node, probe_index|
    # If the node is not found in the graph, then forget it
    if probe_node.nil?
      half_results.push FlufferHalfResult.new
      next
    end

    # probe was found in the graph. Start finding them paths.
    # start exploring the squid way
    golden_paths = [] # These are the full paths found
    golden_fragments = [] # paths to join up to the end
    golden_path_fates = [] # An array of how each of the paths were halted
    already_visited_nodes = Set.new # Nodes that have already been explored
    golden_onodes = Set.new # Nodes that stop exploration

    # Every oriented node of a path becomes golden once the path is recorded
    mark_golden = lambda do |path|
      path.each { |onode| golden_onodes << onode.to_settable }
    end

    # Add all the nodes that are being probed, because we don't want double exploration
    # i.e. want probe1 => probe2, probe2 => probe3, but not probe1 => probe2 => probe3
    terminal_nodes = Set.new
    finishm_graph.probe_nodes.each_with_index do |other_probe_node, other_probe_index|
      # Don't add the node itself. This is a special case which is already handled below
      next if other_probe_index == probe_index || other_probe_node.nil?
      terminal_nodes << finishm_graph.velvet_oriented_node(other_probe_index).to_settable
    end

    stack = DS::Stack.new
    stack.push finishm_graph.initial_path_from_probe(probe_index)

    while (current_path = stack.pop)
      log.debug "Perhaps #{current_path}?" if log.debug?
      last_onode = current_path.last
      last_key = last_onode.to_settable

      if golden_onodes.include?(last_key)
        # Probably a golden fragment, unless the node found is in the current path.
        # if that is true, that's a loop along a golden path.
        first_index = nil
        current_path.each_with_index do |directed_node, i|
          if directed_node.node == last_onode.node &&
              directed_node.first_side == last_onode.first_side
            first_index = i
            break
          end
        end

        if first_index == current_path.length - 1
          # Found another golden path(s)
          log.debug "Ran into a golden node" if log.debug?
          golden_fragments.push current_path
          mark_golden.call(current_path)
        else
          # Loop found along a golden path or fragment
          log.debug "Found a loop along a golden path: #{current_path}" if log.debug?
        end
        next
      end

      if already_visited_nodes.include?(last_key) && !terminal_nodes.include?(last_key)
        # Already seen this (non-golden) node, do nothing with it
        log.debug "Skipping #{current_path.last} since that has already been seen" if log.debug?
        next
      end

      if log.debug?
        # Index length-2 would wrap around to the last node for a path of
        # one node, so handle the starting path explicitly.
        second_last_node = current_path.length > 1 ? current_path[current_path.length - 2] : 'initial_node'
        log.debug "That's a new node, #{second_last_node}/#{current_path.last}"
      end

      # if we aren't beyond the leash. Presumably this
      # doesn't happen much, but there is a possibility that the leash will
      # prevent a real path being discovered. If there are two or more paths to a node
      # and a path longer than the leash is discovered first, then all the
      # nodes on that path will be marked as discovered when they really aren't
      # TODO: fix the above bug
      if leash_length.nil? || current_path.length_in_bp < leash_length
        # Found a new node for the user to play with
        already_visited_nodes << last_key

        # Have we found a path to one of the other probes?
        if terminal_nodes.include?(last_key)
          log.debug "Found the terminal node: #{current_path}" if log.debug?
          golden_paths.push current_path
          golden_path_fates.push TERMINAL_NODE_FATE

        else # Use an else statement here because we want exploration to stop when other probes are encountered
          # prep for next time
          # Push in descending node ID order, so that paths are popped in
          # ascending order. This simplifies testing.
          next_nodes = current_path.neighbours_of_last_node(graph).sort do |n1, n2|
            n2.node.node_id <=> n1.node.node_id
          end
          next_nodes.each do |neighbour|
            path = current_path.copy
            path.add_oriented_node neighbour
            log.debug "Pushing a path yet to be explored: #{path}" if log.debug?
            stack.push path
          end

          # If we are at a dead end, add this whole path as a golden path. This
          # is low coverage fluff, perhaps. Or it is nothing.
          if next_nodes.empty?
            log.debug "Found a dead end path: #{current_path}" if log.debug?
            golden_paths.push current_path
            golden_path_fates.push DEAD_END_FATE
            mark_golden.call(current_path)
          end
        end
      else
        if log.debug?
          log.debug "Found a path that made it to the end of the leash, with path length #{current_path.length_in_bp} vs leash length #{leash_length}"
          log.debug "Path given up on: #{current_path}"
          log.debug "Path sequence given up on: #{current_path.sequence}"
          log.debug "Node lengths: #{current_path.collect{|n| n.node.length_alone}.join(',')}"
        end
        # Record this past-leash-length path
        golden_paths.push current_path
        golden_path_fates.push BEYOND_LEASH_LENGTH_FATE
        mark_golden.call(current_path)
      end
    end

    log.debug "Found #{golden_paths.length} golden paths and #{golden_fragments.length} golden fragments" if log.debug?
    fluff_half_result = FlufferHalfResult.new
    fluff_half_result.golden_paths = golden_paths
    fluff_half_result.golden_path_fragments = golden_fragments
    fluff_half_result.golden_path_fates = golden_path_fates

    half_results.push fluff_half_result
  end

  return half_results
end
183
+
184
# Expand the half results of #fluff_part1 into complete paths.
#
# Each golden path fragment runs from the start to a node that is on a
# golden path (or on another fragment). For every such junction a new
# complete path is built: the fragment, followed by the remainder of the
# path it joins. The new path takes on the fate of the path it joins.
#
# half_results:: array of objects responding to #golden_paths,
#                #golden_path_fragments and #golden_path_fates
#
# Returns an array with one FlufferPathSet per half result, each holding the
# paths found and (in #fates) the fate of each of those paths.
def fluff_part2(half_results)
  all_all_paths = []

  half_results.each do |segment_half_result|
    # OK, so we've transformed the data into a state where there is
    # at least one path through the data
    # and tentacles hanging off various golden nodes.
    # Now separate out the paths and return the array.
    # First transform the data so it can be referenced by the end node
    terminal_golden_nodes_to_paths = {}
    segment_half_result.golden_path_fragments.each do |fragment|
      (terminal_golden_nodes_to_paths[fragment.last.to_settable] ||= []).push fragment
    end

    # Next backtrack through the paths
    # Each path starts at the beginning and ends at a
    # golden node
    all_paths = FlufferPathSet.new
    stack = DS::Stack.new
    # Each stack entry is [path, number of trailing nodes already handled, fate]
    segment_half_result.golden_paths.each_with_index do |golden_path, i|
      stack.push [golden_path, 0, segment_half_result.golden_path_fates[i]]
    end

    while (entry = stack.pop)
      current_path, num_to_ignore, fate = entry

      log.debug "Defragging #{current_path.to_s}/#{fate}, ignoring the last #{num_to_ignore} nodes" if log.debug?
      all_paths.push current_path
      all_paths.fates.push fate

      # Iterate down this path from its end, spawning new paths if there
      # are paths that intersect
      passed_nodes = []
      current_path.trail.reverse.each_with_index do |onode, i|
        unless i < num_to_ignore # ignore the last one(s) because they've already been handled
          frags = terminal_golden_nodes_to_paths[onode.to_settable]
          log.debug "Offshoots from #{onode}: #{frags.nil? ? '[]' : frags.collect{|f| f.collect{|n| n.node.node_id}.join(',')}.join(' and ')}" if log.debug?
          if frags
            frags.each do |fragment|
              log.debug "Using an offshoot: #{fragment.to_s}" if log.debug?
              # The fragment extends from the beginning to the golden node,
              # the current node. So create a new complete path,
              # And push it to the stack.
              new_golden = fragment.copy
              log.debug "Adding #{new_golden.to_s} and #{passed_nodes.collect{|n| n.node.node_id}}" if log.debug?
              # passed_nodes was collected end-first, so reverse it to append
              # in path order. Distinct block variable names keep the outer
              # onode and i intact.
              passed_nodes.reverse.each do |passed_onode|
                new_golden.add_oriented_node passed_onode
              end
              # The junction node and everything after it have been handled
              log.debug "Enqueueing: #{new_golden.to_s} ignoring the last #{i+1} nodes" if log.debug?
              stack.push [new_golden, i + 1, fate]
            end
          end
        end
        passed_nodes.push onode
      end
    end

    # All the paths are in an array, just a linear series of distinct paths
    all_all_paths.push all_paths
  end

  return all_all_paths
end
251
+ end
252
+ end
253
+ end