finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,68 @@
1
+ require 'set'
2
+
3
+ module Bio::AssemblyGraphAlgorithms
4
+ class CoverageBasedGraphFilter
5
+ include Bio::FinishM::Logging
6
+
7
# Remove all nodes from the graph that do not have sufficient coverage
# (i.e. possibly are sequencing error artefacts). Nodes whose coverage is
# nil are never removed.
#
# Options:
# :whitelisted_sequences: provide an enumerable of sequence IDs, don't remove
#   any nodes that have reads tracked to any of these IDs
#
# Returns nodes_removed, arcs_removed (as objects, in particular order),
# i.e. whatever graph.delete_nodes_if returns.
def remove_low_coverage_nodes(graph, threshold, options = {})
  # Build the lookup once, rather than re-collecting every node's read IDs
  # for each whitelisted sequence.
  whitelist = options[:whitelisted_sequences]
  whitelist = Set.new(whitelist) if whitelist

  graph.delete_nodes_if do |node|
    # Only nodes with a known, too-low coverage are candidates for removal
    next false unless node.coverage && node.coverage < threshold

    reads = node.short_reads
    next true if whitelist.nil? || reads.nil?

    if reads.any? { |read| whitelist.include?(read.read_id) }
      log.debug "Preserving low coverage but whitelisted node: #{node.node_id}" if log.debug?
      false
    else
      true
    end
  end
end
32
+ end
33
+
34
+ class ConnectivityBasedGraphFilter
35
+ include Bio::FinishM::Logging
36
+
37
# Remove parts of the graph that are unconnected to any whitelisted nodes
#
# options:
# :leash_length: don't explore more than this length away from each of the
#   whitelisted_nodes. Default nil, no bounds
#
# Returns the result of graph.delete_nodes_if.
def remove_unconnected_nodes(graph, whitelisted_nodes, options={})
  dijkstra = Bio::AssemblyGraphAlgorithms::Dijkstra.new
  search_options = {
    :ignore_directions => true,
    :leash_length => options[:leash_length],
  }

  # Start from a copy of the whitelist and grow it with every node that a
  # direction-agnostic Dijkstra search can reach from a whitelisted node
  keepers = Set.new whitelisted_nodes
  whitelisted_nodes.each do |seed|
    start = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
    start.node = seed
    # irrelevant which side is first because :ignore_directions => true
    start.first_side = Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
    log.debug "Testing for connectivity from #{start.node.node_id}" if log.debug?

    # Keys are [node_id, first_side]; only the node ID matters here
    dijkstra.min_distances(graph, start, search_options).each_key do |node_id_and_side|
      keepers << graph.nodes[node_id_and_side[0]]
    end
  end

  # Delete all nodes that aren't in the (expanded) whitelist
  graph.delete_nodes_if { |node| !keepers.include?(node) }
end
67
+ end
68
+ end
@@ -0,0 +1,63 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ module Bio
5
+ module Velvet
6
+ class Graph
7
# Do a depth first search starting at the given oriented node, yielding an
# OrientedNodeTrail each time a not-yet-seen (node, orientation) pair is
# reached. The newly reached node is the last node of the yielded trail.
#
# The block's return value controls the search: a truthy value means keep
# exploring beyond this node, a falsy value means abandon this branch.
def depth_first_search(oriented_node)
  log = Bio::Log::LoggerPlus['finishm']
  seen = Set.new

  # The search starts from a trail holding just the starting node
  start = Bio::Velvet::Graph::OrientedNodeTrail.new
  start.add_oriented_node oriented_node

  pending = DS::Stack.new
  pending.push start

  # While there is more on the stack
  while trail = pending.pop
    log.debug "Perhaps #{trail.last}?" if log.debug?
    key = path_to_searchable(trail)

    if seen.include?(key)
      # Already seen this node, do nothing with it
      log.debug "Skipping #{trail.last} since that has already been seen" if log.debug?
      next
    end

    # Found a new node for the user to play with
    log.debug "That's a new node, #{trail.last}" if log.debug?
    seen << key

    next unless yield(trail)

    # Push neighbours in descending node ID order (so the lowest ID is
    # explored first), which simplifies testing
    neighbours = trail.neighbours_of_last_node(self).sort do |n1, n2|
      -(n1.node.node_id <=> n2.node.node_id)
    end
    neighbours.each do |neighbour|
      extended = trail.copy
      extended.add_oriented_node neighbour
      pending.push extended
    end
  end
end
53
+
54
+ private
55
# Reduce a trail to a plain [node_id, first_side] array describing its last
# oriented node. Set#include? may not recognise two independently created
# OrientedNode objects as the same, whereas arrays compare by content.
def path_to_searchable(path)
  tail = path.last
  [tail.node.node_id, tail.first_side]
end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,216 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ class Bio::AssemblyGraphAlgorithms::Dijkstra
5
+ include Bio::FinishM::Logging
6
+
7
# Find the minimum distance from initial_oriented_node to each oriented node
# reachable from it, using Dijkstra's algorithm.
#
# The starting node is at distance 0 and its own length is not counted, so
# its direct neighbours are also at distance 0 (unless a :neighbour_finder
# supplies a positive gap). Every later step adds the length_alone of the
# node being left.
#
# options:
# :leash_length => max distance explored,
#   can be set to nil to search indefinitely
# :ignore_directions: => true or false (default). If true, explore direction-independently.
#   i.e. if 1s->3s and 2s->3s, then include 2s in the returned set of min_distances
#   and continue exploring from 2s. Return each found node twice, once for each direction
# :neighbour_finder => an object that responds to #neighbours(oriented_node) and
#   returns an array of Bio::FinishM::PairedEndNeighbourFinder::Neighbour objects
#   default: just search using OrientedNode#next_neighbours
# :max_nodes => maximum number of nodes to return, to prevent out of control
#   exploring of the graph. If there are plenty of nodes to explore, then the
#   length of the returned hash is options[:max_nodes]+1 (+1 because the starting
#   node is included). It will probably be longer if :ignore_directions == true, in that
#   case the number of node_ids is constrained. It may also be longer if there are ties
#   at the edges of the constrained exploration.
#
# Returns a Hash of [node_id, first_side] => distance
def min_distances(graph, initial_oriented_node, options={})
  pqueue = DS::AnyPriorityQueue.new {|a,b| a < b}
  first = DistancedOrientedNode.new
  first.node = initial_oriented_node.node
  first.first_side = initial_oriented_node.first_side
  first.distance = 0
  pqueue.push first, first.distance

  to_return = {}
  first_node = true
  found_nodes = Set.new([first.node.node_id])

  while min_distanced_node = pqueue.shift
    current_key = min_distanced_node.to_settable
    current_distance = min_distanced_node.distance

    # A node is re-queued whenever a shorter route to it is found, so older
    # (longer) entries for it are still sitting in the queue. Skip those
    # rather than letting them overwrite the shorter recorded distance.
    known_distance = to_return[current_key]
    if known_distance && known_distance < current_distance
      log.debug "Ignoring superseded queue entry #{min_distanced_node.inspect}" if log.debug?
      next
    end

    # Add the current one
    to_return[current_key] = current_distance

    log.debug "Working from #{min_distanced_node.inspect}" if log.debug?

    if options[:leash_length] && current_distance > options[:leash_length]
      # we are passed leash length, and this is the nearest node. So we are finito.
      log.debug "passed the leash length, cutting short our travels" if log.debug?
      break
    end

    if options[:max_nodes] && found_nodes.length > options[:max_nodes]
      log.debug "passed max-nodes threshold and have #{found_nodes.length} nodes" if log.debug?
      # remove extras that may have been queued if we are over the limit
      distances_direction_agnostic = {}
      to_return.each do |key, distance|
        prev = distances_direction_agnostic[key[0]]
        if prev.nil? || prev > distance
          distances_direction_agnostic[key[0]] = distance
        end
      end
      if distances_direction_agnostic.length > options[:max_nodes]
        sorted = distances_direction_agnostic.to_a.sort{|a,b| a[1]<=>b[1]}
        # deal with ties i.e. at the edge there can be multiple neighbours
        last_distance = sorted[options[:max_nodes]][1]

        # only keep those nodes that are sufficiently close
        to_return.select! do |_key, distance|
          distance <= last_distance
        end
      end
      break
    end

    # Find neighbouring nodes
    neighbours = if options[:neighbour_finder]
      options[:neighbour_finder].neighbours(min_distanced_node)
    else
      min_distanced_node.next_neighbours(graph)
    end

    # explore each neighbour node
    neighbours.each do |onode|
      found_nodes << onode.node.node_id
      new_distance = current_distance
      # Don't use negative distances as this algorithm cannot handle it, and
      # it is impossible anyway
      if options[:neighbour_finder] && onode.distance > 0
        new_distance += onode.distance
      end
      # The starting node's own length is not part of any distance
      new_distance += min_distanced_node.node.length_alone unless first_node

      neighbour_key = onode.to_settable
      if to_return[neighbour_key] && to_return[neighbour_key] <= new_distance
        # We already know a shorter path to this neighbour, so ignore it
        log.debug "Already seen this node at the same or shorter distance, going no further" if log.debug?
        next
      end

      log.debug "Queuing new distance for neighbour: #{onode}: #{new_distance}" if log.debug?
      # new shortest distance found. queue it up
      distanced_node = DistancedOrientedNode.new
      distanced_node.node = onode.node
      distanced_node.first_side = onode.first_side
      distanced_node.distance = new_distance
      to_return[neighbour_key] = new_distance
      pqueue.push distanced_node, new_distance

      if options[:ignore_directions]
        reverse = DistancedOrientedNode.new
        reverse.node = onode.node
        reverse.first_side = onode.reverse.first_side
        reverse.distance = new_distance
        reverse_key = reverse.to_settable
        # Record the distance under the reverse orientation's own key, and
        # only when it improves on what is already known for it.
        if to_return[reverse_key].nil? || to_return[reverse_key] > new_distance
          to_return[reverse_key] = new_distance
          pqueue.push reverse, new_distance
        end
      end
    end

    first_node = false
  end

  # if ignore directions, then fixup the return so that each direction is
  # included, both carrying the smaller of the two recorded distances
  if options[:ignore_directions]
    new_to_return = {}
    to_return.each do |key, _distance|
      both_keys = [
        Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
        Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
      ].collect { |direction| [key[0], direction] }
      shortest = both_keys.collect { |k| to_return[k] }.compact.min
      both_keys.each { |k| new_to_return[k] = shortest }
    end
    to_return = new_to_return
  end

  return to_return
end
147
+
148
# like #min_distances except explores in both directions, i.e. once with
# the start of the node first and once with the end of the node first.
#
# Returns a Hash of [node_id, first_side] => distance, holding the smallest
# distance found for each key across both explorations.
def min_distances_in_both_directions(graph, node, options={})
  min_distances_from_many_nodes_in_both_directions(graph, [node], options)
end

# like #min_distances_in_both_directions, except the exploration is started
# from each of the given nodes in turn and the results are merged, keeping
# the smallest distance found for each [node_id, first_side] key.
def min_distances_from_many_nodes_in_both_directions(graph, nodes, options={})
  directions = [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
  ]

  all_min_distances = {}
  nodes.each do |node|
    directions.each do |direction|
      onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(node, direction)
      min_distances(graph, onode, options).each do |node_direction, distance|
        current = all_min_distances[node_direction]
        # Only ever replace a recorded distance with a strictly shorter one
        if current.nil? || distance < current
          all_min_distances[node_direction] = distance
        end
      end
    end
  end
  return all_min_distances
end
186
+
187
# An oriented node annotated with its distance from the origin of exploration
class DistancedOrientedNode
  attr_accessor :node, :first_side, :distance

  # A plain [node_id, first_side] array, so that two distinct objects that
  # describe the same node and orientation are treated as equal when used in
  # a Set or as a Hash key
  def to_settable
    [node.node_id, first_side]
  end

  # Which side of the node is not first?
  def second_side
    trail = Bio::Velvet::Graph::OrientedNodeTrail
    return trail::END_IS_FIRST if first_side == trail::START_IS_FIRST
    trail::START_IS_FIRST
  end

  # The neighbours of the equivalent (distance-less) OrientedNode in graph
  def next_neighbours(graph)
    oriented = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
    oriented.node = node
    oriented.first_side = first_side
    oriented.next_neighbours(graph)
  end

  def inspect
    "DistancedOrientedNode #{object_id}: node=#{@node.node_id} first=#{@first_side} distance=#{@distance}"
  end
  alias to_s inspect
end
216
+ end
@@ -0,0 +1,253 @@
1
+ require 'ds'
2
+ require 'set'
3
+
4
+ module Bio
5
+ module AssemblyGraphAlgorithms
6
+ class Fluffer
7
+ include Bio::FinishM::Logging
8
+ BEYOND_LEASH_LENGTH_FATE = :beyond_leash_length
9
+ TERMINAL_NODE_FATE = :terminal_node
10
+ DEAD_END_FATE = :dead_end
11
+
12
+
13
# The outcome of exploring outwards from a single probe:
# golden_paths:: the full paths found
# golden_path_fragments:: paths that run into a node already on a golden
#   path, to be joined up with the rest of that path later
# golden_path_fates:: for each golden path (same order), why it was halted
class FlufferHalfResult
  attr_accessor :golden_paths, :golden_path_fragments, :golden_path_fates

  # All three collections start out empty, so that a result for a probe that
  # was not found in the graph can be treated like any other.
  def initialize
    @golden_paths = []
    @golden_path_fragments = []
    @golden_path_fates = []
  end
end
21
+
22
# Holds an array of paths, but also the fates of each of those paths - why
# were they halted? fates[i] belongs to the path at index i.
class FlufferPathSet < Array
  attr_accessor :fates

  # Accepts the same arguments as Array.new (none are required) and starts
  # with an empty list of fates.
  def initialize(*args, &block)
    super
    @fates = []
  end
end
30
+
31
# Explore outwards from every probe in finishm_graph (part 1), then expand
# the findings into complete paths (part 2).
#
# Returns an array of array of paths - the result of #fluff_part2.
def fluff(finishm_graph, leash_length, options={})
  log.debug "Fluffing part 1.." if log.debug?
  half_results = fluff_part1(finishm_graph, leash_length, options)

  if log.debug?
    log.debug "Found fluff half results: #{half_results}"
    log.debug "Fluffing part 2.."
  end
  fluff_part2(half_results)
end
40
+
41
# Explore the graph outwards from each probe in finishm_graph.
#
# leash_length:: paths whose length_in_bp reaches this value are not extended
#   any further. nil means no limit.
# options:: accepted for interface compatibility, currently not read.
#
# Returns an Array holding one FlufferHalfResult per entry of
# finishm_graph.probe_nodes, in the same order. Probes that were not found in
# the graph (nil entries) get a freshly created, empty result.
def fluff_part1(finishm_graph, leash_length, options={})
  log.debug "Found #{finishm_graph.probe_nodes.reject{|node| node.nil?}.length} different probes that were in the final velvet graph" if log.debug?

  half_results = []
  finishm_graph.probe_nodes.each_with_index do |node, probe_index|
    if node.nil?
      # If the node is not found in the graph, then forget it
      half_results.push FlufferHalfResult.new
    else
      half_results.push fluff_from_probe(finishm_graph, probe_index, leash_length)
    end
  end

  return half_results
end

# Depth-first exploration starting at the probe with the given index.
# Returns a FlufferHalfResult holding
# * golden paths: paths halted at another probe, at a dead end or at the
#   leash length, each with its fate recorded at the same index
# * golden fragments: paths that ran into a node already on a golden path
def fluff_from_probe(finishm_graph, probe_index, leash_length)
  graph = finishm_graph.graph
  golden_paths = [] # the full paths found
  golden_fragments = [] # paths to join up to the end
  golden_path_fates = [] # how each of the golden paths was halted
  already_visited_nodes = Set.new # nodes that have already been explored
  golden_onodes = Set.new # nodes that stop exploration

  # All the other probes stop exploration, because we don't want double
  # exploration i.e. want probe1 => probe2, probe2 => probe3, but not
  # probe1 => probe2 => probe3
  terminal_nodes = Set.new
  finishm_graph.probe_nodes.each_with_index do |other_node, other_index|
    next if other_index == probe_index || other_node.nil?
    terminal_nodes << finishm_graph.velvet_oriented_node(other_index).to_settable
  end

  mark_as_golden = lambda do |path|
    path.each { |onode| golden_onodes << onode.to_settable }
  end

  stack = DS::Stack.new
  stack.push finishm_graph.initial_path_from_probe(probe_index)

  while current_path = stack.pop
    log.debug "Perhaps #{current_path}?" if log.debug?
    last_onode = current_path.last
    last_settable = last_onode.to_settable

    if golden_onodes.include?(last_settable)
      # Probably a golden fragment, unless the node found is in the current
      # path. If that is true, that's a loop along a golden path.
      first_index = nil
      current_path.each_with_index do |directed_node, i|
        if directed_node.node == last_onode.node &&
            directed_node.first_side == last_onode.first_side
          first_index = i
          break
        end
      end

      if first_index == current_path.length-1
        # Found another golden path(s)
        log.debug "Ran into a golden node" if log.debug?
        golden_fragments.push current_path
        mark_as_golden.call(current_path)
      else
        # Loop found along a golden path or fragment
        log.debug "Found a loop along a golden path: #{current_path}" if log.debug?
      end
      next
    end

    if already_visited_nodes.include?(last_settable) &&
        !terminal_nodes.include?(last_settable)
      # Already seen this (non-golden) node, do nothing with it
      log.debug "Skipping #{current_path.last} since that has already been seen" if log.debug?
      next
    end

    if log.debug?
      second_last_node = current_path[current_path.length-2]
      second_last_node ||= 'initial_node'
      log.debug "That's a new node, #{second_last_node}/#{current_path.last}"
    end

    # Known limitation: the leash can prevent a real path being discovered.
    # If there are two or more paths to a node and one longer than the leash
    # is discovered first, the nodes along it are marked as discovered even
    # though a shorter route to them may exist.
    # TODO: fix the above bug
    if leash_length.nil? || current_path.length_in_bp < leash_length
      # Found a new node for the user to play with
      already_visited_nodes << last_settable

      if terminal_nodes.include?(last_settable)
        # Found a path to one of the other probes. Exploration stops here.
        log.debug "Found the terminal node: #{current_path}" if log.debug?
        golden_paths.push current_path
        golden_path_fates.push TERMINAL_NODE_FATE
      else
        # prep for next time
        # Sort node IDs to simplify testing
        next_nodes = current_path.neighbours_of_last_node(graph).sort{|n1, n2|
          -(n1.node.node_id <=> n2.node.node_id)
        }
        next_nodes.each do |n|
          path = current_path.copy
          path.add_oriented_node n
          log.debug "Pushing a path yet to be explored: #{path}" if log.debug?
          stack.push path
        end

        # If we are at a dead end, add this whole path as a golden path. This
        # is low coverage fluff, perhaps. Or it is nothing.
        if next_nodes.empty?
          log.debug "Found a dead end path: #{current_path}" if log.debug?
          golden_paths.push current_path
          golden_path_fates.push DEAD_END_FATE
          mark_as_golden.call(current_path)
        end
      end
    else
      if log.debug?
        log.debug "Found a path that made it to the end of the leash, with path length #{current_path.length_in_bp} vs leash length #{leash_length}"
        log.debug "Path given up on: #{current_path}"
        log.debug "Path sequence given up on: #{current_path.sequence}"
        log.debug "Node lengths: #{current_path.collect{|n| n.node.length_alone}.join(',')}"
      end
      # Record this past-leash-length path
      golden_paths.push current_path
      golden_path_fates.push BEYOND_LEASH_LENGTH_FATE
      mark_as_golden.call(current_path)
    end
  end

  log.debug "Found #{golden_paths.length} golden paths and #{golden_fragments.length} golden fragments" if log.debug?
  fluff_half_result = FlufferHalfResult.new
  fluff_half_result.golden_paths = golden_paths
  fluff_half_result.golden_path_fragments = golden_fragments
  fluff_half_result.golden_path_fates = golden_path_fates
  fluff_half_result
end
private :fluff_from_probe
183
+
184
# Expand the half results from #fluff_part1 into complete paths.
#
# For each half result, every golden path is emitted as-is. In addition,
# wherever a golden path fragment ends on a node of an emitted path, a new
# path is built from that fragment followed by the remainder of the emitted
# path. The new path inherits the fate of the path it was spliced into.
#
# Returns an Array with one FlufferPathSet per half result, in order.
def fluff_part2(half_results)
  half_results.map do |half_result|
    # Index the fragments by the (golden) node they finish on
    fragments_by_end_node = half_result.golden_path_fragments.group_by do |fragment|
      fragment.last.to_settable
    end

    # Each stack entry is [path, number of trailing nodes already handled, fate]
    path_set = FlufferPathSet.new
    stack = DS::Stack.new
    half_result.golden_paths.each_with_index do |golden_path, index|
      stack.push [golden_path, 0, half_result.golden_path_fates[index]]
    end

    while entry = stack.pop
      current_path, num_to_ignore, fate = entry

      log.debug "Defragging #{current_path.to_s}/#{fate}, ignoring the last #{num_to_ignore} nodes" if log.debug?
      path_set.push current_path
      path_set.fates.push fate

      # Walk the path from its end towards its start, spawning a new path
      # for every fragment that joins it along the way
      passed_nodes = []
      current_path.trail.reverse.each_with_index do |onode, i|
        # the last one(s) are skipped because they've already been handled
        if i >= num_to_ignore
          frags = fragments_by_end_node[onode.to_settable]
          log.debug "Offshoots from #{onode}: #{frags.nil? ? '[]' : frags.collect{|f| f.collect{|n| n.node_id}.join(',')}.join(' and ')}" if log.debug?

          (frags || []).each do |fragment|
            log.debug "Using an offshoot: #{fragment.to_s}" if log.debug?
            # The fragment extends from the beginning to the current
            # (golden) node. Append the nodes already walked past to make a
            # new complete path, and queue that up too.
            new_golden = fragment.copy
            log.debug "Adding #{new_golden.to_s} and #{passed_nodes.collect{|n| n.node_id}}" if log.debug?
            passed_nodes.reverse_each do |passed|
              new_golden.add_oriented_node passed
            end
            log.debug "Enqueueing: #{new_golden.to_s} ignoring the last #{i+1} nodes" if log.debug?
            stack.push [new_golden, i+1, fate]
          end
        end
        passed_nodes.push onode
      end
    end

    # All the paths are in an array, just a linear series of distinct paths
    path_set
  end
end
251
+ end
252
+ end
253
+ end