finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'systemu'
6
+
SCRIPT_NAME = File.basename(__FILE__)
LOG_NAME = SCRIPT_NAME.gsub('.rb','')

# Parse command line options into the options hash. The entries below are the
# defaults; the switches defined on the parser overwrite them.
options = {
  :logger => 'stderr',
  :log_level => 'info',
}
o = OptionParser.new do |opts|
  # The usage line names the two switches that are checked as mandatory below.
  opts.banner = "
Usage: #{SCRIPT_NAME} --query <new_contigs.fasta> --blastdb <assembly_blastdb_basename>

Takes a set of contigs, and an assembly. Works out if there are any contigs that bridge two of the assembly's contigs, judged by blast hits between the ends of the contig and the ends of the assembly's contigs.\n\n"

  opts.on("--query FASTA_FILE", "new contigs fasta file [Required]") do |arg|
    options[:query_file] = arg
  end
  opts.on("--blastdb FASTA_FILE_FORMATTED", "basename of makeblastdb output [Required]") do |arg|
    options[:blastdb] = arg
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
  opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
end
o.parse!

# No positional arguments are accepted, and both input switches must be given;
# otherwise print the usage text and exit with a failure status.
if !ARGV.empty? || options[:query_file].nil? || options[:blastdb].nil?
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)


# Read in the blast file: one BlastResult is appended per line of blast output
blast_results = []
# One row of blast tabular output (the 14 columns requested by this script),
# plus predicates that say whether the alignment reaches an end of the query
# or of the subject sequence.
#
# Coordinates are 1-based, as printed by blast. For nucleotide hits on the
# minus strand of the subject, blast prints the subject start larger than the
# subject end, so the subject predicates look at the lower/upper of the two
# coordinates rather than trusting their order.
class BlastResult
  attr_accessor :qseqid, :sseqid, :pident, :length, :mismatch, :gapopen, :qstart, :qend, :sstart, :subject_end, :evalue, :bitscore, :query_length, :subject_length

  # How far (in bases) from an end of a sequence an alignment may stop and
  # still be counted as reaching that end.
  attr_accessor :cutoff_inwards

  # Minimum alignment length for any of the hits_* predicates to be true.
  attr_accessor :min_hit_length

  def initialize
    @cutoff_inwards = 500
    @min_hit_length = 100
  end

  # True when the alignment extends to within cutoff_inwards of the last
  # base of the subject (on either strand) and is long enough.
  def hits_end_of_subject?
    long_enough? && [@sstart, @subject_end].max >= @subject_length-@cutoff_inwards
  end

  # True when the alignment extends to within cutoff_inwards of the first
  # base of the subject (on either strand) and is long enough.
  def hits_start_of_subject?
    long_enough? && [@sstart, @subject_end].min <= @cutoff_inwards
  end

  # True when the alignment extends to within cutoff_inwards of the last
  # base of the query and is long enough.
  def hits_end_of_query?
    long_enough? && @qend >= @query_length-@cutoff_inwards
  end

  # True when the alignment starts within cutoff_inwards of the first base
  # of the query and is long enough.
  def hits_start_of_query?
    long_enough? && @qstart <= @cutoff_inwards
  end

  private

  # Short alignments are never counted as hitting an end.
  def long_enough?
    @length >= @min_hit_length
  end
end
68
+
# Shellwords is in the standard library; it is required here, next to its only
# use, because file names are interpolated into a shell command line below.
require 'shellwords'

# Run blastn of the query contigs against the assembly database, asking for
# the 14 tabular columns that BlastResult stores. File names are shell-escaped
# so that spaces or shell metacharacters in them cannot alter the command.
blast_command = [
  'blastn',
  '-query', Shellwords.escape(options[:query_file]),
  '-db', Shellwords.escape(options[:blastdb]),
  "-outfmt '6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen'",
  '-evalue', '1e-5',
].join(' ')
status, blast_output, stderr = systemu blast_command
# Anything at all on stderr is treated as a failure
raise stderr unless stderr==""
raise "bad status running blast" unless status.exitstatus == 0
log.debug "Finished running blast, presumably successfully"

# Column order matches the -outfmt string above
columns = [:qseqid, :sseqid, :pident, :length, :mismatch, :gapopen, :qstart,
  :qend, :sstart, :subject_end, :evalue, :bitscore,
  :query_length, :subject_length]
integer_columns = [:length, :mismatch, :gapopen, :qstart,
  :qend, :sstart, :subject_end,:query_length, :subject_length]
float_columns = [:pident, :evalue, :bitscore]

blast_output.each_line do |line|
  row = line.chomp.split "\t"
  next if row.empty? # a blank line carries no hit

  res = BlastResult.new
  columns.each_with_index do |attr, i|
    value = row[i]
    value = value.to_i if integer_columns.include?(attr)
    value = value.to_f if float_columns.include?(attr)
    res.send "#{attr}=", value
  end
  blast_results.push res
end
log.info "Parsed #{blast_results.length} blast results e.g. #{blast_results[0].inspect}"


# For each query sequence, does it map to the ends of both contigs
header = %w(query subject1 subject2 qstart1? qend1? sstart1? send1? qstart2? qend2? sstart2? send2?).join("\t")
blast_results.group_by { |result| result.qseqid }.each do |query_id, hits|
  keepers = []

  hits.each do |hit|
    q_start = hit.hits_start_of_query?
    q_end = hit.hits_end_of_query?
    s_start = hit.hits_start_of_subject?
    s_end = hit.hits_end_of_subject?

    # perfect if it hits the start or the end (but not both) of both the query and the subject, unless it is circular
    if (q_start ^ q_end) && (s_start ^ s_end)
      keepers.push hit
    elsif q_start || q_end || s_start || s_end
      log.info "There's a half-correct hit for #{query_id}: qstart? #{q_start} qend #{q_end} "+
        "sstart #{s_start} send #{s_end}, to subject sequence #{hit.sseqid}"
    end
  end

  case keepers.length
  when 0
    log.debug "no latchings found for #{query_id}"
  when 1
    log.info "Query #{query_id} only latches on to a single end, maybe manually inspect"
  when 2
    log.debug "Query #{query_id} has 2 keepers!"
    # The pair is reported only when exactly one keeper is at the start of
    # the query and exactly one keeper is at the start of its subject.
    one_at_query_start = keepers.count { |hit| hit.hits_start_of_query? } == 1
    one_at_subject_start = keepers.count { |hit| hit.hits_start_of_subject? } == 1
    if one_at_query_start && one_at_subject_start
      outs = keepers.collect { |hit|
        [
          hit.hits_start_of_query?,
          hit.hits_end_of_query?,
          hit.hits_start_of_subject?,
          hit.hits_end_of_subject?,
        ]
      }.flatten
      # The header is printed once, just before the first reported row
      unless header.nil?
        puts header
        header = nil
      end
      puts [query_id, keepers[0].sseqid, keepers[1].sseqid, outs].flatten.join("\t")
    else
      log.info "Query #{query_id} has 2 keepers, but they are fighting it seems"
    end
  else
    log.info "More than 2 keepers found for #{query_id}, manual inspection likely required"
  end
end
153
+
@@ -0,0 +1,143 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'bio-velvet'
6
+ require 'pp'
7
+
SCRIPT_NAME = File.basename(__FILE__)
LOG_NAME = 'finishm'
# Make the gem's own lib directory loadable when run from a source checkout
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
require 'priner'

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :log_level => 'info',
}

# The global parser defines no switches of its own: it only carries the banner
# that lists the subcommands.
global = OptionParser.new do |opts|
  opts.banner = "
Usage: #{SCRIPT_NAME} <command> [<arguments>]

FinishM is a collection of tasks related to assembly and metagenome assembly. Available commands:

wander\tTry to connect contigs (experimental)
gapfill\tFill assembly gaps (N characters) (experimental)
explore\tWhat happens in the graph beyond the end of my contig(s)? (experimental)
visualise\tVisualise the DeBruijn graph (experimental)

Commands for PCR finishing:

primers\tdesign primers for multi-primer multi-lane PCR experimental setup (experimental)
primers_check\ttest a set of primers for incompatibility (experimental)
finish\tprocess results from multi-primer multi-lane PCR experimental setup (experimental)

Utility modes:

sequence\tGiven a defined sequence of nodes, what is the corresponding sequence?
count_paths\tCount the number of paths through assembly graph
find_orfs\tFind possible open reading frames in assembly graph
\n\n"
end
# order! stops at the first non-option argument, leaving the subcommand name
# and its arguments in ARGV.
global.order!
42
+
# Set by whichever subcommand builder below is called; used afterwards to
# validate the options and run the task.
operator = nil

# Subcommand name => lambda returning the class that implements it. The class
# constants are wrapped in lambdas so that each is only looked up when its
# subcommand is actually chosen.
operator_classes = {
  'primers'       => lambda { Bio::FinishM::Primers },
  'primers_check' => lambda { Bio::FinishM::Primers::Checker },
  'finish'        => lambda { Bio::FinishM::Finisher },
  'gapfill'       => lambda { Bio::FinishM::GapFiller },
  'wander'        => lambda { Bio::FinishM::Wanderer },
  'fluff'         => lambda { Bio::FinishM::Fluff },
  'explore'       => lambda { Bio::FinishM::Explorer },
  'assemble'      => lambda { Bio::FinishM::Assembler },
  'visualise'     => lambda { Bio::FinishM::Visualise },
  'sequence'      => lambda { Bio::FinishM::Sequence },
  'roundup'       => lambda { Bio::FinishM::RoundUp },
  'count_paths'   => lambda { Bio::FinishM::PathCounter },
  'find_orfs'     => lambda { Bio::FinishM::ORFsFinder },
}

# Subcommand name => lambda that, when called, instantiates the operator,
# lets it register its switches on a fresh OptionParser and returns that
# parser.
subcommands = {}
operator_classes.each do |name, operator_class|
  subcommands[name] = lambda do
    OptionParser.new do |opts|
      operator = operator_class.call.new
      operator.add_options(opts, options)
    end
  end
end
98
+
# if debugging e.g. 'pry finishm wander ..' the script name itself is the
# first argument, so drop it first. But be careful of finishm no arguments.
ARGV.shift if ARGV[0] && ARGV[0].match(/finishm$/)
subcommand = ARGV.shift

# No subcommand at all: show the global banner
if subcommand.nil?
  $stderr.puts global
  exit 1
end

unless subcommands[subcommand]
  $stderr.puts "Unrecognized subcommand: #{subcommand}"
  exit 1
end

# Add options specific for subcommand (this also sets operator)
opts = subcommands[subcommand].call

# Add global logging options
opts.separator "\nVerbosity:\n\n"
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
opts.separator "\n"
opts.parse!

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)
log_formatter_settings = {:pattern => "%5l %c %d: %m", :date_pattern => '%d/%m %T'}
log.outputters.each do |outputter|
  outputter.formatter = Log4r::PatternFormatter.new(log_formatter_settings)
end
Bio::Log::LoggerPlus.new 'bio-velvet'
Bio::Log::CLI.configure 'bio-velvet'

log.debug "Running FinishM #{subcommand} with these options: #{PP.pp(options, "").gsub(/\n$/,'')}"

# validate_options returning nil or false means the options are acceptable;
# any other return value is reported as the error message.
error_message = operator.validate_options(options, ARGV)
if error_message
  $stderr.puts
  $stderr.puts "ERROR parsing options: #{error_message}"
  $stderr.puts
  $stderr.puts opts
  exit 1
end
operator.run options, ARGV
143
+
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+
# Bio::Velvet::Graph, used below to parse the graph, comes from the bio-velvet
# gem, which the requires at the top of this script do not load.
require 'bio-velvet'

SCRIPT_NAME = File.basename(__FILE__)
LOG_NAME = SCRIPT_NAME.gsub('.rb','')

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :log_level => 'info',
}
o = OptionParser.new do |opts|
  opts.banner = "
Usage: #{SCRIPT_NAME} --velvet-pregraph <GRAPH_FILE>

Parses a velvet graph file and reports how many nodes it contains. The remaining assembly steps are not implemented yet.\n\n"

  opts.on("--velvet-pregraph GRAPH_FILE", "PreGraph file output from velveth [required]") do |arg|
    options[:velvet_pregraph_file] = arg
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
  opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
end
o.parse!

# The graph file is mandatory and no positional arguments are accepted
if !ARGV.empty? || options[:velvet_pregraph_file].nil?
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)


# Read in the velvet graph
log.info "Parsing graph from #{options[:velvet_pregraph_file]}"
graph = Bio::Velvet::Graph.parse_from_file(options[:velvet_pregraph_file])
log.info "Finished parsing graph, found #{graph.number_of_nodes} nodes"

# The steps below are a plan only; none of them is implemented yet.

# Log the number of nodes and arcs in the current graph

# Read in the fasta file of immutable nodes, and extract the two most immutable
# Log that they are found

# Determine that the graph is connected or not between the two most immutable nodes, using some graph theoretic algorithm
# If the graph is not connected, then there is no hope, exit

# Go through the graph to get a list of the cap nodes
# Log the number of cap nodes found

# Trim off all the cap nodes back to cross nodes, keeping track of the lengths

# Print the graph in graphviz dot format
@@ -0,0 +1,241 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'bio-velvet'
6
+ require 'tempfile'
7
+ require 'pp'
8
+
SCRIPT_NAME = File.basename(__FILE__)
LOG_NAME = 'finishm'
# Make the gem's own lib directory loadable when run from a source checkout
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
require 'priner'

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :log_level => 'info',
  :velvet_kmer_size => 43,#TODO: these options should be exposed to the user, and perhaps not guessed at
  :contig_end_length => 200,
  :output_assembly_path => '/tmp/velvetAssembly',
  :graph_search_leash_length => 3000,
  :assembly_coverage_cutoff => 1.5,
}
o = OptionParser.new do |opts|
  opts.banner = "
Usage: #{SCRIPT_NAME} --reads <read_file> --contig <contig_file> --output-trails-fasta <output_file>

Takes a set of reads and a contig that contains gap characters. Then it tries to fill in
these N characters. It is possible that there are multiple ways to close the gap - in that case
each is reported. \n\n"


  opts.on("--reads FILE", "gzipped fastq file of reads to perform the gap closing with [required]") do |arg|
    options[:reads_file] = arg
  end
  opts.on("--contig FILE", "fasta file of single contig containing Ns that are to be closed [required]") do |arg|
    options[:contig_file] = arg
  end
  # Marked as required because the argument check below rejects a run without it
  opts.on("--output-trails-fasta PATH", "Output found paths to this file in fasta format [required]") do |arg|
    options[:overall_trail_output_fasta_file] = arg
  end

  opts.separator "\nOptional arguments:\n\n"
  opts.on("--overhang NUM", "Start assembling this far from the gap [default: #{options[:contig_end_length]}]") do |arg|
    options[:contig_end_length] = arg.to_i
  end
  # --start and --stop are given 1-based and stored 0-based
  opts.on("--start OFFSET", "Start trying to fill from this position in the contig, requires --stop [default: found from position of Ns]") do |arg|
    options[:start_offset] = arg.to_i-1
  end
  opts.on("--stop OFFSET", "Stop trying to fill at this position in the contig, requires --start [default: found from position of Ns]") do |arg|
    options[:end_offset] = arg.to_i-1
  end
  opts.on("--assembly-png PATH", "Output assembly as a PNG file [default: off]") do |arg|
    options[:output_graph_png] = arg
  end
  opts.on("--assembly-svg PATH", "Output assembly as an SVG file [default: off]") do |arg|
    options[:output_graph_svg] = arg
  end
  opts.on("--assembly-dot PATH", "Output assembly as an DOT file [default: off]") do |arg|
    options[:output_graph_dot] = arg
  end
  opts.on("--velvet-kmer KMER", "kmer size to use with velvet [default: #{options[:velvet_kmer_size]}]") do |arg|
    options[:velvet_kmer_size] = arg.to_i
  end

  opts.separator "\nDebug-related options:\n\n"



  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
  opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
end
o.parse!

# Reads, contig and trail output file are all mandatory, and no positional
# arguments are accepted.
if !ARGV.empty? || options[:reads_file].nil? || options[:contig_file].nil? || options[:overall_trail_output_fasta_file].nil?
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)
Bio::Log::LoggerPlus.new 'bio-velvet'
Bio::Log::CLI.configure 'bio-velvet'
log.outputters[0].formatter = Log4r::PatternFormatter.new(:pattern => "%5l %c %d: %m", :date_pattern => '%d/%m %T')

log.debug "Running finishm with options: #{PP.pp(options, "").gsub(/\n$/,'')}" if log.debug?
85
+
# Find where the Ns are
n_region_start = nil
n_region_end = nil
sequence = nil

# The explicit region is only used when both offsets are present; say so
# rather than silently ignoring a lone --start or --stop.
if options[:start_offset].nil? != options[:end_offset].nil?
  log.warn "Only one of --start and --stop was specified, so it is being ignored"
end

Bio::FlatFile.foreach(options[:contig_file]) do |seq|
  # A second record means the file holds more than one sequence. A plain
  # raise (RuntimeError) is used, as for the other errors in this script.
  if sequence
    raise "Sorry, this script can only handle single sequences to be gap filled at the moment"
  end

  sequence = seq.seq

  if options[:start_offset] and options[:end_offset]
    # Offsets are stored 0-based; report them 1-based as the user gave them
    log.info "Trying to gap fill from #{options[:start_offset]+1} to #{options[:end_offset]+1}"
    n_region_start = options[:start_offset]
    n_region_end = options[:end_offset]
  else
    log.info "Determining where to fill from the presence of Ns"

    # Only the first run of Ns (either case) in the sequence is considered
    matches = sequence.match(/(N+)/i)
    if !matches
      raise "Unable to find any gaps in the input sequence. That was a bit too easy.."
    end
    n_region_start = matches.offset(0)[0]
    n_region_end = n_region_start + matches[1].length
    log.info "Detected a gap between #{n_region_start} and #{n_region_end}"
  end

  # Check to make sure we are sufficiently distant from the ends
  if n_region_start < options[:contig_end_length] or
    sequence.length - n_region_end < options[:contig_end_length]
    raise "The gap is too close to the end of the contig, sorry"
  end
end

# An empty input file would otherwise only fail later, when the sequence is sliced
if sequence.nil?
  raise "Unable to find any sequence in the contig file #{options[:contig_file]}"
end
119
+
# Do the assembly, or restore a previously parsed graph
graph = nil
if options[:previously_serialized_parsed_graph_file].nil?
  velvet_result = nil
  if options[:previous_assembly].nil? #If assembly has not already been carried out
    Tempfile.open('anchors.fa') do |tempfile|
      # The slice keeps its original extent, but the start index is clamped
      # at 0: a negative index would count from the end of the sequence.
      anchor1_start = [n_region_start-options[:contig_end_length]-1, 0].max
      tempfile.puts ">anchor1"
      tempfile.puts sequence[anchor1_start...n_region_start]
      tempfile.puts ">anchor2"
      #Have to be in reverse, because the node finder finds the node at the start of the read, not the end
      fwd2 = Bio::Sequence::NA.new(sequence[n_region_end..(n_region_end+options[:contig_end_length])])
      tempfile.puts fwd2.reverse_complement.to_s
      # Close (flush) the file so velvet sees its full contents
      tempfile.close
      log.debug "Inputting anchors into the assembly: #{File.read(tempfile.path)}" if log.debug?

      log.info "Assembling sampled reads with velvet"
      # Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
      velvet_result = Bio::Velvet::Runner.new.velvet(
        options[:velvet_kmer_size],
        "-short #{tempfile.path} -short2 -fastq.gz #{options[:reads_file]}",
        "-read_trkg yes -cov_cutoff #{options[:assembly_coverage_cutoff]}",
        :output_assembly_path => options[:output_assembly_path]
      )
      if log.debug?
        log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
        log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
        log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
        log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
      end
      log.info "Finished running assembly"
    end
  else
    log.info "Using previous assembly stored at #{options[:previous_assembly]}"
    velvet_result = Bio::Velvet::Result.new
    velvet_result.result_directory = options[:previous_assembly]
  end

  log.info "Parsing the graph output from velvet"
  graph = Bio::Velvet::Graph.parse_from_file(File.join velvet_result.result_directory, 'LastGraph')
  log.info "Finished parsing graph: found #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"

  if options[:serialize_parsed_graph_file]
    log.info "Storing a binary version of the graph file for later use at #{options[:serialize_parsed_graph_file]}"
    File.open(options[:serialize_parsed_graph_file],'wb') do |f|
      f.print Marshal.dump(graph)
    end
    log.info "Stored a binary representation of the velvet graph at #{options[:serialize_parsed_graph_file]}"
  end

  if options[:assembly_coverage_cutoff]
    log.info "Removing low-coverage nodes from the graph (less than #{options[:assembly_coverage_cutoff]})"
    cutoffer = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
    # Sequence IDs 1 and 2 are the two anchors, which must not be removed
    deleted_nodes, deleted_arcs = cutoffer.remove_low_coverage_nodes(graph, options[:assembly_coverage_cutoff], :whitelisted_sequences => [1,2])

    log.info "Removed #{deleted_nodes.length} nodes and #{deleted_arcs.length} arcs from the graph due to low coverage"
    log.info "Now there is #{graph.nodes.length} nodes and #{graph.arcs.length} arcs remaining"
  end
else
  log.info "Restoring graph file from #{options[:previously_serialized_parsed_graph_file]}.."
  # NOTE(review): Marshal.load can instantiate arbitrary objects, so this file
  # must only ever be one written by this script, never untrusted input.
  # Opened in binary mode (matching the 'wb' used when writing) and closed
  # once loaded.
  graph = File.open(options[:previously_serialized_parsed_graph_file],'rb') do |f|
    Marshal.load(f)
  end
  log.info "Restoration complete"
end
182
+
183
+
# Find the anchor nodes again. The anchors were given to the assembler first,
# so they are looked up by sequence IDs 1 (start) and 2 (end).
finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
log.info "Finding node representing the end of the each contig"
start_node, start_node_forward = finder.find_unique_node_with_sequence_id(graph, 1)
end_node, end_node_forward = finder.find_unique_node_with_sequence_id(graph, 2)
if start_node and end_node
  log.info "Found both anchoring nodes in the graph: #{start_node.node_id}/#{start_node_forward} and #{end_node.node_id}/#{end_node_forward}"
else
  log.error "start node not found" if start_node.nil?
  log.error "end node not found" if end_node.nil?
  raise "Unable to find both anchor reads from the assembly, cannot continue. This is probably an error with this script, not you."
end

log.info "Removing nodes unconnected to either the start or the end from the graph.."
original_num_nodes = graph.nodes.length
original_num_arcs = graph.arcs.length
filter = Bio::AssemblyGraphAlgorithms::ConnectivityBasedGraphFilter.new
filter.remove_unconnected_nodes(graph, [start_node, end_node])
log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs"


# Optional graphviz renderings of the filtered graph. Each row is:
# options key, format name for the log, extra arguments to graphviz(),
# arguments to output().
anchor_node_ids = {:start_node_id => start_node.node_id, :end_node_id => end_node.node_id}
graph_outputs = [
  [:output_graph_png, 'PNG', {}, {:png => options[:output_graph_png], :use => :neato}],
  [:output_graph_svg, 'SVG', {}, {:svg => options[:output_graph_svg], :use => :neato}],
  [:output_graph_dot, 'DOT', {:digraph => false}, {:dot => options[:output_graph_dot]}],
]
graph_outputs.each do |option_key, format_name, extra_graphviz_options, output_arguments|
  next unless options[option_key]

  log.info "Converting assembly to a graphviz #{format_name}"
  viser = Bio::Assembly::ABVisualiser.new
  gv = viser.graphviz(graph, anchor_node_ids.merge(extra_graphviz_options))
  gv.output output_arguments
end



log.info "Searching for trails between the nodes within the assembly graph"
cartographer = Bio::AssemblyGraphAlgorithms::AcyclicConnectionFinder.new
trails = cartographer.find_trails_between_nodes(graph, start_node, end_node, options[:graph_search_leash_length], start_node_forward)
log.info "Found #{trails.length} trail(s) in total"


# Write each trail as one fasta record, named trail1, trail2, ...
log.debug "Outputing trail sequences"
File.open(options[:overall_trail_output_fasta_file],'w') do |f|
  trails.each_with_index do |trail, trail_index|
    f.puts ">trail#{trail_index+1}"
    f.puts trail.sequence
  end
end
241
+