finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cde22b93c4ebc35cf5a598e1d4264104743e0168
4
+ data.tar.gz: 5e9b6e324cbe45329886c5fbf3e48236a76c0aa8
5
+ SHA512:
6
+ metadata.gz: 5e2df4e2e7f9e1173d607bb7980189f942ca48980728945c57e50d0b9a15e110251ac22a93b289c94777d8277f79af381d2a4aa6957da981fd5b705c69a8674a
7
+ data.tar.gz: d3454311875a095f19da752d016b0bc954af6829d627480e416c071296103b59b9c448df335f53801d962f4a1c0433591ef16ce4bd5cc8e1f6c95133855f1435
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,3 @@
1
+ [submodule "ext/src"]
2
+ path = ext/src
3
+ url = https://github.com/wwood/velvet
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,31 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+ gem 'bio-ipcress'
6
+ gem 'bio-logger'
7
+ gem 'bio'
8
+ gem 'progressbar'
9
+ gem 'bio-samtools'
10
+ gem 'ruby-graphviz'
11
+ gem 'ds'
12
+ gem 'hopcsv', '~> 0.4'
13
+ gem 'bio-velvet', '~>0.6'
14
+ gem 'bio-velvet_underground', '~>0.3'
15
+ gem 'ruby-progressbar'
16
+ gem 'yargraph', '~>0.0.4'
17
+
18
+ #only needed temporarily until the bio-velvet gem is a proper dependency
19
+ gem 'files'
20
+
21
+ # Add dependencies to develop your gem here.
22
+ # Include everything needed to run rake, tests, features, etc.
23
+ group :development do
24
+ gem "rspec", ">= 2.8.0"
25
+ gem "yard", ">= 0.7"
26
+ gem "rdoc", ">= 3.12"
27
+ gem "bundler", ">= 1.0.0"
28
+ gem 'jeweler'
29
+ gem 'bio-commandeer'
30
+ gem 'pry'
31
+ end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Ben J. Woodcroft
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,59 @@
1
+ __WARNING__! FinishM is very alpha software and not ready for prime time. There are many unfinished parts of it, and many bugs. Please use with care, and don't judge the authors too harshly.
2
+
3
+ # FinishM
4
+
5
+ FinishM attempts to improve draft genomes by considering the computational problem to be about finishing, not assembly in the traditional sense.
6
+
7
+ ## A finishing approach to assembly
8
+ Metagenome and isolate assemblers generate contigs from reads, but still leave valuable information on the table. FinishM exploits this information to improve/finish a draft genome without any further laboratory-based work.
9
+
10
+ In even a moderately successful assembly, resultant contigs constitute the vast majority of the genome being sequenced, but this fact is ignored by assemblers. Unlike a traditional assembler FinishM does not attempt to directly extend contigs, but instead focuses on connecting already assembled contigs.
11
+
12
+ FinishM has several modes:
13
+ * Attempt to improve a genome. See `finishm roundup`. This mode fulfills both the `wander` and `gapfill` modes.
14
+ * Determine which contig ends are connected in the assembly graph. See `finishm wander`.
15
+ * FinishM 'gapfills' (replaces N characters) using a graph-theoretic approach that appears to outperform current gapfilling programs. See `finishm gapfill`.
16
+ * Sometimes a human is better able to interpret an assembly graph than a machine. FinishM creates human interpretable graph visualisations that let humans solve assembly problems. See `finishm visualise`.
17
+ * Some other experimental _de-novo_ (non-finishing) metagenome assembly techniques are implemented in `finishm assemble`.
18
+
19
+ ## Installation
20
+
21
+ First, you'll need Ruby (FinishM is tested on 2.1). Then to install:
22
+ ```sh
23
+ gem install finishm
24
+ ```
25
+
26
+ FinishM also has some external dependencies:
27
+ * clustalo (for `gapfilling`/`roundup`)
28
+ * GraphViz (for the `visualise` mode)
29
+
30
+ ## Usage
31
+ After installation, a listing of the modes and their usage:
32
+ ```sh
33
+ finishm
34
+ ```
35
+
36
+ ## Developing
37
+ To hack on finishm:
38
+ ```
39
+ git clone https://github.com/wwood/finishm.git
40
+ cd finishm
41
+ bundle install
42
+ git submodule update --init
43
+ cd ext/src
44
+ git checkout -b finishm origin/finishm #possibly this step is not required for newer versions of git
45
+ make MAXKMERLENGTH=255 finishm velveth velvetg
46
+ cp obj/shared/libfinishm.so.1.0 ../../lib/external/
47
+ cd ../..
48
+ ./bin/finishm -h
49
+ ```
50
+
51
+ ## Citation
52
+
53
+ A manuscript describing the tools described here is currently in preparation. However, FinishM reuses code from velvet and BioRuby, so these tools may be worth citing.
54
+
55
+ ## Copyright
56
+
57
+ Copyright (c) 2012-2014 Ben J. Woodcroft. See LICENSE.txt for
58
+ further details.
59
+
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "finishm"
18
+ gem.homepage = "http://github.com/wwood/finishm"
19
+ gem.license = "GPL"
20
+ gem.summary = %Q{Genome improvement and finishing with or without further sequencing effort}
21
+ gem.description = %Q{De-novo assemblies generally only provide draft genomes. FinishM is aimed at improving these draft assemblies.}
22
+ gem.email = "donttrustben near gmail.com"
23
+ gem.authors = ["Ben J. Woodcroft"]
24
+ # dependencies defined in Gemfile
25
+
26
+ gem.extensions = "ext/mkrf_conf.rb"
27
+
28
+ # by default, velvet as a git submodule is not included when making the gem
29
+ # but we need it to be.
30
+ gem.files.include "ext/src/src/*"
31
+ gem.files.include "ext/src/Makefile"
32
+ gem.files.include "ext/src/License"
33
+ gem.files.include "ext/src/third-party/**/*"
34
+ end
35
+ Jeweler::RubygemsDotOrgTasks.new
36
+
37
+ require 'rspec/core'
38
+ require 'rspec/core/rake_task'
39
+ RSpec::Core::RakeTask.new(:spec) do |spec|
40
+ spec.pattern = FileList['spec/**/*_spec.rb']
41
+ end
42
+
43
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
44
+ spec.pattern = 'spec/**/*_spec.rb'
45
+ spec.rcov = true
46
+ end
47
+
48
+ task :default => :spec
49
+
50
+ require 'yard'
51
+ YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'bio-velvet'
6
+ require 'graphviz'
7
+ require 'bio'
8
+ require 'set'
9
+
10
+ SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
11
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
12
+ require 'priner'
13
+
14
+ # Parse command line options into the options hash
15
+ options = {
16
+ :logger => 'stderr',
17
+ :log_level => 'info',
18
+ :start_kmers => [],
19
+ :end_kmers => [],
20
+ :coverage_cutoff => 0.0,
21
+ }
22
+ o = OptionParser.new do |opts|
23
+ opts.banner = "
24
+ Usage: #{SCRIPT_NAME} --velvet-graph PreGraphFile [options]
25
+
26
+ Take a graph pre-computed with velveth, and output a GraphViz file for visualisation.
27
+
28
+ Overlayed on top of this graph can be added information e.g. nodes that contain
29
+ particular kmers get coloured.
30
+ \n\n"
31
+
32
+ opts.on("--velvet-graph GRAPH_FILE", "PreGraph file output from velveth [required]") do |arg|
33
+ options[:velvet_pregraph_file] = arg
34
+ end
35
+
36
+ opts.separator "\nOptional arguments:\n\n"
37
+ opts.on("--dot OUTPUT_DOT_FILENAME", "Output the graph into PNG format [default: not output as DOT]") do |arg|
38
+ options[:dot_output_file] = arg
39
+ end
40
+ opts.on("--png OUTPUT_PNG_FILENAME", "Output the graph into PNG format [default: not output as PNG]") do |arg|
41
+ options[:neato_png] = arg
42
+ end
43
+ opts.on("--start-kmers-file FILE", "Path to file containing newline-separated kmers that are associated with the start of the assembly [default: none]") do |arg|
44
+ options[:start_kmers] = File.open(arg).read.split(/\s+/)
45
+ end
46
+ opts.on("--end-kmers-file FILE", "Path to file containing newline-separated kmers that are associated with the end of the assembly [default: none]") do |arg|
47
+ options[:end_kmers] = File.open(arg).read.split(/\s+/)
48
+ end
49
+ opts.on("--coverage-cutoff NUMBER", "Require at least this much coverage, otherwise the node and associated edges are not shown in the output [default: #{options[:coverage_cutoff]}]") do |arg|
50
+ options[:coverage_cutoff] = arg.to_f
51
+ end
52
+
53
+ # logger options
54
+ opts.separator "\nVerbosity:\n\n"
55
+ opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
56
+ opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
57
+ opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
58
+ end; o.parse!
59
+ if ARGV.length != 0 or options[:velvet_pregraph_file].nil?
60
+ $stderr.puts o
61
+ exit 1
62
+ end
63
+ if !(options[:dot_output_file] or options[:neato_png])
64
+ $stderr.puts "Need to specify an output format with e.g. --png or --dot"
65
+ exit 1
66
+ end
67
+ # Setup logging
68
+ Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
69
+
70
+
71
+ log.info "Parsing graph from #{options[:velvet_pregraph_file]}"
72
+ graph = Bio::Velvet::Graph.parse_from_file(options[:velvet_pregraph_file])
73
+ log.info "Finished parsing graph, found #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
74
+
75
+ if options[:start_kmers].length > 0
76
+ log.info "Read in #{options[:start_kmers].length} kmers associated with the start of the assembly"
77
+ end
78
+ if options[:end_kmers].length > 0
79
+ log.info "Read in #{options[:end_kmers].length} kmers associated with the end of the assembly"
80
+ end
81
+ list_of_start_kmers = options[:start_kmers].collect{|k| [k, Bio::Sequence::NA.new(k).reverse_complement.to_s]}.flatten
82
+ list_of_end_kmers = options[:end_kmers].collect{|k| [k, Bio::Sequence::NA.new(k).reverse_complement.to_s]}.flatten
83
+
84
+ if options[:coverage_cutoff]
85
+ cutter = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
86
+ log.info "Removing low coverage (<#{options[:coverage_cutoff]}) nodes"
87
+ cutter.remove_low_coverage_nodes(graph, options[:coverage_cutoff])
88
+ log.info "After removing low coverage nodes, there is #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
89
+ end
90
+
91
+ viser = Bio::Assembly::ABVisualiser.new
92
+ log.info "Converting assembly to GraphViz format"
93
+ graphviz = viser.graphviz(graph, {:start_kmers => list_of_start_kmers, :end_kmers => list_of_end_kmers})
94
+
95
+ # Print
96
+ log.info "Printing assembly graph"
97
+ if options[:dot_output_file]
98
+ graphviz.output :dot => options[:dot_output_file]
99
+ end
100
+ if options[:neato_png]
101
+ graphviz.output :png => options[:neato_png]
102
+ end
103
+
104
+
105
+
106
+
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+
6
+ $:.unshift File.join(ENV['HOME'],'git','bioruby-primer3','lib')
7
+ require 'bio-primer3'
8
+
9
+ if __FILE__ == $0 #needs to be removed if this script is distributed as part of a rubygem
10
+ SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
11
+
12
+ # Parse command line options into the options hash
13
+ options = {
14
+ :logger => 'stderr',
15
+ }
16
+ o = OptionParser.new do |opts|
17
+ opts.banner = "
18
+ Usage: #{SCRIPT_NAME} -p1 <primer1> -f2 <primer_list_file>
19
+
20
+ Uses primer3's \"check primers\" to find whether primers match against each other\n\n"
21
+
22
+ opts.on("--primer1 PRIMER", "Primer on one side [required]") do |arg|
23
+ options[:primer1] = arg
24
+ end
25
+ opts.on("--primers2 PRIMER_FILE", "A list of primers in a file, newline separated [required]") do |arg|
26
+ options[:primers2_file] = arg
27
+ end
28
+
29
+ # logger options
30
+ opts.separator "\nVerbosity:\n\n"
31
+ opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {Bio::Log::CLI.trace('error')}
32
+ opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
33
+ opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| Bio::Log::CLI.trace(s)}
34
+ end; o.parse!
35
+ if ARGV.length != 0
36
+ $stderr.puts o
37
+ exit 1
38
+ end
39
+ # Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
40
+ Bio::Log::CLI.logger(options[:logger]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
41
+
42
+
43
+ # Read in input data
44
+ primers1 = [options[:primer1]]
45
+ primers2 = File.open(options[:primers2_file]).read.split("\n").collect{|c| c.strip}
46
+ log.info "Read in #{primers1.length} left primers and #{primers2.length} right primers e.g. #{primers1[0]} and #{primers2[0]}"
47
+
48
+ goods = 0
49
+ bads = 0
50
+ failed_to_run = 0
51
+ primers1.each do |primer1|
52
+ primers2.each do |primer2|
53
+ begin
54
+ result, obj = Bio::Primer3.test_primer_compatibility primer1, primer2, 'PRIMER_EXPLAIN_FLAG'=>1
55
+
56
+ puts [
57
+ primer1, primer2, result, obj['PRIMER_LEFT_EXPLAIN'], obj['PRIMER_RIGHT_EXPLAIN']
58
+ ].join "\t"
59
+
60
+ if result
61
+ goods += 1
62
+ else
63
+ bads += 1
64
+ end
65
+
66
+ rescue Exception => e
67
+ failed_to_run += 1
68
+ end
69
+ end
70
+ end
71
+ log.info "Found #{goods} OK primer pairs and #{bads} not OK primer pairs"
72
+ log.warn "#{failed_to_run} weren't checked by Primer3 because it failed to run" if failed_to_run > 0
73
+ end #end if running as a script
@@ -0,0 +1,244 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'bio-velvet'
6
+ require 'tempfile'
7
+ require 'pp'
8
+
9
+ SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
10
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
11
+ require 'priner'
12
+
13
+ # Parse command line options into the options hash
14
+ options = {
15
+ :logger => 'stderr',
16
+ :log_level => 'info',
17
+ :velvet_kmer_size => 73,#TODO: these options should be exposed to the user, and perhaps not guessed at
18
+ :velvetg_arguments => '-read_trkg yes',# -exp_cov 41 -cov_cutoff 12.0973243610491', #hack
19
+ :contig_end_length => 300,
20
+ :output_assembly_path => 'velvetAssembly',
21
+ :graph_search_leash_length => 3000,
22
+ }
23
+ o = OptionParser.new do |opts|
24
+ opts.banner = "
25
+ Usage: #{SCRIPT_NAME} --reads <read_file> --contigs <contigs_file>
26
+
27
+ Takes a set of reads and a set of contigs. Then it runs an assembly based on those reads,
28
+ and tries to fill in possible gaps between the contigs. There may be multiple ways
29
+ to join two contig ends together - in this that multiple cases are reported. \n\n"
30
+
31
+
32
+ opts.on("--reads FILE", "gzipped fastq file of reads to perform the re-assembly with [required]") do |arg|
33
+ options[:reads_file] = arg
34
+ end
35
+ opts.on("--contigs FILE", "fasta file of contigs to be joined together [required]") do |arg|
36
+ options[:contigs_file] = arg
37
+ end
38
+
39
+ opts.separator "\nOptional arguments:\n\n"
40
+ opts.on("--output-trails-fasta PATH", "Output found paths to this file in fasta format [default: off]") do |arg|
41
+ options[:overall_trail_output_fasta_file] = arg
42
+ end
43
+ opts.on("--already-assembled-velvet-directory PATH", "Skip until after assembly in this process, and start from this assembly directory created during a previous run of this script [default: off]") do |arg|
44
+ options[:previous_assembly] = arg
45
+ end
46
+ opts.on("--serialize-velvet-graph FILE", "So that the velvet graph does not have to be reparsed, serialise the parsed object for later use in this file [default: off]") do |arg|
47
+ options[:serialize_parsed_graph_file] = arg
48
+ end
49
+ opts.on("--already-serialized-velvet-graph FILE", "Restore the parsed velvet graph from this file [default: off]") do |arg|
50
+ options[:previously_serialized_parsed_graph_file] = arg
51
+ end
52
+
53
+
54
+ # logger options
55
+ opts.separator "\nVerbosity:\n\n"
56
+ opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
57
+ opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
58
+ opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
59
+ end; o.parse!
60
+ if ARGV.length != 0 or options[:reads_file].nil? or options[:contigs_file].nil?
61
+ $stderr.puts o
62
+ exit 1
63
+ end
64
+ # Setup logging
65
+ Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
66
+ Bio::Log::LoggerPlus.new 'bio-velvet'; Bio::Log::CLI.configure 'bio-velvet'
67
+
68
+ # Extract contig ends from each of the input contigs, so that the contig ends can be found in the
69
+ # assembly graph structure.
70
+ contig_ends = []
71
+ velvet_sequence_id_to_contig_end = {}
72
+ contig_lengths = {}
73
+ class ContigEnd
74
+ attr_accessor :sequence, :start_or_end, :contig_name, :velvet_sequence_id
75
+ end
76
+ velvet_read_index = 1
77
+ Bio::FlatFile.foreach(options[:contigs_file]) do |seq|
78
+ contig_lengths[seq.definition] = seq.seq.length
79
+ if seq.seq.length < options[:contig_end_length]
80
+ log.warn "Contig #{seq.definition} is shorter than the end length used to anchor the contig in the assembly. This is not ideal but may be ok."
81
+ #TODO: fix this - should be counting from the middle. Should I just ignore those ones?
82
+ end
83
+ # Add the start of the contig
84
+ contig_end = ContigEnd.new
85
+ contig_end.start_or_end = :start
86
+ contig_end.sequence = Bio::Sequence::NA.new(seq.seq[0...options[:contig_end_length]]).reverse_complement.to_s
87
+ contig_end.contig_name = seq.definition
88
+ velvet_sequence_id_to_contig_end[velvet_read_index] = contig_end
89
+ contig_end.velvet_sequence_id = velvet_read_index; velvet_read_index += 1
90
+ contig_ends.push contig_end
91
+
92
+
93
+ # Add the back of the contig
94
+ contig_end = ContigEnd.new
95
+ contig_end.start_or_end = :end
96
+ s = seq.seq
97
+ contig_end.sequence = s[s.length-options[:contig_end_length]...s.length]
98
+ contig_end.contig_name = seq.definition
99
+ velvet_sequence_id_to_contig_end[velvet_read_index] = contig_end
100
+ contig_end.velvet_sequence_id = velvet_read_index; velvet_read_index += 1
101
+ contig_ends.push contig_end
102
+ end
103
+ log.info "Parsed in #{contig_ends.length} contig ends from the two sides of each input contig"
104
+
105
+
106
+ graph = nil
107
+ if options[:previously_serialized_parsed_graph_file].nil?
108
+ velvet_result = nil
109
+ if options[:previous_assembly].nil? #If assembly has not already been carried out
110
+ Tempfile.open('anchors.fa') do |tempfile|
111
+ contig_ends.each do |contig_end|
112
+ tempfile.puts ">anchor#{contig_end.velvet_sequence_id}"
113
+ tempfile.puts contig_end.sequence
114
+ end
115
+
116
+ log.info "Assembling sampled reads with velvet"
117
+ # Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
118
+ velvet_result = Bio::Velvet::Runner.new.velvet(
119
+ options[:velvet_kmer_size],
120
+ "-short #{tempfile.path} -short2 -fastq.gz #{options[:reads_file]}",
121
+ options[:velvetg_arguments],
122
+ :output_assembly_path => options[:output_assembly_path]
123
+ )
124
+ if log.debug?
125
+ log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
126
+ log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
127
+ log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
128
+ log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
129
+ end
130
+ log.info "Finished running assembly"
131
+ end
132
+ else
133
+ log.info "Using previous assembly stored at #{options[:previous_assembly]}"
134
+ velvet_result = Bio::Velvet::Result.new
135
+ velvet_result.result_directory = options[:previous_assembly]
136
+ end
137
+
138
+ require 'ruby-prof'
139
+ RubyProf.start
140
+
141
+ log.info "Parsing the graph output from velvet"
142
+ graph = Bio::Velvet::Graph.parse_from_file(File.join velvet_result.result_directory, 'Graph2')
143
+ log.info "Finished parsing graph: found #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
144
+
145
+ result = RubyProf.stop
146
+ printer = RubyProf::FlatPrinter.new(result)
147
+ printer.print(STDOUT)
148
+
149
+ if options[:serialize_parsed_graph_file]
150
+ log.info "Storing a binary version of the graph file for later use at #{options[:serialize_parsed_graph_file]}"
151
+ File.open(options[:serialize_parsed_graph_file],'wb') do |f|
152
+ f.print Marshal.dump(graph)
153
+ end
154
+ log.info "Stored a binary representation of the velvet graph at #{options[:serialize_parsed_graph_file]}"
155
+ end
156
+ else
157
+ log.info "Restoring graph file from #{options[:previously_serialized_parsed_graph_file]}.."
158
+ graph = Marshal.load(File.open(options[:previously_serialized_parsed_graph_file]))
159
+ log.info "Restoration complete"
160
+ end
161
+
162
+
163
+
164
+ # Find the anchoring nodes for each of the contig ends
165
+ finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
166
+ log.info "Finding node representing the end of the each contig"
167
+ i = 1
168
+ anchor_sequence_ids = contig_ends.collect{|c| c.velvet_sequence_id}
169
+ anchoring_nodes_and_directions = finder.find_unique_nodes_with_sequence_ids(graph, anchor_sequence_ids)
170
+ num_anchors_found = anchoring_nodes_and_directions.reject{|s,e| e[0].nil?}.length
171
+ anchoring_node_id_to_contig_end = {}
172
+ anchoring_nodes_and_directions.each do |seq_id, node_and_direction|
173
+ next if node_and_direction[0].nil? #skip when there is no node found in the graph for this contig end
174
+ anchoring_node_id_to_contig_end[node_and_direction[0].node_id] = velvet_sequence_id_to_contig_end[seq_id]
175
+ end
176
+ log.info "Found anchoring nodes for #{num_anchors_found} out of #{contig_ends.length} contig ends"
177
+
178
+ log.info "Searching for trails between the nodes within the assembly graph"
179
+ cartographer = Bio::AssemblyGraphAlgorithms::AcyclicConnectionFinder.new
180
+ trail_sets = cartographer.find_trails_between_node_set(graph, anchoring_nodes_and_directions.values.reject{|v| v[0].nil?}, options[:graph_search_leash_length])
181
+ log.info "Found #{trail_sets.reduce(0){|s,set|s+=set.length}} trail(s) in total"
182
+
183
+ node_id_to_contig_description = {}
184
+ anchoring_nodes_and_directions.each do |seq_id, pair|
185
+ next if pair.empty? #When no nodes were found
186
+ node_id = pair[0].node_id
187
+ node_id_to_contig_description[node_id] = velvet_sequence_id_to_contig_end[seq_id]
188
+ end
189
+ contig_end_id_to_partners = {}
190
+ # Tabulate all the partners each way (complete the previously triangular matrix)
191
+ trail_sets.each do |trail_set|
192
+ trail_set.each do |trail|
193
+ start_id = trail.first.node.node_id
194
+ end_id = trail.last.node.node_id
195
+ contig_end_id_to_partners[start_id] ||= []
196
+ contig_end_id_to_partners[start_id].push node_id_to_contig_description[end_id]
197
+ contig_end_id_to_partners[end_id] ||= []
198
+ contig_end_id_to_partners[end_id].push node_id_to_contig_description[start_id]
199
+ end
200
+ end
201
+
202
+ puts %w(contig_end_id contig_name contig_length connections).join "\t"
203
+ trail_sets.each_with_index do |trail_set, i|
204
+ partner_contig_ends = contig_end_id_to_partners[contig_ends[i].velvet_sequence_id]
205
+ partner_contig_ends ||= []
206
+ # Each contig has 2 trail sets associated with it - one for the start and one for the end
207
+ puts [
208
+ contig_ends[i].velvet_sequence_id,
209
+ contig_ends[i].contig_name,
210
+ contig_lengths[contig_ends[i].contig_name],
211
+ partner_contig_ends.collect{|c| c.velvet_sequence_id}.sort.join(',')
212
+ ].join("\t")
213
+ end
214
+
215
+ if options[:overall_trail_output_fasta_file]
216
+ File.open(options[:overall_trail_output_fasta_file],'w') do |outfile|
217
+ trail_sets.each do |trail_set|
218
+ trail_set.each do |trail|
219
+ begin
220
+ trail_sequence = trail.sequence #Get the trail sequence first as this may not be possible.
221
+
222
+ start_id = trail.first.node.node_id
223
+ end_id = trail.last.node.node_id
224
+ start_contig_end = anchoring_node_id_to_contig_end[start_id]
225
+ end_contig_end = anchoring_node_id_to_contig_end[end_id]
226
+ outfile.print '>'
227
+ outfile.print start_contig_end.contig_name
228
+ outfile.print '_'
229
+ outfile.print start_contig_end.start_or_end
230
+ outfile.print ':'
231
+ outfile.print end_contig_end.contig_name
232
+ outfile.print '_'
233
+ outfile.puts end_contig_end.start_or_end
234
+
235
+ outfile.puts trail_sequence
236
+ rescue Bio::Velvet::NotImplementedException => e
237
+ log.warn "Problem getting sequence of found trail #{trail.to_s}, skipping this trail: #{e.to_s}"
238
+ end
239
+ end
240
+ end
241
+ end
242
+ end
243
+
244
+