finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,443 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'tempfile'
3
+
4
+ #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('finishm'); Bio::Log::CLI.configure('finishm'); Bio::Log::CLI.configure('bio-velvet')
5
+
6
# Spec-only helpers that flatten ORF finder results (trails and markers) into
# plain arrays of integers so that examples can compare them with ==.
class GraphTesting
  # Orders the given trails by the list of node IDs they visit, then returns
  # one ORF result per trail: the forward result when +forwards+ is true,
  # otherwise the twin result.
  def self.sorted_path_results(paths, forwards=true)
    ordered = paths.sort_by { |trail| trail.collect { |onode| onode.node_id } }
    ordered.collect do |trail|
      if forwards
        trail.fwd_orfs_result
      else
        trail.twin_orfs_result
      end
    end
  end

  # Builds two arrays of Marker objects, [start_markers, stop_markers], with
  # only position_in_trail set on each marker.
  def self.markers(start_positions, stop_positions)
    build = lambda do |trail_position|
      m = Bio::AssemblyGraphAlgorithms::AllOrfsFinder::Marker.new
      m.position_in_trail = trail_position
      m
    end
    [start_positions, stop_positions].map { |list| list.map(&build) }
  end

  # Converts each marker pair into its trail positions and sorts the result.
  def self.sorted_marker_pair_positions(pair_array)
    trail_positions = pair_array.map { |pair| marker_positions(pair) }
    trail_positions.sort
  end

  # Trail position of every marker, in the order given.
  def self.marker_positions(markers)
    markers.map { |marker| marker.position_in_trail }
  end

  # Sorts marker pairs by trail position, then reports each marker as
  # [node_id, position_in_node].
  def self.sorted_marker_pair_node_positions(pair_array)
    by_trail_position = pair_array.sort_by { |pair| marker_positions(pair) }
    by_trail_position.map { |pair| marker_node_positions(pair) }
  end

  # [node_id, position_in_node] for every marker, in the order given.
  def self.marker_node_positions(markers)
    markers.map { |marker| [marker.node.node_id, marker.position_in_node] }
  end
end
47
+
48
+ describe "AllOrfs" do
49
+
50
it 'should find a hello world ORF' do
  # Single linear trail 1 -> 2 -> 3; only the graph is needed here.
  graph, _otrails = GraphTesting.emit_otrails([[1,2,3]])
  graph.nodes[1].ends_of_kmers_of_node = 'TAAATGGAAA' # stop codon 'TAA', start codon 'ATG'
  graph.nodes[3].ends_of_kmers_of_node = 'AAAAAAATAA' # stop codon 'TAA'
  seed_path = GraphTesting.make_onodes(graph, %w(1s))

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  found = finder.find_orfs_from_problems(finder.find_all_problems(graph, [seed_path]))
  GraphTesting.sorted_paths(found.trails).should == [[1,2,3]]

  fwd = found.trails[0].fwd_orfs_result
  # One ORF: the start in node 1 paired with the stop in node 3.
  GraphTesting.sorted_marker_pair_positions(fwd.start_stop_pairs).should == [[6,30]]
  GraphTesting.sorted_marker_pair_node_positions(fwd.start_stop_pairs).should == [[[1,6],[3,10]]]
  # The leading 'TAA' of node 1 is the only stop seen before any start.
  fwd.initial_start_markers.should == []
  GraphTesting.marker_positions(fwd.initial_stop_markers).should == [3]
  GraphTesting.marker_node_positions(fwd.initial_stop_markers).should == [[1,3]]
  fwd.final_start_markers.should == []
end
79
+
80
it 'should find a hello world ORF in twin direction' do
  graph = GraphTesting.emit([[1,2], [2,3]])
  # Codons are planted on the twin (reverse) strand only.
  graph.nodes[1].ends_of_kmers_of_twin_node = 'TTTAGTTTTT' # stop codon 'TAG'
  graph.nodes[2].ends_of_kmers_of_twin_node = 'TAAATGTTTT' # stop codon 'TAA', start codon 'ATG'
  seed_path = GraphTesting.make_onodes(graph, %w(1s))

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  found = finder.find_orfs_from_problems(finder.find_all_problems(graph, [seed_path]))
  GraphTesting.sorted_paths(found.trails).should == [[1,2,3]]

  twin = found.trails[0].twin_orfs_result
  # The ORF starts in node 2 and stops in node 1 when read along the twin strand.
  GraphTesting.sorted_marker_pair_positions(twin.start_stop_pairs).should == [[16,25]]
  GraphTesting.sorted_marker_pair_node_positions(twin.start_stop_pairs).should == [[[2,6],[1,5]]]
  twin.initial_start_markers.should == []
  GraphTesting.marker_positions(twin.initial_stop_markers).should == [13]
  GraphTesting.marker_node_positions(twin.initial_stop_markers).should == [[2,3]]
  twin.final_start_markers.should == []
end
110
+
111
it 'should find ORFs over a bubble' do
  # Bubble: node 1 forks to nodes 2 and 3, which both rejoin at node 4.
  graph = GraphTesting.emit([[1,2], [1,3], [2,4], [3,4]])
  graph.nodes[1].ends_of_kmers_of_node = 'TAAATGGAAA' # stop codon 'TAA', start 'ATG'
  graph.nodes[2].ends_of_kmers_of_node = 'C'
  graph.nodes[3].ends_of_kmers_of_node = 'A'
  graph.nodes[4].ends_of_kmers_of_node = 'AAATTAAAAA' # stop 'TAA'
  seed_path = GraphTesting.make_onodes(graph, %w(1s))

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  found = finder.find_orfs_from_problems(finder.find_all_problems(graph, [seed_path]))
  GraphTesting.sorted_paths(found.trails).should == [[1,2,4], [1,3,4]]

  # Forward-direction results, one per trail, in sorted trail order.
  results = GraphTesting.sorted_path_results(found.trails, true)
  pair_positions = results.map { |r| GraphTesting.sorted_marker_pair_positions(r.start_stop_pairs) }
  # Both bubble arms are one base long, so both trails give the same ORF.
  pair_positions.should == [[[6,18]], [[6,18]]]
  results.map { |r| r.final_start_markers }.should == [[],[]]
  results.map { |r| r.initial_start_markers }.should == [[],[]]
  results.map { |r| GraphTesting.marker_positions(r.initial_stop_markers) }.should == [[3], [3]]
  results.map { |r| GraphTesting.marker_node_positions(r.initial_stop_markers) }.should == [[[1,3]], [[1,3]]]
end
149
+
150
it 'should respect phase along each trail' do
  # Same bubble shape as above, but the two arms differ in length (1 vs 3
  # bases), so the stop codon in node 4 falls in a different frame on each trail.
  graph = GraphTesting.emit([[1,2], [1,3], [2,4], [3,4]])
  graph.nodes[1].ends_of_kmers_of_node = 'TAAATGGAAA' # stop 'TAA', start 'ATG'
  graph.nodes[2].ends_of_kmers_of_node = 'C'
  graph.nodes[3].ends_of_kmers_of_node = 'AAA'
  graph.nodes[4].ends_of_kmers_of_node = 'AAATTAGAAA' # stop 'TAG'
  seed_path = GraphTesting.make_onodes(graph, %w(1s))

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  found = finder.find_orfs_from_problems(finder.find_all_problems(graph, [seed_path]))
  GraphTesting.sorted_paths(found.trails).should == [[1,2,4], [1,3,4]]

  # Forward-direction results, one per trail, in sorted trail order.
  results = GraphTesting.sorted_path_results(found.trails, true)

  # Only the trail through node 2 closes the ORF.
  results.map { |r| GraphTesting.sorted_marker_pair_positions(r.start_stop_pairs) }.should == [[[6,18]], []]
  GraphTesting.sorted_marker_pair_node_positions(results[0].start_stop_pairs).should == [[[1,6],[4,7]]]

  # On the trail through node 3 the start codon is left without a stop.
  results.map { |r| GraphTesting.marker_positions(r.final_start_markers) }.should == [[], [6]]
  GraphTesting.marker_node_positions(results[1].final_start_markers).should == [[1,6]]

  results.map { |r| GraphTesting.marker_positions(r.initial_stop_markers) }.should == [[3], [3,20]]
  results.map { |r| GraphTesting.marker_node_positions(r.initial_stop_markers) }.should == [
    [[1,3]],
    [[1,3],[4,7]]
  ]
  results.map { |r| r.initial_start_markers }.should == [[],[]]
end
197
+
198
+
199
it 'should respect terminal nodes' do
  # Placeholder: not yet written, so it fails on purpose.
  raise '#todo'
end
202
+
203
it 'should respect minimum orf length' do
  # Placeholder: not yet written, so it fails on purpose.
  raise '#todo'
end
206
+
207
it 'should respect max gapfill paths' do
  # Placeholder: not yet written, so it fails on purpose.
  raise '#todo'
end
210
+
211
it 'should respect max cycles' do
  # Placeholder: not yet written, so it fails on purpose.
  raise '#todo'
end
214
+
215
+ describe 'search_for_codons' do
216
it 'should report end positions for codons starting in first node of trail' do
  graph, otrails = GraphTesting.emit_otrails([[1,2]])
  graph.nodes[1].ends_of_kmers_of_node = 'AAATGAAAAA' # start codon 'ATG', stop codon 'TGA'
  graph.nodes[1].ends_of_kmers_of_twin_node = 'TTTTTAACTT' # stop codon 'TAA'

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  forward, twin = finder.search_for_codons(otrails[0])
  in_node_one = lambda { |marker| marker.node.node_id == 1 }

  # Forward strand: 'ATG' ends at base 5 and the overlapping 'TGA' at base 6.
  forward.start_markers.collect { |m| m.position_in_node }.should == [5]
  forward.stop_markers.collect { |m| m.position_in_node }.should == [6]
  forward.start_markers.all?(&in_node_one).should == true
  forward.stop_markers.all?(&in_node_one).should == true

  # Twin strand: a single stop codon and no start codon.
  twin.start_markers.should == []
  twin.stop_markers.collect { |m| m.position_in_node }.should == [7]
  twin.start_markers.all?(&in_node_one).should == true
  twin.stop_markers.all?(&in_node_one).should == true
end
232
+
233
it 'should work on single-node trail' do
  # GraphTesting.emit is used throughout this file as returning just the graph,
  # so it is assigned directly rather than destructured into an unused second value.
  graph = GraphTesting.emit([[1,2]])
  graph.nodes[1].ends_of_kmers_of_node = 'AAATAATAAA' # stop codon 'TAA'
  graph.nodes[1].ends_of_kmers_of_twin_node = 'TTGATGTTTT' # start codon 'ATG', stop codon 'TGA'

  # Build a trail that contains node 1 only.
  otrail = Bio::Velvet::Graph::OrientedNodeTrail.new
  otrail.add_node graph.nodes[1], Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST

  orfer = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  fwd_res, twin_res = orfer.search_for_codons(otrail)
  fwd_res.start_markers.should == []
  fwd_res.stop_markers.collect{|m| m.position_in_node}.should == [6,9]
  twin_res.start_markers.collect{|m| m.position_in_node}.should == [6]
  twin_res.stop_markers.collect{|m| m.position_in_node}.should == [4]
end
247
+ end
248
+
249
+ describe 'get_overlap_sequences' do
250
it 'should traverse trail to get enough sequence' do
  graph, otrails = GraphTesting.emit_otrails([[1,2]])
  second = graph.nodes[2]
  second.ends_of_kmers_of_node = 'G'*10 # Second node is sequence of G's
  second.ends_of_kmers_of_twin_node = 'C'*10

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  # Expected pair: forward-strand overlap first, then the twin-strand overlap.
  finder.get_overlap_sequences(otrails[0], 3).should == ['AAGG', 'CCTT']
end
261
+
262
it 'should look across multiple nodes if necessary' do
  graph, otrails = GraphTesting.emit_otrails([[1,2,3]])
  middle = graph.nodes[2]
  middle.ends_of_kmers_of_node = 'G' # Second node is single G
  middle.ends_of_kmers_of_twin_node = 'C'

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  # The one-base middle node forces the overlap to reach into the third node.
  finder.get_overlap_sequences(otrails[0], 4).should == ['AAAGAA', 'TTCTTT']
end
273
+
274
it 'should handle a short initial node' do
  graph, otrails = GraphTesting.emit_otrails([[1,2]])
  first = graph.nodes[1]
  first.ends_of_kmers_of_node = 'C' # First node is single C
  first.ends_of_kmers_of_twin_node = 'G'

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  finder.get_overlap_sequences(otrails[0], 4).should == ['CAAA', 'TTTG']
end
285
+
286
it 'should be able to work back from end of trail' do
  graph, otrails = GraphTesting.emit_otrails([[1,2,3]])
  middle = graph.nodes[2]
  middle.ends_of_kmers_of_node = 'C'
  middle.ends_of_kmers_of_twin_node = 'G'

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  # The third argument (true) selects the variant this example names "work back from end of trail".
  finder.get_overlap_sequences(otrails[0], 5, true).should == ['AAACAAAA', 'TTTTGTTT']
end
297
+ end
298
+
299
+ describe 'get_sequences' do
300
it 'should get forward and twin sequences of a node alone' do
  graph = GraphTesting.emit([[1,2]])
  oriented = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(graph.nodes[1], true)

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  # Node 1 is left untouched here; the expectation is ten A's forward, ten T's twin.
  finder.get_sequences(oriented).should == ['A'*10, 'T'*10]
end
310
+ end
311
+
312
+ describe 'word_search' do
313
it 'should report end position of words within strings' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new

  # 'lo' ends at the 5th character of 'Hello'.
  finder.word_search('Hello', ['lo'], 2).should == {'lo' => [5]}

  # A word that never occurs ('lat') has no key in the result.
  finder.word_search('Agitate, infiltrate', ['ate', 'lat'], 3).should == {'ate' => [7,19]}
end
322
+ end
323
+
324
+ describe 'orfs_from_start_stop_markers' do
325
it 'should work when there are no orfs' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  result = finder.orfs_from_start_stop_markers([], [], 0)

  # With no markers at all, every field of the result is empty.
  result.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  result.start_stop_pairs.should == []
  result.initial_start_markers.should == []
  result.initial_stop_markers.should == []
  result.final_start_markers.should == []
end
334
+
335
it 'should skip an orf before first stop' do
  orfer = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  start, stop = GraphTesting.markers [0],[6]
  res = orfer.orfs_from_start_stop_markers(start,stop,0)
  res.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  # Fixed: this line was a bare `==` comparison whose result was discarded,
  # so the example never checked that no start/stop pair is reported.
  res.start_stop_pairs.should == []
  # The start and the first stop are reported as "initial" markers instead of as a pair.
  GraphTesting.marker_positions(res.initial_start_markers).should == [0]
  GraphTesting.marker_positions(res.initial_stop_markers).should == [6]
  res.final_start_markers.should == []
end
345
+
346
it 'should find an orf after a stop' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  starts, stops = GraphTesting.markers([6], [0,9])
  result = finder.orfs_from_start_stop_markers(starts, stops, 0)

  result.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  # The stop at 0 precedes the start, so the ORF runs from 6 to the stop at 9.
  GraphTesting.sorted_marker_pair_positions(result.start_stop_pairs).should == [[6,9]]
  result.initial_start_markers.should == []
  GraphTesting.marker_positions(result.initial_stop_markers).should == [0]
  result.final_start_markers.should == []
end
356
+
357
it 'should work for one orf in 2 frames' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  starts, stops = GraphTesting.markers([3,5], [0,2,6,11])
  result = finder.orfs_from_start_stop_markers(starts, stops, 0)

  result.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  # Each start pairs with the next stop whose position has the same remainder modulo 3.
  GraphTesting.sorted_marker_pair_positions(result.start_stop_pairs).should == [[3,6], [5,11]]
  result.initial_start_markers.should == []
  GraphTesting.marker_positions(result.initial_stop_markers).should == [0,2]
  result.final_start_markers.should == []
end
367
+
368
it 'should work with unclosed orfs' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  result_class = Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult

  # Case 1: a lone stop codon and no start.
  starts, stops = GraphTesting.markers([], [7])
  stop_only = finder.orfs_from_start_stop_markers(starts, stops, 0)
  stop_only.kind_of?(result_class).should == true
  stop_only.start_stop_pairs.should == []
  stop_only.initial_start_markers.should == []
  GraphTesting.marker_positions(stop_only.initial_stop_markers).should == [7]
  stop_only.final_start_markers.should == []

  # Case 2: a lone start codon and no stop.
  starts, stops = GraphTesting.markers([7], [])
  start_only = finder.orfs_from_start_stop_markers(starts, stops, 0)
  start_only.kind_of?(result_class).should == true
  start_only.start_stop_pairs.should == []
  GraphTesting.marker_positions(start_only.initial_start_markers).should == [7]
  start_only.initial_stop_markers.should == []
  start_only.final_start_markers.should == []
end
386
+
387
it 'should work with 3 orfs' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  starts, stops = GraphTesting.markers([8,14,20], [2,11,17,44])
  result = finder.orfs_from_start_stop_markers(starts, stops, 0)

  result.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  # Three consecutive ORFs, each closed by the next stop after its start.
  GraphTesting.sorted_marker_pair_positions(result.start_stop_pairs).should == [[8,11], [14,17], [20,44]]
  result.initial_start_markers.should == []
  GraphTesting.marker_positions(result.initial_stop_markers).should == [2]
  result.final_start_markers.should == []
end
397
+
398
it 'should work with an internal start codon' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  starts, stops = GraphTesting.markers([8,14,20], [5,17,44])
  result = finder.orfs_from_start_stop_markers(starts, stops, 0)

  result.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  # The start at 14 lies inside the ORF opened at 8, so it gets no pair of its own.
  GraphTesting.sorted_marker_pair_positions(result.start_stop_pairs).should == [[8,17], [20,44]]
  result.initial_start_markers.should == []
  GraphTesting.marker_positions(result.initial_stop_markers).should == [5]
  result.final_start_markers.should == []
end
408
+
409
it 'should find first stop codon in a frame before an orf and first start after' do
  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  starts, stops = GraphTesting.markers([7,13,19], [1,4,10])
  result = finder.orfs_from_start_stop_markers(starts, stops, 0)

  result.kind_of?(Bio::AssemblyGraphAlgorithms::AllOrfsFinder::ORFsResult).should == true
  GraphTesting.sorted_marker_pair_positions(result.start_stop_pairs).should == [[7,10]]
  # Only the earliest leading stop (1, not 4) and the earliest trailing start (13, not 19) are reported.
  GraphTesting.marker_positions(result.initial_stop_markers).should == [1]
  GraphTesting.marker_positions(result.final_start_markers).should == [13]
end
418
+ end
419
+
420
+ describe 'orf_sequences_from_trails' do
421
it 'should return orf sequences for a hello world orf' do
  graph = GraphTesting.emit([[1,2], [2,3]])
  graph.nodes[1].ends_of_kmers_of_node = 'TAAATGGAAA' # stop codon 'TAA', start codon 'ATG'
  graph.nodes[3].ends_of_kmers_of_node = 'AAAAAAATAA' # stop codon 'TAA'
  seed_path = GraphTesting.make_onodes(graph, %w(1s))

  finder = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
  found = finder.find_orfs_from_problems(finder.find_all_problems(graph, [seed_path]))

  # Keys are strings describing where each sequence sits on the trail; values are the sequences.
  expected = {
    '(1s:6),2s,(3s:10)' => 'ATGGAAAAAAAAAAAAAAAAAAAATAA',
    '1s,2s,3s' => 'T'*30,
    ',(1s:3)' => 'TAA'
  }
  finder.orf_sequences_from_trails(found.trails).should == expected
end
442
+ end
443
+ end
@@ -0,0 +1,186 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'bio-commandeer'
3
+
4
+ #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('finishm'); Bio::Log::CLI.configure('finishm')
5
+
6
+ describe "FinishM assemble" do
7
it 'should assemble something easy' do
  data_path = File.join(TEST_DATA_DIR,'explore','1')
  # Run from the data directory because the command names the FASTA file relatively.
  output = Dir.chdir(data_path) do
    Bio::Commandeer.run "#{FINISHM_SCRIPT_PATH} assemble --quiet --output-contigs /dev/stdout --assemble-from '1e' --fasta 2seqs.sammy.fa"#, :log => log
  end

  # Expect exactly one FASTA record: a header line and a sequence line.
  header, sequence, *extra = output.split("\n")
  (2 + extra.length).should == 2

  header.should == ">1e"
  sequence[0..60].should == 'TATCCGGTCCCCCTAGAATGTTGATTCCTCCGTCTTCTGATTTCCGTTGGCGGTTCGTATC'
  sequence.length.should == 613
end
20
+
21
it 'should output a pathspec' do
  data_path = File.join(TEST_DATA_DIR,'explore','1')
  output = Dir.chdir(data_path) do
    Bio::Commandeer.run "#{FINISHM_SCRIPT_PATH} assemble --quiet --output-pathspec --output-contigs /dev/stdout --assemble-from '1e' --fasta 2seqs.sammy.fa"#, :log => log
  end

  lines = output.split("\n")
  lines.length.should == 2

  # With --output-pathspec the header carries the path after the contig name.
  lines.first.should == ">1e 1e"
  contig = lines[1]
  contig[0..60].should == 'TATCCGGTCCCCCTAGAATGTTGATTCCTCCGTCTTCTGATTTCCGTTGGCGGTTCGTATC'
  contig.length.should == 613
end
34
+
35
it 'should work when assembling the entire graph' do
  data_path = File.join(TEST_DATA_DIR,'explore','1')
  # No --assemble-from here, unlike the two examples above.
  output = Dir.chdir(data_path) do
    Bio::Commandeer.run "#{FINISHM_SCRIPT_PATH} assemble --no-progressbar --quiet --output-pathspec --min-contig-length 0 --output-contigs /dev/stdout --fasta 2seqs.sammy.fa"#, :log => log
  end
  lines = output.split("\n")

  # Only the first record is checked; the total number of contigs is not asserted.
  lines[0].should == ">contig1 1s"
  lines[1][0..60].should == 'AGGGCAGATTCCCACGCGTTACGCACCCGTGCGCCACTAGACCCGAAGGTCTCGTTCGACT'
  lines[1].length.should == 613
end
47
+
48
it 'should not crash when recoherence kmer is given' do
  data_path = File.join(TEST_DATA_DIR,'explore','1')
  # Smoke test: the command's output is deliberately not inspected.
  Dir.chdir(data_path) do
    Bio::Commandeer.run "#{FINISHM_SCRIPT_PATH} assemble --no-progressbar --assembly-kmer 7 --recoherence-kmer 51 --quiet --output-pathspec --output-contigs /dev/stdout --fasta 2seqs.sammy.fa"#, :log => log
  end
  # Not sure what is supposed to come out of here with such a short assembly-kmer, but just so long as it doesn't crash.
end
56
+
57
it 'should use recoherence' do
  cmd = "#{FINISHM_SCRIPT_PATH} assemble --no-progressbar --assembly-kmer 51 --recoherence-kmer 90 --quiet --output-contigs /dev/stdout --fasta-gz #{TEST_DATA_DIR}/gapfilling/6/reads.random1.fa.gz,#{TEST_DATA_DIR}/gapfilling/6/reads.random2.fa.gz"

  # Run inside a throwaway directory that is removed when the block exits.
  contigs = Dir.mktmpdir do |scratch|
    Dir.chdir(scratch) do
      puts cmd
      Bio::Commandeer.run cmd
    end
  end

  # Two contigs are expected, each as a header line plus one unwrapped sequence line.
  # NOTE(review): each sequence below was rejoined from a line-wrapped rendering
  # of this spec - confirm against the original fixture before relying on it.
  contigs.should == '>contig1
GTGACGGTGGAGCCCACGCAACGACCCGAGGAGTGGCCGGGTTTACAGCAGCGGGGGGAGCTGGGCAAACCCTCAGTTGACCGGTTCTCACTTATCGGGTCAAAGGATTAGGCATAATAAGGAGAGTTAGAATGTCTTACCTACTTAAAAACGCCCGCAGCAAAGGGACTAACGATGTTTGAACGAGCTCCTGCGCCCATGTGCTGGGTAGGAAGTGCGACCGGGAATTGGTTTTGGATCCAGGCGGATGGGCACTAACCGGCGGTTAAACCACGGGGCTAGTCGCATTGTTACCTAGTGGGTTGAGCGCAACCAATGATGGGCCTCCGCCTCGCGACTCCTCTCGAGTACCTCCTATGTGGGGAGTGATAGGGATTCGGGAATGCCATTTTGACCCTGAGCTGTTGAAGGAACTTTGCAGACGCCCTCACGTATACCTATTAGGGGAACGGTGCTAGGTATTAGATATACTTCTGCCTAGACAATAGCAGCGGTTCTCAGCGCACCACTACCGCACGATAGCCACTCATGGATGCGTTATATCGTGTTCCAAGTCCAAGCGTTCTCCTCGGGTTAGTTCCCATCATAGATGCTGGTGGAGCTCAATCCGGCAGCATAATGAAAGGTTACCCGATGTGGCCTATGTGCGACCTCACTGGAAGGAAACGCAGACGAGGGTAAAAGGCCTCTGGGAGCCCCCCCGCGACTATGCGTGGCCGTGTTTCCTTCCTCTGCATAAGACCCGATCTGTTATTCCTTTAGAGTGCGCCAACCTGGAACTCGCCTCCGCCATCGTGTATCGGAACTAAACTCGCGAGTGTTTGAAACGAACATGATGTAGTTTTCAGGATACTTGATGCATACGCACACGTACTCGCATGCCAAGGTATAGATGCTCCCCGTATGAAGTCACGTGGCCTGATCGCACAGAAACATTTCGGTGGACAGCAGATTCGGAACGACCGCGCAGCGCCGTGAGAAAACGTCACTAAAATAAAAACCATAGGCAAGATATACGAAGCGTGGGTCATTCTACGGCATCAAACGTTTCTATTTGTCTTTCTCGGGCCTGGTACGTTAAAAAACCGGTGGCCACCCCGTATTCCGGTAAAGTCAACTCTGGGACGCAAATGCCGAACACGGTGATTACTCAAAATTTCGTGAGCAGACGCCTACGTTAGTTTGGTAATGAAATATATTTCGGGTCGGTTGTTGGTTTTGGCCACTTCAGATGCATACTGGCCCGACTATCCTATACATTGGAGATTTATCAATGCTTGCATCGGGAGCCCCGATTCGAGCGTATTGTTGGAACAACCATCGCCGCGTCCATAACCACTTGTCAGTTCTTCACATTGCGTTCGGAACGTTCACCCCTCAGGATTTGGGACAGTGTGAATTCGATTGTCTGAAGTGTTTTTAAGCAGGCCTCCAGGTCGCGTGTTTTACATGGGTGGCTCGGGCTGGGACTCCGTGCCGTAGTCGACGTCGACCGCAACTGATGAGCGTATACGGAGATATTACTCTGCGATTTCAAGAGAGCGCCCGGGTTAGGCTGTCGCTTCATAATGACAGTCTGTGCCGGAGAGGGCATGTTAGCCAGGCTAGACCTGCACGGGAACTCTTTAGAGTTAGGAGTAAATATGGAATGCGTAAAATCCTAAGATTGTGACTGTCTAATGACCTCGCGCTGCAAATGCGCATCACTAGTAATAGGGTGGGGATTCGGCAAAACCGATTGATCGTTGCAACGGTTCCCAACTCGAAAGTTCAAAAAATTCAACGAGCAGACTACCGCCATCACCTCCTTTACCTGGGGGAGCTCCCTAGCGGTCCCTAGATTGGGCACAATTGCGCGATGGAGGCCCCGGATGTTCTTGGAAACGCGTTGAACATCATTCATTTAATCTGGAAACTGTTAGACCTGGGCTACTATCCGTTTGTCAGTTCGGAAACTATATTTCATACAGGAATGAATAGGCCCCGATCACCTGCCCCG
>contig2
AGCACACCTTGCCCAGATGTGGCCGAAGCCGGGCGCCAGTAGACGGCACCCCTGTATACAGGTCCTCTGCGTCAACCAGGGAACCCCTCCATCGACCGGGGTCGCACACCTTTAATCATAAAAATTGTGTTGTGTAGCGCCTCTCTGGAAAGTAAAGTAACCGCTTTCCCCTGACCACGTGTGACGACCCCCCATTTGGCAAGAGCCCGAGGGACCCGCTGCGTATTATCTCCAACGTCGCAAGTCAGGCGCCTTATTCGGGTGCATCGATATGGCGTCCCCACTTTGAGGTTAGTAGCTTACCAAACCAGAGATACCGTCAAGACTATTCATTTCTCGGACACGTTTCATTTTGGACAGTGATGTGGGTCTGCAAATTCGGGTTTATTTACTTCCCACATGTGGACCTGTGCACTTCTTAGGTTGAAGCCAATGAACAAAGCCTCGTCACCGACGATGCTTACGTTTCTCACCTCGCGTTATTGATTGCCTTGTGCCTTGCGTAATCATAGTCGTCAATCAATGACTTAAGTTATTCCATCCAAATCCCGGGTCACCTCTTATGTAGACAGGAATCCCGCCAACTTTGTACAAAGGGGAACCGGCTAGGGACAATGGCCTCGATAGGAGCCTACCATCTGACTCTTTACAGGACGGTTGGCACGAACAGATGCGAATTTCTAATACCATGTTAATAGCCCTCTGGGGATCGCGCCGGCTACGTAATGGTCGCGGCTGGATGTTCCGGCTTAAAAGTTTGGCAACCTGTGGACTCAACGTCGCTTGTCAATCGGATCCCGGAAGCCCTATAACCTTTTGTCAGCTCTGAACTAACAGGCATGGTGGAGCTAAGCACTCATTAGCTTGCAATTATTACAGGATGATATGCCACAAGATCTACGAGTGATTTGAAGGCACACTCCCCGAGGTGGTACCGATGAAATAGGTGGTGCTATCGAAATATATTTCATTACCAAACTAACGTAGGCGTCTGCTCACGAAATTTTGAGTAATCACCGTGTTCGTGAGACCTTAAGGTCCGGGGCTACCAGAGCCGATGGATCGGAGCGCCCGAACAACGTGAAGGTGACTACGTTCTACCACGGTTGACTCGTACGTCGACTAGCAAATTCTTGGACTTTAGGGGCGAAAAAAGTTGGAATAATTCTTCTCGGCTTGGGTGATAGCTTTCCACCTAACCCTAAGGAGTGTAGTGCAATAGTGGATGTTATTCAGGTTGCAACTCTAGTTGGGCCCGGAACCATGCCGTAGAATGACCCACGCTTCGTATATCTTGCCTATGGTTTTTATTTTAGTGACGTTTTCTCGCTTTTACCCGGCTCGAAAAGCGTGGGGCCCGTTTAGACCGCGGGCGTCCGCTGGCCTATTGATCAGAGTTGTCTAGCGCGTCGAGGAGGGTGGCATGATTTACAGCTTCCGCCTGAGCAACGGACGGATGTGTCCGGGGTGTAACCCATCTATAAGCTATCGTCAGAGTTGTGATCCTAGTAGCTATTATAGCAAGGCTGTAGGGATCGTCGCGGGGACTGACCAATACGCACCCGTTCCCGAGGACGGGAATCCACCAGAGTTTGGGTCTTCTTTCGCCATTGGTTGCTACTGCCAACACTTCTGACCTGCGGGAGTCAGTAGGACCCCAGACTTAATAATCCGCCCACGGCGAAGGAGTATTCGTACACAATCAGAGGCGGCAGAGCGCTAGCGGGGCAGCGTCAGCGCCTGCGGCGCATTGAGTTGGCTGGCGACCAGAGGTTAAGCTTCGATGGTCGGCCCCTTGCAAGGCCCTTTAACAGTTTTAGGACACGCGTGCCTCAGCATTAGTAGTGTCACCTCCCGTCCACGTCCACCTTAGTGTTTTGCGCGCTGCCACGTAGTACTGCGCTTAGCTTCTTCTGGTGGAAACGGACCTGGATTACTAACAAAGATGGTGTAGCAAGACCCGCCAGGTCAATTCAATTTCGGCCGCATTCCACCGACGATTAGTTTGAAAGGGTTATAGGAGCCGAGATCTGCTCGGGGGGACTTACAGTCTACTTCATGATCCGTACGCAGCGGTAGGTATGGCGTTACAGAGTTGAATGAATTCTCAGCACCGATGAACGATG
'
end
72
+
73
it 'should output contig stats' do
  data_path = File.join(TEST_DATA_DIR,'explore','1')
  stats = nil
  Dir.chdir(data_path) do
    # Contigs go to /dev/null; only the stats table on stdout is inspected.
    stats = Bio::Commandeer.run "#{FINISHM_SCRIPT_PATH} assemble --no-progressbar --min-contig-length 0 --quiet --output-pathspec --output-contigs /dev/null --output-contig-stats /dev/stdout --fasta 2seqs.sammy.fa"#, :log => log
  end
  # +contig1 21.15370018975332
  # +contig2 62.07488986784141
  # +contig3 22.654135338345863
  # +contig4 34.90963855421687
  # ...
  puts stats
  # Compare against a literal substring. The previous regex left '.' unescaped,
  # so each decimal point matched any character and the check was looser than
  # the numbers it spelled out. Like the regex, this is not anchored at either end.
  expected_head = "name\tcoverage\ncontig1\t21.15370018975332\ncontig2\t62.07488986784141\ncontig3\t22.654135338345863\n"
  stats.include?(expected_head).should == true
end
87
+
88
it 'should bubbly assemble' do
  reads = "#{TEST_DATA_DIR}/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz"
  output = Bio::Commandeer.run "#{FINISHM_SCRIPT_PATH} assemble --bubbly --no-progressbar --quiet --output-contigs /dev/stdout --fasta-gz " + reads
  # --output-pathspec

  # The expected contig is stored wrapped at 80 columns for readability.
  wrapped = <<FASTA
>contig1
GACCCGCGTGAGCTCTTAAGTACCTAGACTTCACTCCCGGCTTTAGAGTAGTATCGGTACGGGACTCCCTGACCTAAGTT
AGGACGGCGTCCGCCGTAGAACGTGGATGATCTGAAGCATTTCGGAGCAGGCCCTAAGCGCGGCTCCTCTTCCATCGCAG
TCCAAACGCCATTTCAGTTAGTGCGCGCGAGCAGCTCCAGGAGAGCGCTCCGGTGAAACTATCCGTTTGTATACGTCTCC
TACCTGATTCCAGACCAACAAACGGTAACCCTAGCTAAGACTCGTTGCTGAGACCACATCAATCAAGGCGACAGGGAATA
GGAAGTTAGTCTCGAAATTAGCTACAGGGACGTACACCTATTACCGAGCCCACTCGCTAACTCTAACTAAGACTCCTGCG
CCACCGGCAAACATGTTGAACTTCTTATTTCGGCCGTCCAATTAAATCGTTCTTGGTGTGGCGACTACATTAAACATTAT
CACATGCCGGCATTTGTTCAACTTCTAGGGGCCCCGGAATTGGCCTGACCCTCTTAAGGCTGTTGCCGTACGATTGAATC
AGTAGTTGGTTGACGATAGCCCCCGGGGTACAGAGGCTCCTAGTCGCATTTATTTGAGTAACGCAGGCCCATGAAAGATG
GCCTTCGACAAAAAGTAATTAAGCACCGTATAATTTGTGGGTGATTTGCCGGAACATAACATTCCTACCTGACCGAGTCT
CTATTAACCAGCGGGCAGGTGACGAAATGCGCCACACTACTCCTCTGACAGCTGAGGATCACAGCCGAGATGTAAGTCTC
AGCAATAGGCAAAACCCGAATAGTTTTTGGGATATAGGAACTCCTTGCCTCTACCGTTTGCGCGCAGATGCCATAAGGTG
CACGCCATCTAACCGTCCTCGCCGTATCGTGTCTCTGGCGAGCTTTTAATGGCAACATAGAACTTACATTTATAACGCGC
ATAGGACCCATCATACCTGCCACTTTTGAAGGGGATCCACGTTTCCTAAATCAGAATACTGATTAATTAATTTACGGATG
TATACCCACAGTCTGTAGAAATAGAAAAGGTTGTAACGTGCATCCCGGCGACACTGGTAATGGTGCGTCTTGCTTCGGAG
GTTGATTTATTGTGCCCCCCGTCACACAGCATCCCTCCGACGTGTTAAGTTAGTAAGCAACATTAAAAAAAACTCACGCT
CGATCTTACGTATCTTCGAAGGACGGTATGGGATGGTAGGTCGCCTGCGGGGTACTGTGGCAACGCACGGATCTTAATAC
GGAGCTATCGCGTGTATACCGTACGGGAGAACTGCGGATTAGCTATAGCAAAAAACTACGGTGTCCCATTCGCCGTAAAG
TAACGCAGTAGGGAGGGGGGGCCGATCATGGTCCACAACCTAGGGCATGCCTCCTCGGTAAAACACTATCCCCAGAGTTG
TAAACTAGCCTGGGAGCAGGTACTTGAAGCACCTTGCATCTTTAACTTAGCTTACGATGGAGACTACCCACCCGATAATG
ACTGCTTTGGCGTGGACCTCTGGATGAAGAACGGACTTCCTTACCAGCGCGGAATTGTGGTTCATCTGGACCCAGTTCCG
ACCTGAGCATTCGGTTCGGCTAACGAATGTAAATCGGTCTGAAACAGTGATCCTCGCTGCCTATACGTTGACAGTGGTCA
AATACAACGTTAGTGGTTCTTTCTTACATCATTATCGTGCTAGCCCTGCGCTTCTCTTCCCAAATGGCTTCTAATCCTGT
ACCTGGAATCATTCTCTCAGCCCGGATCGAGTTCTGCCGGGATAACGCCGATTACGCATACTTACGTAAAAGACCAGAGG
ATTAGCACTGCGATAAGGCCGTAATGAAGGTAAGAGGGAAACAGATCATTTGGGTATATATCACAACTGTCGCCCCCTTT
AGCTGTGTAGTCATATGTCGCCAGTACGAAATATTATCCCAATGCAGTGTCCGGCCTTCGCATTTGTCACGGCATGCTTG
AAAGATAGCGGTGGGGTTGAGCAGCGTGGGCGCTCCTTCAGATCACCGCACTGTATGGCAGCTCGCTCCGGTAATAGTCT
GCACGAACCGACGCAACATCAGTATCGGGTACGATAGTTAAGAGATACGTGCAGTAGCATGCTCTCTTTGGCAGAGCGCC
CCCTCGAGCAATTTATGGCCCAAGACTCAACAGACAAGCATCGTGTGGCGATGAGTAACTATGCCAGACGAGTTGACTGG
TAGTTGTGACGCTCATTGTCTCAATCAATCGGTGGTCGTCACGGGACCAGACTGCCCCCATTTGGCTCGCTGTGAACCCG
AGACGTAATGTCAGGACTCTCTGGGGTCGTGGGAATCATCCAAGTACCCAACCCCCCGGAGTCATGGGCGCAACATCCGT
CTCAGAATGATTTTGAAGGAAGCTCTGGGAAGGTTCAATTAATGAACTGGTCGGCATAATTATTCATTCGTATCGGCTCG
ATCTGAGAGATGGATATGGAAAAATCAAGATAACGCCCATTTGACCGACCGTACCTTAAAGCAGACGCGAATTATAGCGA
GCGGTTATACCAGCAATAATTATGGGGGTAGACTTGTTCGTGTAGACAGCGTAATAATTCCGTCAGGTGCACTTTGCTGT
CTTAACCGGCCGGTAGTCGAGCGGACAGGTCCACCGCACTAGGTGGTGCTATTCGATGGTTTCATGCCACCACGCAAGGC
TAACACGGTCGCGCATATACATCCCGACAGACAAGAGTGGATGCTTCATTGATTGCTTGATAATCTCTTGTACAGGAGAT
TGTTGGCGTGTCATACCGATAAGATGCTGCCATACATAGGCAGACTGGGCCGGATTCTACAGTCGGTGGGCTGCAGCCGA
AGCTTAGTTGTGCAGGTCGTAGCGATAAGCGAGTCTTGTTTACGGTTAACCCCCGTCTCCTGCAACTATAACCGAGGGAA
TCGTGACTACCAAGAGCCCTATGAGATCCCTC
FASTA
  # Drop every newline that follows a base so the sequence becomes one line
  # (the header ends in a digit, so its newline survives), then restore the
  # final newline.
  expected = wrapped.gsub(/([ATGC])\n/,'\1') + "\n"
  output.should == expected
end
136
+
137
it 'bubbly assembler should output pathspec' do
  reads = "#{TEST_DATA_DIR}/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz"
  cmd = "#{FINISHM_SCRIPT_PATH} assemble --output-pathspec --bubbly --no-progressbar --quiet --output-contigs /dev/stdout --fasta-gz " + reads
  puts cmd
  output = Bio::Commandeer.run cmd

  # The header carries the pathspec after the contig name. The expected contig
  # is stored wrapped at 80 columns for readability.
  wrapped = <<FASTA
>contig1 1s,2s,3s
GACCCGCGTGAGCTCTTAAGTACCTAGACTTCACTCCCGGCTTTAGAGTAGTATCGGTACGGGACTCCCTGACCTAAGTT
AGGACGGCGTCCGCCGTAGAACGTGGATGATCTGAAGCATTTCGGAGCAGGCCCTAAGCGCGGCTCCTCTTCCATCGCAG
TCCAAACGCCATTTCAGTTAGTGCGCGCGAGCAGCTCCAGGAGAGCGCTCCGGTGAAACTATCCGTTTGTATACGTCTCC
TACCTGATTCCAGACCAACAAACGGTAACCCTAGCTAAGACTCGTTGCTGAGACCACATCAATCAAGGCGACAGGGAATA
GGAAGTTAGTCTCGAAATTAGCTACAGGGACGTACACCTATTACCGAGCCCACTCGCTAACTCTAACTAAGACTCCTGCG
CCACCGGCAAACATGTTGAACTTCTTATTTCGGCCGTCCAATTAAATCGTTCTTGGTGTGGCGACTACATTAAACATTAT
CACATGCCGGCATTTGTTCAACTTCTAGGGGCCCCGGAATTGGCCTGACCCTCTTAAGGCTGTTGCCGTACGATTGAATC
AGTAGTTGGTTGACGATAGCCCCCGGGGTACAGAGGCTCCTAGTCGCATTTATTTGAGTAACGCAGGCCCATGAAAGATG
GCCTTCGACAAAAAGTAATTAAGCACCGTATAATTTGTGGGTGATTTGCCGGAACATAACATTCCTACCTGACCGAGTCT
CTATTAACCAGCGGGCAGGTGACGAAATGCGCCACACTACTCCTCTGACAGCTGAGGATCACAGCCGAGATGTAAGTCTC
AGCAATAGGCAAAACCCGAATAGTTTTTGGGATATAGGAACTCCTTGCCTCTACCGTTTGCGCGCAGATGCCATAAGGTG
CACGCCATCTAACCGTCCTCGCCGTATCGTGTCTCTGGCGAGCTTTTAATGGCAACATAGAACTTACATTTATAACGCGC
ATAGGACCCATCATACCTGCCACTTTTGAAGGGGATCCACGTTTCCTAAATCAGAATACTGATTAATTAATTTACGGATG
TATACCCACAGTCTGTAGAAATAGAAAAGGTTGTAACGTGCATCCCGGCGACACTGGTAATGGTGCGTCTTGCTTCGGAG
GTTGATTTATTGTGCCCCCCGTCACACAGCATCCCTCCGACGTGTTAAGTTAGTAAGCAACATTAAAAAAAACTCACGCT
CGATCTTACGTATCTTCGAAGGACGGTATGGGATGGTAGGTCGCCTGCGGGGTACTGTGGCAACGCACGGATCTTAATAC
GGAGCTATCGCGTGTATACCGTACGGGAGAACTGCGGATTAGCTATAGCAAAAAACTACGGTGTCCCATTCGCCGTAAAG
TAACGCAGTAGGGAGGGGGGGCCGATCATGGTCCACAACCTAGGGCATGCCTCCTCGGTAAAACACTATCCCCAGAGTTG
TAAACTAGCCTGGGAGCAGGTACTTGAAGCACCTTGCATCTTTAACTTAGCTTACGATGGAGACTACCCACCCGATAATG
ACTGCTTTGGCGTGGACCTCTGGATGAAGAACGGACTTCCTTACCAGCGCGGAATTGTGGTTCATCTGGACCCAGTTCCG
ACCTGAGCATTCGGTTCGGCTAACGAATGTAAATCGGTCTGAAACAGTGATCCTCGCTGCCTATACGTTGACAGTGGTCA
AATACAACGTTAGTGGTTCTTTCTTACATCATTATCGTGCTAGCCCTGCGCTTCTCTTCCCAAATGGCTTCTAATCCTGT
ACCTGGAATCATTCTCTCAGCCCGGATCGAGTTCTGCCGGGATAACGCCGATTACGCATACTTACGTAAAAGACCAGAGG
ATTAGCACTGCGATAAGGCCGTAATGAAGGTAAGAGGGAAACAGATCATTTGGGTATATATCACAACTGTCGCCCCCTTT
AGCTGTGTAGTCATATGTCGCCAGTACGAAATATTATCCCAATGCAGTGTCCGGCCTTCGCATTTGTCACGGCATGCTTG
AAAGATAGCGGTGGGGTTGAGCAGCGTGGGCGCTCCTTCAGATCACCGCACTGTATGGCAGCTCGCTCCGGTAATAGTCT
GCACGAACCGACGCAACATCAGTATCGGGTACGATAGTTAAGAGATACGTGCAGTAGCATGCTCTCTTTGGCAGAGCGCC
CCCTCGAGCAATTTATGGCCCAAGACTCAACAGACAAGCATCGTGTGGCGATGAGTAACTATGCCAGACGAGTTGACTGG
TAGTTGTGACGCTCATTGTCTCAATCAATCGGTGGTCGTCACGGGACCAGACTGCCCCCATTTGGCTCGCTGTGAACCCG
AGACGTAATGTCAGGACTCTCTGGGGTCGTGGGAATCATCCAAGTACCCAACCCCCCGGAGTCATGGGCGCAACATCCGT
CTCAGAATGATTTTGAAGGAAGCTCTGGGAAGGTTCAATTAATGAACTGGTCGGCATAATTATTCATTCGTATCGGCTCG
ATCTGAGAGATGGATATGGAAAAATCAAGATAACGCCCATTTGACCGACCGTACCTTAAAGCAGACGCGAATTATAGCGA
GCGGTTATACCAGCAATAATTATGGGGGTAGACTTGTTCGTGTAGACAGCGTAATAATTCCGTCAGGTGCACTTTGCTGT
CTTAACCGGCCGGTAGTCGAGCGGACAGGTCCACCGCACTAGGTGGTGCTATTCGATGGTTTCATGCCACCACGCAAGGC
TAACACGGTCGCGCATATACATCCCGACAGACAAGAGTGGATGCTTCATTGATTGCTTGATAATCTCTTGTACAGGAGAT
TGTTGGCGTGTCATACCGATAAGATGCTGCCATACATAGGCAGACTGGGCCGGATTCTACAGTCGGTGGGCTGCAGCCGA
AGCTTAGTTGTGCAGGTCGTAGCGATAAGCGAGTCTTGTTTACGGTTAACCCCCGTCTCCTGCAACTATAACCGAGGGAA
TCGTGACTACCAAGAGCCCTATGAGATCCCTC
FASTA
  # Drop every newline that follows a base so the sequence becomes one line
  # (the header ends in 's', so its newline survives), then restore the final
  # newline.
  expected = wrapped.gsub(/([ATGC])\n/,'\1') + "\n"
  output.should == expected
end
186
+ end