finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554)
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,73 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'bio'
3
+
4
# Lightweight stand-in for a graph trail: it carries nothing but the
# nucleotide sequence that the path filter reads via #sequence.
class DummyTrail
  attr_accessor :sequence

  # seq:: the sequence string this trail should report
  def initialize(seq)
    @sequence = seq
  end

  class << self
    # Wraps every string in +seqs+ in its own DummyTrail, keeping the order.
    def trails(seqs)
      seqs.map { |nucleotides| DummyTrail.new(nucleotides) }
    end
  end
end
15
+
16
+ #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('finishm'); Bio::Log::CLI.configure('finishm')
17
+
18
+ describe "kmer coverage path filter" do
19
# The only kmer on record ('AAAA', abundances [0, 0]) does not occur in the
# trail, so the filter is expected to return no paths.
it 'should rule things out when no appropriate kmers are present' do
  abundances = Bio::KmerMultipleAbundanceHash.new
  abundances['AAAA'] = [0, 0]
  candidate = DummyTrail.new('ATATATTTA')

  path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
  surviving = path_filter.filter([candidate], abundances, [1, 1])

  surviving.should == []
end
26
+
27
# Both 8-mers of the trail are recorded with abundances of at least 1 in each
# column, matching the thresholds [1, 1]; the trail is expected to be kept.
it 'should not rule things out when appropriate kmers are present' do
  abundances = Bio::KmerMultipleAbundanceHash.new
  abundances['ATATATTT'] = [1, 10]
  abundances['TATATTTA'] = [3, 1]
  candidate = DummyTrail.new('ATATATTTA')

  path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
  surviving = path_filter.filter([candidate], abundances, [1, 1])

  surviving.map(&:sequence).should == %w(ATATATTTA)
end
35
+
36
# Thresholds are supplied as one value per abundance column (presumably one
# per timepoint, going by the example title).
#
# The example was previously titled 'should not accept ...', which
# contradicted its own assertions: the second call passes distinct per-column
# thresholds ([0, 2]) and expects the trail to be kept, so the title now says
# that differing thresholds ARE accepted.
it 'should accept different thresholds for different timepoints' do
  kmers = Bio::KmerMultipleAbundanceHash.new
  kmers['ATATATTT'] = [1, 10]
  kmers['TATATTTA'] = [1, 3]
  trail = DummyTrail.new('ATATATTTA')

  # A uniform threshold of 2 exceeds the first-column abundance (1) of both kmers.
  paths = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new.filter([trail], kmers, [2, 2])
  paths.should == []

  # Relaxing only the first column to 0 lets the same trail through.
  paths = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new.filter([trail], kmers, [0, 2])
  paths.map(&:sequence).should == %w(ATATATTTA)
end
46
+
47
# Four candidate trails are offered; only the two whose 3-mers are all on
# record (ATAGC and AGATC) are expected to come back, in input order.
it 'should rule some things out in a figure 8 style graph' do
  abundances = Bio::KmerMultipleAbundanceHash.new
  %w(ATA AGA TAG GAT ATC AGC).each do |kmer|
    abundances[kmer] = [1, 10]
  end
  candidates = DummyTrail.trails(%w(ATATC ATAGC AGATC AGAGC))

  path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
  surviving = path_filter.filter(candidates, abundances, [1, 1])

  surviving.map(&:sequence).should == %w(ATAGC AGATC)
end
59
+
60
# Exercises the :exclude_ending_length option. The expectations are consistent
# with the option skipping that many bases at each end of the trail before the
# kmers are checked (NOTE(review): inferred from the cases below - confirm
# against the filter implementation).
it 'should respect exclusion of filtering at the ends' do
  abundances = Bio::KmerMultipleAbundanceHash.new
  abundances['ATATATTT'] = [1, 10]
  abundances['TATATTTA'] = [3, 1]

  # One extra base on each side of the known region.
  flanked = DummyTrail.new('GATATATTTAC')
  without_option = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new.filter([flanked], abundances, [1, 1])
  without_option.map(&:sequence).should == []
  with_option = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new.filter([flanked], abundances, [1, 1], :exclude_ending_length => 1)
  with_option.map(&:sequence).should == %w(GATATATTTAC)

  # Two extra bases on the right: excluding a single base is no longer enough.
  overhanging = DummyTrail.new('GATATATTTAGC')
  still_rejected = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new.filter([overhanging], abundances, [1, 1], :exclude_ending_length => 1)
  still_rejected.map(&:sequence).should == []
end
73
+ end
@@ -0,0 +1,38 @@
1
+ require 'rspec'
2
+ require 'pp'
3
+ require 'systemu'
4
+
5
+ # To run this test:
6
+ # $ rspec /path/to/test_script_being_tested.rb
7
+
8
+ # Assumes that the script being tested lives at ../bin/something.rb relative to the directory containing this spec, and that this spec file is named something_spec.rb
9
# Put the directory above this spec at the front of the load path.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..'))

# Derive the script name from this spec's own file name
# (something_spec.rb -> something.rb). The dot is escaped and the pattern is
# anchored to the end of the string, so only a literal "_spec.rb" suffix is
# rewritten.
script_under_test = File.basename(__FILE__).sub(/_spec\.rb\z/, '.rb')

# The script is looked up in ../bin relative to this spec.
path_to_script = File.join(File.dirname(__FILE__), '..', 'bin', script_under_test)
12
+
13
+
14
+ describe script_under_test do
15
# Pipes a small table on stdin (one sequence per row followed by ten counts)
# into the script with --threshold 1000 and compares its stdout, byte for
# byte, against a 633-line listing in which only the last line has a value.
it 'should scripting test ok' do
  kmer_table = <<EOF
AAAAAAAAAATCAACCAAAATGTTCCATCACTTGACAATTTTTGGTTATCGGTTGTATACCTCAAGATTTGCTACTTTGTCAGAAGAGGGGAAAAAAAAGC 36 0 0 18 29 37 32 0 0 41
AAAAAAAAAGACGCCCGTTCACCGGGCGCCTCTTGTTGTGAAATGAATTATCTCCGATGGTTCGTTATCTGATCGCCTCTTCGGTCTCGGCATCGAAGATG 0 0 0 0 0 0 0 0 0 0
AAAAAAAAAGATCTCAGCCTCTAAGAAACGAAAACATGGTCTCTCTCAATCCATAACGTCGCCTGTTGATCGCGTTCTGTTACTTCAAAGAACCTTCGGCA 0 0 0 0 0 0 0 0 0 0
AAAAAAAAATCAACCAAAATGTTCCATCACTTGACAATTTTTGGTTATCGGTTGTATACCTCAAGATTTGCTACTTTGTCAGAAGAGGGGAAAAAAAAGCC 2121 0 0 1416 1790 2229 2115 0 1 2399
AAAAAAAAATCAACCAAAATGTTCCATCACTTGACAATTTTTGGTTATCGGTTGTATACCTCAAGATTTGCTACTTTGTCAGAAGAGGGGAAAAAAAGCCA 68 0 0 49 60 94 80 0 0 86
AAAAAAAACAATAGGTTTGTACTCCCGTTAGAGGTGCTGCCACCACGGTGGTTGCTGAGAACACACTCTTGGAACCTGATTCGGATAATGCCGACGAAGGA 0 0 0 0 0 0 0 0 0 0
AAAAAAAACCCAGCAACGGTTACATACGCTCAGAGCAGAAGAAAAAAAAGCATCGCGCTCACTAACGGGTCTTCAACGGCAGCGTCACTCCATCACGCTGT 0 0 0 0 0 0 0 0 0 0
AAAAAAAACCCGAATCAGATGCTGGTACTCAAGGTCTTAAAGACCGTTGTCCTATCAGCAGATAAAAAATCCCATATCAAGCAGAGGATAGAGCACCTCAA 2 0 0 0 0 0 0 0 0 1
AAAAAAAAGACGCCCGTTCACCGGGCGCCTCTTGTTGTGAAATGAATTATCTCCGATGGTTCGTTATCTGATCGCCTCTTCGGTCTCGGCATCGAAGATGT 0 0 0 0 0 0 0 0 0 0
AAAAAAAAGAGAATAGTGATCAAGTTTGATAGTACATCGACGTTTTAGCTGATAGCTTCCTCCAGAAGCTGTCTGCCATAAACCTGCGCATATTCACCGTT 0 0 0 0 0 0 0 0 0 0
EOF

  status, stdout, stderr = systemu "#{path_to_script} --threshold 1000 -", 'stdin' => kmer_table
  # Raising first puts the script's own error text into the failure report.
  raise stderr unless stderr == ''
  stderr.should eq("")
  status.exitstatus.should eq(0), "testing #{path_to_script}"

  # Lines 1..632 are a number followed by a tab and nothing else.
  blank_lines = (1..632).map { |number| "#{number}\t\n" }.join
  stdout.should eq(blank_lines + "633\t1\n")
end
38
+ end
@@ -0,0 +1,120 @@
1
+ require 'rspec'
2
+ require 'pp'
3
+ require 'systemu'
4
+ require 'tempfile'
5
+
6
+ # To run this test:
7
+ # $ rspec /path/to/test_script_being_tested.rb
8
+
9
+ # Assumes that the script being tested lives at ../bin/something.rb relative to the directory containing this spec, and that this spec file is named something_spec.rb
10
# Put the directory above this spec at the front of the load path.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..'))

# Derive the script name from this spec's own file name
# (something_spec.rb -> something.rb). Only the "_spec" that sits directly
# before the final ".rb" is dropped; an unanchored gsub would also strip
# "_spec" from the middle of a name.
script_under_test = File.basename(__FILE__).sub(/_spec(?=\.rb\z)/, '')

# Argument-order shim: takes the expected value +e+ first and the observed
# value +o+ second, and checks them with RSpec's `should eq`.
def assert_equal(e, o)
  o.should eq(e)
end

# The script is looked up in ../bin relative to this spec.
path_to_script = File.join(File.dirname(__FILE__), '..', 'bin', script_under_test)
14
+
15
+
16
+
17
+ describe script_under_test do
18
# One count file in: the output is expected to be a header row naming the
# file, followed by one tab-separated row per kmer.
it 'should single file test' do
  Tempfile.open('spec') do |counts|
    counts.puts 'AAA 1', 'AAT 2'
    counts.close

    status, stdout, stderr = systemu "#{path_to_script} #{counts.path}"
    stderr.should eq("")
    status.exitstatus.should eq(0)

    expected_rows = [
      "\t#{File.basename counts.path}",
      "AAA\t1",
      "AAT\t2"
    ]
    stdout.should eq(expected_rows.map { |row| "#{row}\n" }.join)
  end
end
32
+
33
# Two count files in: one column per file, and a kmer missing from a file is
# expected to show up as 0 in that file's column.
it 'should two file test' do
  Tempfile.open('spec') do |first_counts|
    first_counts.puts 'AAA 1', 'AAT 2'
    first_counts.close

    Tempfile.open('spec') do |second_counts|
      second_counts.puts 'AAA 1', 'ATA 3'
      second_counts.close

      status, stdout, stderr = systemu "#{path_to_script} #{first_counts.path} #{second_counts.path}"
      stderr.should eq("")
      status.exitstatus.should eq(0)

      expected_rows = [
        "\t#{File.basename first_counts.path}\t#{File.basename second_counts.path}",
        "AAA\t1\t1",
        "AAT\t2\t0",
        "ATA\t0\t3"
      ]
      stdout.should eq(expected_rows.map { |row| "#{row}\n" }.join)
    end
  end
end
54
+
55
+
56
# With --percentage each count is expected to be reported as a fraction of
# its own file's total (1 and 3 out of 4; 1 and 4 out of 5).
it 'should two file test as percentage' do
  Tempfile.open('spec') do |first_counts|
    first_counts.puts 'AAA 1', 'AAT 3'
    first_counts.close

    Tempfile.open('spec') do |second_counts|
      second_counts.puts 'AAA 1', 'ATA 4'
      second_counts.close

      status, stdout, stderr = systemu "#{path_to_script} --percentage --trace error #{first_counts.path} #{second_counts.path}"
      stderr.should eq("")
      status.exitstatus.should eq(0)

      expected_rows = [
        "\t#{File.basename first_counts.path}\t#{File.basename second_counts.path}",
        "AAA\t0.25\t0.2",
        "AAT\t0.75\t0",
        "ATA\t0\t0.8"
      ]
      stdout.should eq(expected_rows.map { |row| "#{row}\n" }.join)
    end
  end
end
77
+
78
# With --min-count 2 the row for AAA (count 1, present in one file only) is
# expected to be dropped, while AAT and ATA are still reported.
it 'should cutoff kmers with overly low abundances' do
  Tempfile.open('spec') do |first_counts|
    first_counts.puts 'AAA 1', 'AAT 2'
    first_counts.close

    Tempfile.open('spec') do |second_counts|
      second_counts.puts 'AAT 1', 'ATA 3'
      second_counts.close

      status, stdout, stderr = systemu "#{path_to_script} --trace error --min-count 2 #{first_counts.path} #{second_counts.path}"
      # Surface whatever the script wrote to stderr as the failure message.
      raise stderr unless stderr.nil? || stderr == ''
      status.exitstatus.should eq(0)

      expected_rows = [
        "\t#{File.basename first_counts.path}\t#{File.basename second_counts.path}",
        "AAT\t2\t1",
        "ATA\t0\t3"
      ]
      stdout.should eq(expected_rows.map { |row| "#{row}\n" }.join)
    end
  end
end
98
+
99
# Combines --percentage with --min-count 2: the AAA row is expected to be
# dropped, and the remaining rows are still expressed as fractions of each
# file's full total (3/4, 1/5 and 4/5).
it 'should two file test as percentage with min count' do
  Tempfile.open('spec') do |first_counts|
    first_counts.puts 'AAA 1', 'AAT 3'
    first_counts.close

    Tempfile.open('spec') do |second_counts|
      second_counts.puts 'AAT 1', 'ATA 4'
      second_counts.close

      status, stdout, stderr = systemu "#{path_to_script} --percentage --min-count 2 --trace error #{first_counts.path} #{second_counts.path}"
      stderr.should eq("")
      status.exitstatus.should eq(0)

      expected_rows = [
        "\t#{File.basename first_counts.path}\t#{File.basename second_counts.path}",
        "AAT\t0.75\t0.2",
        "ATA\t0\t0.8"
      ]
      stdout.should eq(expected_rows.map { |row| "#{row}\n" }.join)
    end
  end
end
119
+ end
120
+
@@ -0,0 +1,221 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'bio'
3
+ require 'tempfile'
4
+
5
+ #Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('finishm'); Bio::Log::CLI.configure('finishm'); Bio::Log::CLI.configure('bio-velvet')
6
+
7
# Spec-only convenience added to String.
class String
  # Returns the upper-cased reverse complement of this string, computed by
  # treating it as a Bio::Sequence::NA nucleotide sequence.
  def revcom
    nucleotides = Bio::Sequence::NA.new(self)
    nucleotides.reverse_complement.to_s.upcase
  end
end
12
+
13
+ describe "OrientedNodeTrail" do
14
# Nodes added to a trail should be retrievable, in order, together with the
# side they were entered from; an unrecognised side should be rejected.
it "should be able to store a sequence of oriented nodes" do
  lastgraph_path = File.expand_path("#{TEST_DATA_DIR}/velvet_test_trails/Assem/LastGraph")
  graph = Bio::Velvet::Graph.parse_from_file(lastgraph_path)
  path = Bio::Velvet::Graph::OrientedNodeTrail.new

  path.to_a.length.should == 0

  path.add_node(graph.nodes[1], :start_is_first)
  path.to_a.length.should == 1
  head = path.to_a[0]
  head.node.should == graph.nodes[1]
  head.first_side.should == :start_is_first

  path.add_node(graph.nodes[2], :start_is_first)
  path.add_node(graph.nodes[4], :end_is_first)
  path.to_a.length.should == 3
  tail = path.to_a[2]
  tail.node.should == graph.nodes[4]
  tail.first_side.should == :end_is_first

  # :no_side is not one of the two sides used above.
  expect { path.add_node(graph.nodes[4], :no_side) }.to raise_error
end
32
+
33
# A freshly created trail holds no nodes, so its sequence should be empty.
it 'should get the sequence of no nodes' do
  empty_path = Bio::Velvet::Graph::OrientedNodeTrail.new
  empty_path.sequence.should == ''
end
37
+
38
# A single-node trail should report the node's own sequence when entered from
# the start, and its reverse complement when entered from the end.
#
# A stray bare `trail.sequence` call that followed the first assertion has
# been removed: its result was discarded and it asserted nothing.
it 'should get the sequence of one node' do
  graph = Bio::Velvet::Graph.parse_from_file File.expand_path("#{TEST_DATA_DIR}/velvet_test_trails/Assem/LastGraph")

  trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  trail.add_node graph.nodes[1], :start_is_first
  trail.sequence.should == graph.nodes[1].sequence

  trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  trail.add_node graph.nodes[1], :end_is_first
  trail.sequence.should == graph.nodes[1].sequence.revcom
end
50
+
51
+
52
# Walks nodes 1, 2 and 4 (the last one entered from its end) and compares the
# trail's sequence with a fixed expected string.
it 'should get the sequence of three nodes' do
  lastgraph_path = File.expand_path("#{TEST_DATA_DIR}/velvet_test_trails/Assem/LastGraph")
  graph = Bio::Velvet::Graph.parse_from_file(lastgraph_path)

  path = Bio::Velvet::Graph::OrientedNodeTrail.new
  path.add_node(graph.nodes[1], :start_is_first)
  path.add_node(graph.nodes[2], :start_is_first)
  path.add_node(graph.nodes[4], :end_is_first)

  # The expected sequence, wrapped for readability and joined without separators.
  expected_sequence = %w(
    CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
    TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
    ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
    CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
    GACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTC
    CTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATG
    ATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAA
    GTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAA
    ACTATGCTGGTATTTCACTTCCAGGTACAGG
  ).join

  path.sequence.should == expected_sequence
end
70
+
71
# A trail consisting only of node 2 entered from its start is expected to
# raise when asked for its sequence.
it 'should not do sequence right when there is not enough info' do
  lastgraph_path = File.expand_path("#{TEST_DATA_DIR}/velvet_test_trails/Assem/LastGraph")
  graph = Bio::Velvet::Graph.parse_from_file(lastgraph_path)

  path = Bio::Velvet::Graph::OrientedNodeTrail.new
  path.add_node(graph.nodes[2], :start_is_first)

  expect { path.sequence }.to raise_error
end
78
+
79
+ # # Below are two things I was testing, which I think might be a problem with velvet, but not sure.
80
+ # it 'should not have the bug I found any more full' do
81
+ # # This graph is somewhat abbreviated and not a full and complete LastGraph file, but should be ok
82
+ # lastgraph = <<EOF
83
+ #5 2805 43 1
84
+ #NODE 1 71 0 0 13651 13188
85
+ #ATAGATTATTTTTATTTTTCAGAGAATTTACAGAAAGATCAGTTAAAATCCAGAGCAAGAAAAGCATTGCA
86
+ #AAATTCTCTGAAAAATAAAAATAATCTATGCTGCTCGGTTTGACAAATTTTATCCCGTAAACTCCCTTTTT
87
+ #NODE 2 47 0 0 9848 9462
88
+ #GAAATTAAAAGAAGCTAAAGAGATTCAAAAATCGATCGATAACAAGA
89
+ #ATTTCTGCAATGCTTTTCTTGCTCTGGATTTTAACTGATCTTTCTGT
90
+ #NODE 3 11 0 0 2338 2246
91
+ #AACAGCTTCCA
92
+ #AGCTTCTTTTA
93
+ #NODE 4 2 0 0 396 378
94
+ #TA
95
+ #CA
96
+ #NODE 5 36 0 0 7023 6808
97
+ #TCATCAACAAGCTCAGCTTTGGATTTATCGAATTCT
98
+ #AGGAGTTTACGGGATAAAATTTGTCAAACCGAGCAG
99
+ #ARC 1 2 202
100
+ #EOF
101
+ # lastgraph.gsub!(/ +/,"\t")
102
+ # Tempfile.open('spec') do |f|
103
+ # f.puts lastgraph
104
+ # f.close
105
+ #
106
+ # graph = Bio::Velvet::Graph.parse_from_file f.path
107
+ # graph.nodes.length.should == 5
108
+ # trail = Bio::Velvet::Graph::OrientedNodeTrail.new
109
+ # trail.add_node graph.nodes[3], :end_is_first
110
+ # trail.add_node graph.nodes[2], :end_is_first
111
+ # trail.add_node graph.nodes[1], :end_is_first
112
+ # trail.add_node graph.nodes[4], :start_is_first
113
+ # trail.add_node graph.nodes[5], :start_is_first
114
+ # trail.sequence.should == 'TGGAAGCTGTTTCTTGTTATCGATCGATTTTTGAATCTCTTTAGCTTCTTTTAATTTCTGCAATGCTTTTCTTGCTCTGGATTTTAACTGATCTTTCTGTAAATTCTCTGAAAAATAAAAATAATCTAT GCTGCTCGGTTTGACAAATTTTATCCCGTAAACTCCTTTTTTATCATCAACAAGCTCAGCTTTGGATTTATCGAATTCT'
115
+ # end
116
+ # end
117
+ #
118
+ # it 'should not have the bug I found any more cut down version' do
119
+ # # This graph is somewhat abbreviated and not a full and complete LastGraph file, but should be ok
120
+ # lastgraph = <<EOF
121
+ #416 2705 43 1
122
+ #NODE 62 71 13651 13188 0 0
123
+ #ATAGATTATTTTTATTTTTCAGAGAATTTACAGAAAGATCAGTTAAAATCCAGAGCAAGAAAAGCATTGCA
124
+ #AAATTCTCTGAAAAATAAAAATAATCTATGCTGCTCGGTTTGACAAATTTTATCCCGTAAACTCCCTTTTT
125
+ #NODE 165 2 396 378 0 0
126
+ #TA
127
+ #CA
128
+ #ARC -62 165 198
129
+ #EOF
130
+ # lastgraph.gsub!(/ +/,"\t")
131
+ # Tempfile.open('spec') do |f|
132
+ # f.puts lastgraph
133
+ # f.close
134
+ #
135
+ # graph = Bio::Velvet::Graph.parse_from_file f.path
136
+ # graph.nodes.length.should == 2
137
+ # trail = Bio::Velvet::Graph::OrientedNodeTrail.new
138
+ # trail.add_node graph.nodes[62], :end_is_first
139
+ # trail.add_node graph.nodes[165], :start_is_first
140
+ # trail.sequence.should == 'TGCAATGCTTTTCTTGCTCTGGATTTTAACTGATCTTTCTGTAAATTCTCTGAAAAATAAAAATAATCTAT GCTGCTCGGTTTGACAAATTTTATCCCGTAAACTCCTTTTTTA'
141
+ # end
142
+ # end
143
+
144
# Nodes 2 and 4 point at each other. Arriving at node 2 from node 1, the only
# neighbour reported should be node 4 entered from its start.
it 'should give only 1 direction when entering a 2 node loop' do
  graph = GraphTesting.emit([[1, 2], [2, 4], [4, 2]])

  path = Bio::Velvet::Graph::OrientedNodeTrail.new
  path.add_node(graph.nodes[1], :start_is_first)
  path.add_node(graph.nodes[2], :start_is_first)

  neighbour_summary = path.neighbours_of_last_node(graph).map do |neighbour|
    [neighbour.node.node_id, neighbour.first_side]
  end
  neighbour_summary.should == [[4, :start_is_first]]
end
155
+
156
# The shorthand form is the node id followed by 's' or 'e' for the side
# entered first, with entries joined by commas; an empty trail gives ''.
it 'should to_shorthand' do
  graph = GraphTesting.emit([[1, 2], [2, 4], [4, 2]])
  path = Bio::Velvet::Graph::OrientedNodeTrail.new

  path.to_shorthand.should == ''

  path.add_node(graph.nodes[1], :start_is_first)
  path.to_shorthand.should == '1s'

  path.add_node(graph.nodes[2], :end_is_first)
  path.to_shorthand.should == '1s,2e'
end
169
+
170
# "Super-shorthand" lists node ids only; orientations are expected to be
# worked out from the graph. Inputs that cannot be resolved should raise.
it 'should be able to parse super-shorthand form easy' do
  graph = GraphTesting.emit([[1, 2], [2, 3], [2, 4], [4, 2]])

  # Parses +ids+ against the graph and reports the resulting shorthand.
  shorthand_for = lambda do |ids|
    Bio::Velvet::Graph::OrientedNodeTrail.create_from_super_shorthand(ids, graph).to_shorthand
  end

  shorthand_for.call('2,3').should == '2s,3s'
  shorthand_for.call('1,2,3').should == '1s,2s,3s'
  shorthand_for.call('3,2,1').should == '3e,2e,1e'

  # one node only not enough
  expect { shorthand_for.call('3') }.to raise_error
  # 2,4 have confusing connections
  expect { shorthand_for.call('2,4') }.to raise_error
  # 1,4 not directly connected
  expect { shorthand_for.call('1,4') }.to raise_error
end
195
+
196
# Checks the trail's coverage on the graph as emitted, and again after
# node 2's coverages have been replaced with [10].
it 'should calculate coverage' do
  graph = GraphTesting.emit([[1, 2], [2, 3]])
  trail = Bio::Velvet::Graph::OrientedNodeTrail.create_from_super_shorthand('1,2,3', graph)

  trail.coverage.should == 0.5

  graph.nodes[2].coverages = [10]
  # Both sides are scaled by 100 and rounded, so the floats are compared as integers.
  observed_percent = (trail.coverage * 100).round
  expected_percent = (20.0 / 3 / 10 * 100).round
  observed_percent.should == expected_percent
end
206
+
207
# Adds a second arc from node 1 to node 2 whose end direction is reversed,
# after which node 1 should list node 2 twice: once per side.
it 'should give the right neighbours when neighbours are the s and e of the same node' do
  # Only the graph is needed; the other two return values are unused here.
  graph, _initial_path, _terminal = GraphTesting.emit_ss([[1, 2]], 1, 1)

  extra_arc = Bio::Velvet::Graph::Arc.new.tap do |arc|
    arc.begin_node_id = 1
    arc.end_node_id = 2
    arc.begin_node_direction = true
    arc.end_node_direction = false
  end
  graph.arcs.push(extra_arc)

  neighbours = graph.neighbours_of(graph.nodes[1], :start_is_first)
  neighbours.map(&:to_shorthand).should == %w(2s 2e)
end
221
+ end