finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
# Specs for Bio::FinishM::CProbeNodeFinder, queried against a pre-built velvet
# LastGraph fixture.
describe 'c_probe_node_finder' do
  # Assembles the gapfilling/3 test reads, seeded with two probe sequences,
  # writing velvet's output into +output_dir+. None of the examples below
  # inspect the generated graph.
  # NOTE(review): presumably the assembly is run first to show that an earlier
  # in-process velvet run does not upset the finder -- confirm with the author.
  run_assembly = lambda do |output_dir|
    probe_sequences = [
      'AGAGTTTGATCATGGCTCAGGATGAACGCTAGCGGCAGGCCTAACACATGCAAGTCGAGGGGTAGAGGCTTTCGGGCCTTGAGACCGGCGCACGGGTGC',
      'ACATGCAAGTCGAGGGGTAGAGGCTTTCGGGCCTTGAGACCGGCGCACGGGTGCGTAACGCGTATGCAATCTGCCTTGTACTAAGGGATAGCCCAGAGA',
    ]
    read_inputs = Bio::FinishM::ReadInput.new
    read_inputs.fasta_singles_gz = [
      File.join(File.dirname(__FILE__),'data','gapfilling','3','reads.fa.gz')
    ]
    Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_inputs, {
      :assembly_coverage_cutoff => 0,
      :velvet_kmer_size => 31,
      :output_assembly_path => output_dir,
    })
  end

  # Parses the LastGraph fixture that the finder is queried against.
  load_fixture_graph = lambda do
    Bio::Velvet::Underground::Graph.parse_from_file File.join(TEST_DATA_DIR,'c_probe_node_finder','1','LastGraph')
  end

  it 'should find set of probe nodes' do
    Dir.mktmpdir do |tmpdir|
      run_assembly.call(tmpdir)

      finder = Bio::FinishM::CProbeNodeFinder.new
      read_probing_graph = load_fixture_graph.call
      finder.find_probe_nodes(read_probing_graph, [4,5,6]).should == [1]
      finder.find_probe_nodes(read_probing_graph, [515,135]).should == [3,4]
    end
  end

  it 'should return the same as the Ruby node_finder' do
    Dir.mktmpdir do |tmpdir|
      # First assembly run is fine
      run_assembly.call(tmpdir)

      finder = Bio::FinishM::CProbeNodeFinder.new
      read_probing_graph = load_fixture_graph.call

      # Each result is read below as [object with node_id, boolean, object with read_id],
      # one entry per requested read ID, in request order.
      found = finder.find_probes(read_probing_graph, [1,544])
      found[0][0].node_id.should == 1
      found[1][0].node_id.should == 2
      found[0][1].should == true
      found[1][1].should == false
      found[0][2].read_id.should == 1
      found[1][2].read_id.should == 544

      # No read IDs requested means no probes found.
      finder.find_probes(read_probing_graph, []).should == []
    end
  end
end
@@ -0,0 +1,284 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Spec helpers that build ConnectionInterpreter fixtures from a compact
# shorthand.
class GraphTesting
  # Build a Connection between two probes.
  #
  # first_second - Array naming the two probes. Each name is either
  #                * an Integer sequence index: the first Integer encountered
  #                  gets side :end, every later Integer gets side :start; or
  #                * a String such as '1s' or '3e': the sequence index followed
  #                  by the side ('s' => :start, 'e' => :end).
  # distance     - value stored in the connection's distance (default 10).
  #
  # Returns a Bio::FinishM::ConnectionInterpreter::Connection whose probe1 and
  # probe2 are the first two probes built.
  # Raises RuntimeError if a name is in neither form.
  def self.create_connection(first_second, distance=10)
    first_integer_pending = true
    probes = first_second.collect do |namer|
      # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
      # removed in 3.2, and Integer also matches on older rubies.
      if namer.kind_of?(Integer)
        probe = Bio::FinishM::ConnectionInterpreter::Probe.new
        probe.sequence_index = namer
        probe.side = first_integer_pending ? :end : :start
        first_integer_pending = false
        probe
      elsif namer.respond_to?(:match) && (matches = namer.match(/^(.+)([se])$/))
        probe = Bio::FinishM::ConnectionInterpreter::Probe.new
        probe.sequence_index = matches[1].to_i
        probe.side = matches[2] == 's' ? :start : :end
        probe
      else
        raise "Unrecognised probe name: #{namer.inspect}"
      end
    end

    conn = Bio::FinishM::ConnectionInterpreter::Connection.new
    conn.probe1 = probes[0]
    conn.probe2 = probes[1]
    conn.distance = distance
    return conn
  end

  # Build one Connection per entry (each with the default distance) plus a
  # dummy sequence for every sequence index that any probe refers to.
  #
  # array_of_conn_strings - Array of two-element Arrays, each in the form
  #                         accepted by create_connection.
  #
  # Returns [connections, seqs] where seqs maps sequence index => 'AAAAAAAAAA'.
  def self.create_connections(array_of_conn_strings)
    conns = array_of_conn_strings.collect { |names| create_connection(names) }
    seqs = {}
    conns.each do |conn|
      [conn.probe1, conn.probe2].each do |probe|
        seqs[probe.sequence_index] ||= 'A' * 10
      end
    end
    return conns, seqs
  end
end
45
+
46
# Specs for Bio::FinishM::ConnectionInterpreter, driven by connections built
# with GraphTesting.create_connections.
describe "ConnectionInterpreter" do
  # Builds an interpreter from the given connections and sequence IDs and
  # returns the to_s form of each of its doubly_single_contig_connections.
  single_connection_strings = lambda do |conns, sequence_ids|
    Bio::FinishM::ConnectionInterpreter.new(
      conns, sequence_ids
    ).doubly_single_contig_connections.collect(&:to_s)
  end

  # Builds an interpreter and scaffolds its doubly_single_contig_connections.
  scaffold = lambda do |conns, sequence_ids|
    interpreter = Bio::FinishM::ConnectionInterpreter.new(
      conns, sequence_ids
    )
    interpreter.scaffolds(interpreter.doubly_single_contig_connections)
  end

  # Builds an interpreter and returns its unconnected probes in to_settable form.
  unconnected_probes = lambda do |conns, sequence_ids|
    Bio::FinishM::ConnectionInterpreter.new(
      conns, sequence_ids
    ).unconnected_probes.collect(&:to_settable)
  end

  it 'should find doubly_single_contig_connections hello world' do
    conns, seqs = GraphTesting.create_connections([
      %w(1s 3e)
    ])
    # NOTE(review): every other example passes seqs.keys as the second
    # argument; this one passes the whole hash -- confirm that is intended.
    single_connection_strings.call(conns, seqs).should == [
      '1s/3e:10'
    ]
  end

  it 'should find inter-contig connections in a loop' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
      [2,3],
      [3,1],
    ])
    single_connection_strings.call(conns, seqs.keys).sort.should == [
      '1e/2s:10',
      '2e/3s:10',
      '3e/1s:10',
    ].sort
  end

  it 'should not include all connections in inter-contig connections' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
      [2,3],
      [3,4],
      [3,5],
    ])
    # The end of contig 3 connects to both 4 and 5, so neither is reported.
    single_connection_strings.call(conns, seqs.keys).sort.should == [
      '1e/2s:10',
      '2e/3s:10',
    ].sort
  end

  it 'should be able to handle loops where there is only one contig' do
    conns, seqs = GraphTesting.create_connections([
      [1,1],
    ])
    single_connection_strings.call(conns, seqs.keys).sort.should == [
      '1e/1s:10',
    ].sort
  end

  it 'should scaffold hello world' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
    ])
    observed = scaffold.call(conns, seqs.keys)
    observed.should be_kind_of(Array)
    observed.length.should == 1
    o = observed[0]
    o.should be_kind_of(Bio::FinishM::ConnectionInterpreter::Scaffold)
    o.contigs.collect{|c| c.sequence_index}.should == [1,2]
    o.contigs.collect{|c| c.direction}.should == [true, true]
    o.gap_lengths.should == [10]
    o.sequence(seqs).should == 'AAAAAAAAAANNNNNNNNNNAAAAAAAAAA'
    observed.collect(&:circular?).uniq.should == [false]
  end

  it 'should scaffold 3 contigs together' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
      [2,3],
    ])
    observed = scaffold.call(conns, seqs.keys)
    observed.should be_kind_of(Array)
    observed.length.should == 1
    o = observed[0]
    o.should be_kind_of(Bio::FinishM::ConnectionInterpreter::Scaffold)
    o.sequence(seqs).should == 'AAAAAAAAAANNNNNNNNNNAAAAAAAAAANNNNNNNNNNAAAAAAAAAA'
    o.contigs.collect{|c| c.sequence_index}.should == [1,2,3]
    o.contigs.collect{|c| c.direction}.should == [true, true, true]
    observed.collect(&:circular?).uniq.should == [false]
  end

  it 'should scaffold two separate scaffolds and a leftover' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
      [3,4],
    ])
    # A sequence with no connections at all.
    seqs[99] = 'ATGC'
    observed = scaffold.call(conns, seqs.keys)

    observed.should be_kind_of(Array)
    observed.length.should == 3

    first = observed[0]
    first.should be_kind_of(Bio::FinishM::ConnectionInterpreter::Scaffold)
    first.sequence(seqs).should == 'AAAAAAAAAANNNNNNNNNNAAAAAAAAAA'
    first.contigs.collect{|c| c.sequence_index}.should == [1,2]
    first.contigs.collect{|c| c.direction}.should == [true, true]

    second = observed[1]
    second.should be_kind_of(Bio::FinishM::ConnectionInterpreter::Scaffold)
    second.sequence(seqs).should == 'AAAAAAAAAANNNNNNNNNNAAAAAAAAAA'
    second.contigs.collect{|c| c.sequence_index}.should == [3,4]
    second.contigs.collect{|c| c.direction}.should == [true, true]

    leftover = observed[2]
    leftover.should be_kind_of(Bio::FinishM::ConnectionInterpreter::Scaffold)
    leftover.contigs[0].sequence_index.should == 99
    leftover.sequence(seqs).should == 'ATGC'

    observed.collect(&:circular?).uniq.should == [false]
  end

  it 'should scaffold single contig circular scaffolds' do
    conns, seqs = GraphTesting.create_connections([
      [1,1],
    ])
    observed = scaffold.call(conns, seqs.keys)

    observed.should be_kind_of(Array)
    observed.length.should == 1
    o = observed[0]
    o.should be_kind_of(Bio::FinishM::ConnectionInterpreter::Scaffold)
    o.sequence(seqs).should == 'AAAAAAAAAA'
    o.contigs.collect{|c| c.sequence_index}.should == [1]
    o.contigs.collect{|c| c.direction}.should == [true]
    o.circular?.should == true
  end

  it 'should scaffold multi-contig circular scaffolds' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
      [2,3],
      [3,1],
      [9,10],
    ])
    seqs[87] = 'ATGC'
    observed = scaffold.call(conns, seqs.keys)

    observed.length.should == 3
    observed.collect(&:circular?).should == [true, false, false]
    observed[0].contigs.collect{|c| c.sequence_index}.should == [3,1,2]
  end

  it 'should respect the given distance given in the Connection' do
    conns, seqs = GraphTesting.create_connections([
      %w(1s 3e)
    ])
    conns[0].distance = 5
    observed = scaffold.call(conns, seqs.keys)
    observed.length.should == 1
    observed[0].gap_lengths.should == [5]
  end

  it 'should be able to handle reverse scaffolding' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
    ])
    conns[0].probe1.side = :start #reverse both contigs
    conns[0].probe2.side = :end
    observed = scaffold.call(conns, seqs.keys)
    observed.length.should == 1
    observed[0].sequence(seqs).should == 'TTTTTTTTTTNNNNNNNNNNTTTTTTTTTT'
  end

  it 'should report unconnected probes' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
    ])
    unconnected_probes.call(conns, [1,2]).should == [
      [1, :start],
      [2, :end],
    ]
    unconnected_probes.call(conns, [1,2,3]).should == [
      [1, :start],
      [2, :end],
      [3, :start],
      [3, :end]
    ]
    unconnected_probes.call(conns, [1,2,4]).should == [
      [1, :start],
      [2, :end],
      [4, :start],
      [4, :end]
    ]
  end

  it 'should report unconnected sequences' do
    conns, seqs = GraphTesting.create_connections([
      [1,2],
    ])
    interpreter = Bio::FinishM::ConnectionInterpreter.new(
      conns, [1,2]
    )
    interpreter.unconnected_sequences.should == [
    ]
    interpreter = Bio::FinishM::ConnectionInterpreter.new(
      conns, [1,2,3]
    )
    interpreter.unconnected_sequences.should == [
      3
    ]
    interpreter = Bio::FinishM::ConnectionInterpreter.new(
      conns, [1,2,4,5,6]
    )
    interpreter.unconnected_sequences.should == [
      4,5,6
    ]
  end
end
@@ -0,0 +1,291 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Specs for Bio::AssemblyGraphAlgorithms::ContigPrinter.
describe "ContigPrinter" do
  # Calls the sequences_to_variants_conservative method (via send) on a fresh
  # printer and returns [reference sequence, variants in to_shorthand form].
  conservative_variants = lambda do |seqs|
    printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
    ref, variants = printer.send(:sequences_to_variants_conservative, seqs)
    [ref, variants.collect{|v| v.to_shorthand}]
  end

  # Parses the velvet LastGraph fixture shared by the connection examples.
  parse_fixture_graph = lambda do
    Bio::Velvet::Graph.parse_from_file(File.join TEST_DATA_DIR, 'contig_printer','1','seq.fa.velvet','LastGraph')
  end

  # Returns the first short read with the given read ID on the given node.
  find_noded_read = lambda do |graph, node_id, read_id|
    graph.nodes[node_id].short_reads.select{|nr| nr.read_id == read_id}[0]
  end

  # Creates an AnchoredConnection with its probe reads and contig offsets set.
  # start_read / end_read are [node_id, read_id] pairs. Paths are left for the
  # caller to assign.
  anchor = lambda do |graph, start_read, end_read, start_offset, end_offset|
    acon = Bio::AssemblyGraphAlgorithms::ContigPrinter::AnchoredConnection.new
    acon.start_probe_noded_read = find_noded_read.call(graph, *start_read)
    acon.end_probe_noded_read = find_noded_read.call(graph, *end_read)
    acon.start_probe_contig_offset = start_offset
    acon.end_probe_contig_offset = end_offset
    acon
  end

  # Second line of the seq2_1to550.fa fixture, stripped, with its last two
  # characters removed. File.readlines is used so the file handle is closed.
  reference_sequence = lambda do
    File.readlines(File.join TEST_DATA_DIR, 'contig_printer','1','seq2_1to550.fa')[1].strip.gsub(/..$/,'')
  end

  describe 'sequences_to_variants_conservative' do
    it 'should handle a multi-variant' do
      seqs = [
        'ATGAATATGTGCATAGGATT',
        'ATGAATCGATGCAGTTGATT',
        # ### ###
        #01234567890123456789
      ]
      ref, shorthands = conservative_variants.call(seqs)

      ref.should == 'ATGAATNNNTGCANNNGATT'
      shorthands.sort.should == [
        '7S:ATG',
        '7S:CGA',
        '14S:TAG',
        '14S:GTT',
      ].sort
    end

    it 'should handle a semi-redundant multi-variant' do
      seqs = [
        'ATGAATATGTGCATAGGATT',
        'ATGAATCGATGCAGTTGATT',
        'ATGAATCGATGCATAGGATT',
        # ### ###
        #01234567890123456789
      ]
      ref, shorthands = conservative_variants.call(seqs)

      ref.should == 'ATGAATNNNTGCANNNGATT'
      shorthands.sort.should == [
        '7S:ATG',
        '7S:CGA',
        '14S:TAG',
        '14S:GTT',
      ].sort
    end

    it 'should handle not variants' do
      seqs = [
        'ATGAATATGTGCATAGGATT',
      ]
      ref, shorthands = conservative_variants.call(seqs)

      ref.should == 'ATGAATATGTGCATAGGATT'
      shorthands.sort.should == []
    end

    it 'should handle gaps' do
      seqs = [
        'ATTCTGAACGTAAGCATTATATGAATATGTGCATAGGATTTATTGGATCAGTGGCACGTA',
        'ATTCTGAACGTAAGCATTATATGAATATGTGCAGTTGATTTATTGGATCAGTGGCACGTA',
        'ATTCTGAACGTAAGCATTATATGAATCGATGAGTT GATTTATTGGATCAGTGGCACGTA',
        # ### - !!!
        #1234567890123456789012345678901234567890123456..........
        # 1 2 3 4
      ]
      ref, shorthands = conservative_variants.call(seqs)

      ref.should == 'ATTCTGAACGTAAGCATTATATGAATNNNTGNNNNNGATTTATTGGATCAGTGGCACGTA'
      shorthands.sort.should == [
        '27S:ATG',
        '27S:CGA',
        '32S:CATAG',
        '32S:CAGTT',
        '32S:AGTT',
        '36D:1',
      ].sort
    end
  end

  # The examples below are disabled; they are kept commented out exactly as
  # they were (including the trailing 'end' of their former describe block).

  # it 'should work with 3 variants' do
  # graph, paths = GraphTesting.emit_paths([
  # [1,2,3],
  # [1,4,3],
  # [1,5,3],
  # [1,6,3],
  # ])
  # printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
  # conn = printer.two_contigs_and_connection_to_printable_connection(paths)
  # conn.comparable.should == GraphTesting.emit_printer_connection(graph,
  # [1,2,3], [
  # [1,3,4],
  # [1,3,5],
  # [1,3,6],
  # ])
  # end

  # it 'should work with a >1 node variant' do
  # graph, paths = GraphTesting.emit_paths([
  # [1,2,3],
  # [1,4,5,6,3],
  # ])
  # printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
  # conn = printer.two_contigs_and_connection_to_printable_connection(paths)
  # conn.comparable.should == GraphTesting.emit_printer_connection(graph,
  # [1,2,3], [[1,3,4,5,6]]
  # )
  # end

  # it 'should work with 2 bubbles' do
  # graph, paths = GraphTesting.emit_paths([
  # [1,2,3,4,5],
  # [1,6,3,7,5],
  # ])
  # printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
  # conn = printer.two_contigs_and_connection_to_printable_connection(paths)
  # conn.comparable.should == GraphTesting.emit_printer_connection(graph,
  # [1,2,3,4,5], [[1,3,6],[3,5,7]]
  # )
  # end

  # it 'should work with 2 overlapping bubbles' do
  # graph, paths = GraphTesting.emit_paths([
  # [1,2,3,4,5],
  # [1,6,3,7,5],
  # [1,2,3,7,5],
  # ])
  # printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
  # conn = printer.two_contigs_and_connection_to_printable_connection(paths)
  # conn.comparable.should == GraphTesting.emit_printer_connection(graph,
  # [1,2,3,4,5], [[1,3,6],[3,5,7]]
  # )
  # end
  # end

  describe 'one_connection_between_two_contigs first' do
    it 'should work with a path with two nodes both in the same direction' do
      graph = parse_fixture_graph.call
      graph.nodes.length.should == 13
      # Read IDs were found by using bwa and inspecting the Sequence velvet file
      acon = anchor.call(graph, [9, 161], [4, 1045], 0, 0)
      acon.paths = [
        GraphTesting.make_onodes(graph, %w(9s 12s 7e 13s 5e 11e 2s 10s 4e))
      ]
      expected = '12345'+
        reference_sequence.call +
        '67890'
      observed = Bio::AssemblyGraphAlgorithms::ContigPrinter.new.one_connection_between_two_contigs(
        graph,'12345',acon,'67890',[]
      )
      observed.should == expected
    end

    it 'should handle reads not starting right at the end of the contig' do
      graph = parse_fixture_graph.call
      graph.nodes.length.should == 13
      # Read IDs were found by using bwa and inspecting the Sequence velvet file
      acon = anchor.call(graph, [9, 161], [4, 1045], 1, 2)
      acon.paths = [
        GraphTesting.make_onodes(graph, %w(9s 12s 7e 13s 5e 11e 2s 10s 4e))
      ]
      # Offsets of 1 and 2 trim one base off the first contig and two off the second.
      expected = '1234'+
        reference_sequence.call +
        '890'
      observed = Bio::AssemblyGraphAlgorithms::ContigPrinter.new.one_connection_between_two_contigs(
        graph,'12345',acon,'67890',[]
      )
      observed.should == expected
    end

    it 'should reverse reads starting from reverse onodes to start the process' do
      graph = parse_fixture_graph.call
      graph.nodes.length.should == 13
      # Start read, found by using bwa and inspecting the Sequence velvet file:
      # #<Bio::Velvet::Graph::NodedRead:0x0000000293fd08 @direction=false, @offset_from_start_of_node=2, @read_id=289, @start_coord=0>,
      acon = anchor.call(graph, [7, 289], [4, 1045], 0, 0)
      acon.paths = [
        GraphTesting.make_onodes(graph, %w(7e 13s 5e 11e 2s 10s 4e))
      ]
      expected = '12345'+
        'TAATACCGTATAATGACTTCGGTCCAAAGATTTATCGCCCAGGGATGAGCCCGCGTAGGATTAGCTTGTTGGTGAGGTAAAGGCTCACCAAGGCGACGATCCTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACATGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCCGGGATGATAATGACAGTACCGGGAGAATAAGCCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGGGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTTAGAGGTGAAAGCCCGGGGCTCAACTCCGGAATT' +
        '67890'
      observed = Bio::AssemblyGraphAlgorithms::ContigPrinter.new.one_connection_between_two_contigs(
        graph,'12345',acon,'67890',[]
      )
      observed.should == expected
    end

    # Renamed: this example previously shared its description with the one
    # above, which made failures ambiguous. Here both probe reads are recorded
    # with @direction=false.
    it 'should handle both the start and end probe reads being in the reverse direction' do
      graph = parse_fixture_graph.call
      graph.nodes.length.should == 13
      # Start read, found by using bwa and inspecting the Sequence velvet file:
      # #<Bio::Velvet::Graph::NodedRead:0x0000000293fd08 @direction=false, @offset_from_start_of_node=2, @read_id=289, @start_coord=0>,
      # End read:
      # #<Bio::Velvet::Graph::NodedRead:0x00000002763ef8 @direction=false, @offset_from_start_of_node=3, @read_id=800, @start_coord=0>,
      acon = anchor.call(graph, [7, 289], [10, 800], 0, 0)
      acon.paths = [
        GraphTesting.make_onodes(graph, %w(7e 13s 5e 11e 2s 10s))
      ]
      expected = '12345'+
        'TAATACCGTATAATGACTTCGGTCCAAAGATTTATCGCCCAGGGATGAGCCCGCGTAGGATTAGCTTGTTGGTGAGGTAAAGGCTCACCAAGGCGACGATCCTTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACATGGCCCAGACTCCTACGGGAGGCAGCAG' +
        '67890'
      observed = Bio::AssemblyGraphAlgorithms::ContigPrinter.new.one_connection_between_two_contigs(
        graph,'12345',acon,'67890',[]
      )
      observed.should == expected
    end
  end

  describe 'ready_two_contigs_and_connections' do
    it 'should handle a single SNP' do
      graph = parse_fixture_graph.call
      graph.nodes.length.should == 13
      # Read IDs were found by using bwa and inspecting the Sequence velvet file
      acon = anchor.call(graph, [9, 161], [4, 1045], 2, 3)
      acon.paths = [
        GraphTesting.make_onodes(graph, %w(9s 12s 7e 13s 5e 11e 2s 10s 4e)),#highest coverage
        GraphTesting.make_onodes(graph, %w(9s 12s 7e 13s 5e 1e 2e 10s 4e)),
      ]
      expected =
        'ATGAACGAACGCTGGCGGCATGCCTAACACATGCAAGTCGAACGAGACCTTCGGGTCTAGTGGCGCACGGGTGCGTAACGCGTGGGAATCTGCCCTTGGGTACGG'+
        'AATAACAGTTAGAAATGACTGCTAATACCGTATAATGACTTCGGTCCAAAGATTTATCGCCCAGGGATGAGCCCGCGTAGGATTAGCTTGTTGGTGAGGTAAANN'+
        'NTNNCNNANNNNNNNNNNNNTNNNNNGNNNNNNNNNNNGNTNAGNNNCNNNGNNNNNGNGANNTGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGC'+
        'GAAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCCGGGATGATAATGACAGTACCGGGAGAATAAGCCCCGGCTAACTCCGTG'+
        'CCAGCAGCCGCGGTAATACGGAGGGGGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTGTAAGTTAGAGGTGAAAGCCCGGGGCTCAACTCCGGAATTCA'
      observed_sequence, observed_variants = Bio::AssemblyGraphAlgorithms::ContigPrinter.new.ready_two_contigs_and_connections(
        graph,'ATGCA',acon,'ATGCA',[]
      )
      observed_sequence.should == expected
      observed_variants.collect{|v|v.to_shorthand}.sort.should == [
        "210S:GGC", "214S:CA", "217S:CA", "220S:GGCGACGATCCT", "233S:AGCTG", "239S:TCTGAGAGGAT", "251S:A", "253S:C", "256S:CCA", "260S:ACT", "264S:GGACT", "270S:A", "273S:CA",
        "210S:TTT", "214S:AC", "217S:TC", "220S:CCAACAAGCTAA", "233S:CCTAC", "239S:CGCTCAGACCA", "251S:C", "253S:A", "256S:GAT", "260S:GTC", "264S:CCTTG", "270S:T", "273S:GC",
      ].sort
    end

    # NOTE(review): this example exercises one_connection_between_two_contigs
    # although it sits in the ready_two_contigs_and_connections group.
    it 'should handle when start_coord is not == 0 and both reads are inwards facing' do
      graph = parse_fixture_graph.call
      graph.nodes.length.should == 13
      # Read IDs were found by using bwa and inspecting the Sequence velvet file
      acon = anchor.call(graph, [9, 161], [4, 1045], 0, 0)

      # introduce badness
      acon.start_probe_noded_read.start_coord = 3
      acon.end_probe_noded_read.start_coord = 4
      reads = {
        161 => 'A'*100,
        1045 => 'C'*100,
      }

      acon.paths = [
        GraphTesting.make_onodes(graph, %w(9s 12s 7e 13s 5e 11e 2s 10s 4e))
      ]
      # The 3 and 4 bases ahead of each start_coord are expected to be filled
      # in from the read sequences supplied above.
      expected = '12345'+'AAA'+
        reference_sequence.call +
        'CCCC'+'67890'
      observed = Bio::AssemblyGraphAlgorithms::ContigPrinter.new.one_connection_between_two_contigs(
        graph,'12345',acon,'67890', reads
      )
      observed.should == expected
    end

    # The two examples below have not been written yet. They are declared
    # without a body so that RSpec reports them as pending instead of failing
    # the suite with a bare raise/fail.
    it 'should handle when start_coord is not == 0 and both reads are outwards facing'

    it 'should handle when the example path is not the same length as the reference path'
  end
end