finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,38 @@
1
# Finds the graph nodes that carry a set of probe reads. The C binding is used
# first to shortlist candidate nodes, then the Ruby NodeFinder is run over
# only that shortlist.
class Bio::FinishM::CProbeNodeFinder
  include Bio::FinishM::Logging

  # Return an array of [best_node, best_noded_read] that represent the probes in the graph
  #
  # velvet_underground_graph:: graph object (must respond to internal_graph_struct)
  # probe_read_ids:: Array of Integer read IDs to look for
  def find_probes(velvet_underground_graph, probe_read_ids)
    # First use the C method to extract the set of interesting nodes
    log.debug "Extracting target nodes using the C method.." if log.debug?
    target_node_ids = find_probe_nodes(velvet_underground_graph, probe_read_ids)

    # Then iterate over just those nodes we know are interesting
    log.debug "Extracting from only those #{target_node_ids.length} nodes that are interesting.." if log.debug?
    Bio::AssemblyGraphAlgorithms::NodeFinder.new.find_unique_nodes_with_sequence_ids(
      velvet_underground_graph,
      probe_read_ids,
      :target_node_ids => Set.new(target_node_ids)
    )
  end

  # Return a minimal Array of node IDs that contain all probe read IDs
  #
  # The IDs read back from the C call are made absolute and de-duplicated
  # before being returned.
  def find_probe_nodes(velvet_underground_graph, probe_read_ids)
    @bindings ||= Bio::FinishM::VelvetCBinding.new

    # Native int32 buffer holding the read IDs for the C call
    c_probe_read_ids = FFI::MemoryPointer.new(:int32, probe_read_ids.length)
    begin
      c_probe_read_ids.write_array_of_int32(probe_read_ids)

      probe_nodes = @bindings.extract_best_probe_reads(
        velvet_underground_graph.internal_graph_struct,
        c_probe_read_ids,
        probe_read_ids.length
      )
      probe_nodes.read_array_of_int32(probe_read_ids.length).collect { |n| n.abs }.uniq
    ensure
      # clean up, even when the C call or the read-back raises
      c_probe_read_ids.free
    end
  end
end
37
+
38
+
@@ -0,0 +1,350 @@
1
+ require 'yargraph'
2
+
3
+
4
+ class Bio::FinishM::ConnectionInterpreter
5
+ include Bio::FinishM::Logging
6
+
7
# Build the probe graph from the given connections.
#
# connections:: Enumerable of Connection objects (each must respond to
#               probe1, probe2 and to_settable)
# sequence_ids:: the sequence identifiers under consideration; kept as-is
#                and consulted later (e.g. by unconnected_sequences)
#
# Raises a RuntimeError when two connections share the same settable key.
def initialize(connections, sequence_ids)
  @graph = Yargraph::UndirectedGraph.new
  @circular_probes = []
  @sequence_ids = sequence_ids

  # Setup hash of settable to original
  # Assume there is only 1 connection between two contig ends
  @connection_hash = {}
  connections.each do |conn|
    key = conn.to_settable
    raise "Duplicate connections not handled (yet?), found #{conn} => #{key}" if @connection_hash.key?(key)
    @connection_hash[key] = conn
  end

  # Add connections
  connections.each do |conn|
    first_end = conn.probe1.to_settable
    second_end = conn.probe2.to_settable
    if first_end == second_end
      # A probe connected to itself is recorded separately rather than
      # being added to the graph as a self-edge.
      @circular_probes.push conn.probe1
    else
      @graph.add_edge first_end, second_end
    end
  end

  log.debug "Created a graph with #{@graph.vertices.to_a.length} vertices and #{@graph.edges.length} edges" if log.debug?
end
33
+
34
# Returns an Array of the connection objects stored in @connection_hash.
def connections
  @connection_hash.each_value.to_a
end
37
+
38
# Return sequences that exclusively connect their own start to their own end.
#
# A connection qualifies when both probes sit on the same sequence, on
# different sides, and each probe has exactly one entry in @graph.edges.
# Returns an Array holding the sequence_index of each such connection.
def circular_sequences
  self_joined = connections.select do |conn|
    first_end = conn.probe1
    second_end = conn.probe2
    first_end.sequence_index == second_end.sequence_index &&
      first_end.side != second_end.side &&
      @graph.edges[first_end.to_settable].length == 1 &&
      @graph.edges[second_end.to_settable].length == 1
  end
  self_joined.collect { |conn| conn.probe1.sequence_index }
end
53
+
54
+
55
# Return an Array of Connection objects that represent edges where
# there is only a single connection from both sides.
#
# Each vertex with exactly one neighbour, whose neighbour in turn has exactly
# one neighbour, yields the original connection looked up in
# @connection_hash. Every connection is reported once even though it is
# visited from both of its ends.
def doubly_single_contig_connections
  likelies = []
  already_seen_connections = Set.new

  @graph.vertices.each do |v|
    # If there is only 1 connection on both sides, then go with that
    neighbours = @graph.neighbours(v)
    log.debug "Testing connection between #{v} and #{neighbours}" if log.debug?
    next unless neighbours.length == 1 && @graph.neighbours(neighbours[0]).length == 1

    log.debug "Connection passed the doubly-test" if log.debug?

    # Rebuild a Connection purely to derive the canonical lookup key
    conn = Connection.new
    conn.probe1 = Probe.new(v)
    conn.probe2 = Probe.new(neighbours[0])
    settable = conn.to_settable

    # Record the connection unless it is duplicate
    next if already_seen_connections.include?(settable)
    likelies.push @connection_hash[settable]
    already_seen_connections << settable
  end

  likelies
end
84
+
85
# Single linkage cluster the given inter-contig connections together with
# the implicit start-to-end link inside every contig.
#
# It is like an (easy) assembly problem because each vertex can only be
# connected to two others - 1 intra-contig and 1 inter-contig (unless it is
# circular).
#
# contig_connections:: Enumerable of Connection objects to scaffold with
#
# Returns an Array of Scaffold objects: the multi-contig scaffolds first,
# then one single-contig Scaffold for every entry of @sequence_ids that was
# not placed in a multi-contig scaffold.
def scaffolds(contig_connections)
  unused_edges = Yargraph::UndirectedGraph::EdgeSet.new
  contig_connections.each do |conn|
    unused_edges.add_edge conn.probe1.to_settable, conn.probe2.to_settable
  end

  scaffolded_paths = []
  circular_single_contigs = Set.new

  # while there are more elements in the edge set,
  # 'pop' an arbitrary edge out of it and grow a chain from that edge
  loop do
    seed_edge = unused_edges.pop
    break unless seed_edge
    log.debug "starting to scaffold from #{seed_edge}" if log.debug?

    # An edge joining the two ends of one contig marks a circular contig;
    # it is remembered but not scaffolded.
    if seed_edge[0][0] == seed_edge[1][0]
      log.debug "Not scaffolding contig #{seed_edge[0][0]} since it appears to be circular" if log.debug?
      circular_single_contigs << seed_edge[0][0]
      next
    end

    circular = false
    lefts = [Probe.new(seed_edge[0])]
    rights = [Probe.new(seed_edge[1])]
    log.debug "rights was #{rights[0].to_s}" if log.debug?

    # go 'left': hop to the other end of the leftmost contig and follow
    # whatever edge leaves it, for as long as there is one
    loop do
      companion = lefts[-1].companion
      next_probe = unused_edges[companion.to_settable].to_a[0]
      break unless next_probe
      next_probe_probe = Probe.new(next_probe)

      unused_edges.delete next_probe, companion.to_settable
      if next_probe_probe.companion.to_settable == rights[0].to_settable
        # walked all the way round to the contig the chain started on
        log.debug "Found multi-contig circularity between #{next_probe_probe.companion} and #{rights[0]}" if log.debug?
        circular = true
        break
      end

      lefts.push companion
      lefts.push next_probe_probe
      log.debug "Adding node to the left: #{next_probe} and companion #{companion}" if log.debug?
    end

    # and go right in the same manner
    loop do
      companion = rights[-1].companion
      next_probe = unused_edges[companion.to_settable].to_a[0]
      break unless next_probe

      rights.push companion
      rights.push Probe.new(next_probe)
      log.debug "Adding node to the right: #{next_probe} and companion #{companion}" if log.debug?
      unused_edges.delete next_probe, companion.to_settable
    end

    # Add the left and the right together into one path, capped at either
    # end by the far side of the outermost contigs
    chain = [lefts[-1].companion] + lefts.reverse + rights + [rights[-1].companion]
    scaffolded_paths.push PossiblyCircularArray.new(chain, circular)
  end

  if log.debug?
    log.debug "Found #{scaffolded_paths.length} multi-contig scaffold(s):"
    scaffolded_paths.each do |path|
      log.debug "Scaffold: #{path.collect { |e| e.to_s }.join(', ')}"
    end
  end

  # for each scaffolded set, create new scaffold object
  built = []
  scaffolded_contigs = Set.new
  scaffolded_paths.each do |path|
    # a path is a list of (entry probe, exit probe) pairs, one pair per contig
    raise if path.length % 2 != 0
    scaffold = Scaffold.new
    scaffold.circular = path.circular
    previous_exit = nil
    path.each_slice(2) do |entry_probe, exit_probe|
      contig = UnscaffoldedContig.new
      contig.sequence_index = entry_probe.sequence_index
      # entering at the start means the contig is used as given
      contig.direction = entry_probe.side == :start ? true : false

      scaffold.contigs ||= []
      unless scaffold.contigs.empty?
        # look up the original connection to learn the gap size
        dummy_conn = Connection.new
        dummy_conn.probe1 = previous_exit
        dummy_conn.probe2 = entry_probe
        original_connection = @connection_hash[dummy_conn.to_settable]
        scaffold.gap_lengths.push original_connection.distance
      end
      scaffold.contigs.push contig
      scaffolded_contigs << entry_probe.sequence_index

      previous_exit = exit_probe
    end
    built.push scaffold
  end

  # for each contig that is not in a scaffold, add as singleton
  @sequence_ids.each do |i|
    next if scaffolded_contigs.include?(i)

    contig = UnscaffoldedContig.new
    contig.sequence_index = i
    contig.direction = true

    scaff = Scaffold.new
    scaff.contigs = [contig]
    scaff.circular = circular_single_contigs.include?(i) ? true : false
    built.push scaff
  end

  built
end
212
+
213
# Return an Array of Probe objects (a :start and/or :end probe per entry of
# @sequence_ids) that do not appear in any connection.
#
# Assumes the sequence_ids given in the initialize
# are the same as the sequence_index
def unconnected_probes
  connected_ends = Set.new
  connections.each do |conn|
    connected_ends << conn.probe1.to_settable << conn.probe2.to_settable
  end

  loose_ends = []
  @sequence_ids.each do |index|
    [:start, :end].each do |side|
      candidate = Probe.new([index, side])
      loose_ends.push candidate unless connected_ends.include?(candidate.to_settable)
    end
  end
  loose_ends
end
234
+
235
# Return an Array of sequence indices that did not have any connections
# to any others, i.e. the entries of @sequence_ids that are not the
# sequence_index of either probe of any connection.
def unconnected_sequences
  seen = connections.each_with_object(Set.new) do |conn, acc|
    acc << conn.probe1.sequence_index
    acc << conn.probe2.sequence_index
  end
  @sequence_ids.to_a - seen.to_a
end
245
+
246
# A link between two probes (contig ends), with an associated distance.
class Connection
  # Probe objects
  attr_accessor :probe1, :probe2

  attr_accessor :distance

  # "<probe1>/<probe2>:<distance>"
  def to_s
    "#{[@probe1, @probe2].join('/')}:#{@distance}"
  end

  # Flat array key of both probes' settable forms, ordered so that the same
  # pair of probes gives the same key whichever one is probe1: the lower
  # sequence_index goes first, and on a tie the lower side goes first.
  def to_settable
    keep_given_order =
      if @probe1.sequence_index < @probe2.sequence_index
        true
      elsif @probe1.sequence_index == @probe2.sequence_index
        @probe1.side < @probe2.side
      else
        false
      end

    ordered = keep_given_order ? [@probe1, @probe2] : [@probe2, @probe1]
    ordered.collect { |probe| probe.to_settable }.flatten
  end
end
270
+
271
# One end of a sequence, identified by the sequence and which side it is.
class Probe
  attr_accessor :side #:start or :end
  attr_accessor :sequence_index #ID of the underlying sequence as an Integer

  # settable_representation:: optional [sequence_index, side] pair, as
  #                           produced by to_settable
  def initialize(settable_representation=nil)
    return if settable_representation.nil?

    @sequence_index = settable_representation[0]
    @side = settable_representation[1]
  end

  # [sequence_index, side]
  def to_settable
    [@sequence_index, @side]
  end

  # Sequence index followed by 's' for a :start probe, 'e' for anything else
  def to_s
    suffix = (@side == :start) ? 's' : 'e'
    "#{@sequence_index}#{suffix}"
  end

  # Return a probe representing the other side of the contig
  def companion
    opposite_side = (@side == :start) ? :end : :start
    other = Probe.new
    other.sequence_index = @sequence_index
    other.side = opposite_side
    other
  end
end
299
+
300
# An ordered series of contigs with the gap length between each
# neighbouring pair.
class Scaffold
  attr_accessor :contigs, :gap_lengths

  attr_accessor :circular

  def circular?
    @circular
  end

  def initialize
    @contigs = []
    @gap_lengths = []
  end

  # Build the scaffold sequence as a String.
  #
  # sequence_id_to_nucleotides_hash:: lookup from a contig's sequence_index
  #                                   to its nucleotide string
  #
  # Contigs whose direction is true are used as given; those whose direction
  # is false are reverse complemented and upcased. Each gap is written as
  # that many 'N' characters. Raises "Programming error" unless there is
  # exactly one more contig than there are gaps, or if a contig's direction
  # is neither true nor false.
  def sequence(sequence_id_to_nucleotides_hash)
    raise "Programming error" unless @contigs.length == @gap_lengths.length + 1

    oriented = lambda do |contig|
      seq = sequence_id_to_nucleotides_hash[contig.sequence_index]
      if contig.direction == true
        seq
      elsif contig.direction == false
        Bio::Sequence::NA.new(seq).reverse_complement.to_s.upcase
      else
        raise "Programming error"
      end
    end

    pieces = [oriented.call(@contigs[0])]
    @gap_lengths.each_with_index do |gap_length, i|
      pieces.push 'N'*gap_length
      pieces.push oriented.call(@contigs[i+1])
    end
    pieces.join('')
  end
end
337
+
338
# A contig's place in a scaffold: which sequence it is, and its direction
# (Scaffold#sequence uses the sequence as given when direction is true and
# reverse complements it when direction is false).
class UnscaffoldedContig
  attr_accessor :sequence_index
  attr_accessor :direction
end
341
+
342
# An Array that additionally carries a circular flag.
class PossiblyCircularArray < Array
  attr_accessor :circular

  # array:: contents to copy into this array
  # circular:: value stored in the circular attribute
  def initialize(array, circular)
    super(array)
    @circular = circular
  end
end
350
+ end
@@ -0,0 +1,400 @@
1
+ require 'bio'
2
+
3
class Bio::Velvet::Graph::NodedRead
  # Returns @offset_from_start_of_node when @direction is true, and
  # parent_node.length minus that offset when @direction is false.
  # Raises "programming error" for any other @direction.
  def adjusted_position(parent_node)
    return @offset_from_start_of_node if @direction == true
    return parent_node.length - @offset_from_start_of_node if @direction == false

    raise "programming error"
  end
end
14
+
15
+ module Bio
16
+ module AssemblyGraphAlgorithms
17
+ class ContigPrinter
18
+ include Bio::FinishM::Logging
19
+
20
# Plain data holder describing how a set of paths is anchored to the two
# contigs it connects.
class AnchoredConnection
  # The identifiers of the probe reads in the velvet assembly graph
  attr_accessor :start_probe_noded_read
  attr_accessor :end_probe_noded_read

  # number of nucleotides between the start of the start probe read and the start of the end of the contig
  attr_accessor :start_probe_contig_offset

  # number of nucleotides until the end of the end probe read in the start of the second contig
  attr_accessor :end_probe_contig_offset

  # Length of the start and end probe sequences
  attr_accessor :start_probe_read_length, :end_probe_read_length

  # Enumerable of Enumerables of OrientedNode objects, each list of OrientedNode objects
  # corresponds to a path that forms the connection
  attr_accessor :paths
end
38
+
39
# Given two contigs, return a consensus path and variants of the path.
#
# ----------> <-------- start and end probes (ends of probe sequences may not form part of final path). Directions not variable.
# --------------------->NNNN-------------------> original sequence to be gapfilled (contig1, NNNN, contig2). Directions not variable
# ----------- -------> path across the gap. Direction not variable
# \ /
# --------------
# ---------->|<-----|----->|---------> nodes that make up the path (directions and boundaries variable)
# stage1| stage2 |stage3 stages of sequence construction in this method
#
# Much like one_connection_between_two_contigs except can handle multiple connections
# (but cannot handle 0 connections)
#
# graph:: not used by this method
# contig1, contig2:: nucleotide Strings either side of the gap
# anchored_connection:: AnchoredConnection describing probes, offsets and paths
# sequences:: lookup from read ID to read sequence, consulted only when a
#             probe read does not start at coordinate 0
#
# Returns [joined_sequence, variants]. Neither contig String is modified.
def ready_two_contigs_and_connections(graph, contig1, anchored_connection, contig2, sequences)
  log.debug "Working with anchored_connection: #{anchored_connection.inspect}" if log.debug?

  # Stage1 - contig1 before the path begins
  # 0 is a special case because negative 0 doesn't make sense
  leading_trim = anchored_connection.start_probe_contig_offset
  assembled = leading_trim == 0 ? contig1 : contig1[0...-(leading_trim)]
  log.debug "After first chunk of sequence added, sequence is #{assembled.length}bp long" if log.debug?

  # Stage2 - path sequence, beginning and ending with
  # beginning and ending probes
  all_paths = anchored_connection.paths
  example_path = all_paths[0]
  path_sequence, variants = sequences_to_variants_conservative(
    all_paths.collect { |path| path.sequence }
  )
  log.debug "Reference path has a sequence length #{path_sequence.length}" if log.debug?

  # Find start index
  first_onode = example_path[0]
  start_read = anchored_connection.start_probe_noded_read
  raise if start_read.nil?
  start_overhang = ''
  if start_read.start_coord != 0
    log.warn "Unexpectedly the start of the start probe not did not form part of the path, which is a little suspicious"
    start_overhang = sequences[start_read.read_id][0...start_read.start_coord]
  end
  # xor read direction on node, and node direction on path
  path_start = nil
  if (start_read.direction == true) ^ first_onode.starts_at_start?
    path_start = first_onode.node.corresponding_contig_length - start_read.offset_from_start_of_node
    # extra bit on read needs to be reverse complemented
    start_overhang = Bio::Sequence::NA.new(start_overhang).reverse_complement.to_s.upcase unless start_overhang == ''
  else
    path_start = start_read.offset_from_start_of_node
  end

  # Correct variants' positions to be relative to the full contig,
  # not just the path sequence. This must happen before the path is
  # appended, while the length is still that of the stage 1 sequence.
  stage1_length = assembled.length
  variants.each do |variant|
    variant.position = variant.position - path_start + stage1_length + 1
  end

  # Find end index
  last_onode = example_path[-1]
  end_read = anchored_connection.end_probe_noded_read
  raise if end_read.nil?
  end_overhang = ''
  if end_read.start_coord != 0
    log.warn "Unexpectedly the end of the end probe not did not form part of the path, which is a little suspicious"
    end_overhang = sequences[end_read.read_id][0...end_read.start_coord]
  end
  # Potentially the example_path has a different length than the reference sequence in bp.
  # Correct this ? Or not a bug? confused. I hate this method. TODO. There is a test for this which is unwritten but it fails
  path_end = example_path[0...-1].reduce(0) { |sum, onode| sum + onode.node.length_alone }
  if (end_read.direction == false) ^ last_onode.starts_at_start?
    path_end += end_read.offset_from_start_of_node
    end_overhang = Bio::Sequence::NA.new(end_overhang).reverse_complement.to_s.upcase unless end_overhang == ''
  else
    path_end += last_onode.node.corresponding_contig_length - end_read.offset_from_start_of_node
  end

  log.debug "Found start index #{path_start} and end index #{path_end}" if log.debug?
  # `+` rather than `<<`: assembled may still be the caller's contig1 object
  assembled += start_overhang + path_sequence[path_start...path_end] + end_overhang
  log.debug "After path chunk of sequence added, sequence is #{assembled.length}bp long" if log.debug?

  # Stage 3
  assembled += contig2[anchored_connection.end_probe_contig_offset..-1]
  log.debug "After last chunk of sequence added, sequence is #{assembled.length}bp long" if log.debug?

  [assembled, variants]
end
132
+
133
# Like ready_two_contigs_and_connections except assumes that there is only a single
# connection between the two sides, and returns just the joined sequence
# (the variants are discarded).
#
# Raises a RuntimeError when anchored_connection holds more than one path.
def one_connection_between_two_contigs(graph, contig1, anchored_connection, contig2, sequences)
  if anchored_connection.paths.length > 1
    raise "programming error: only one path expected here"
  end

  sequence_and_variants = ready_two_contigs_and_connections(graph, contig1, anchored_connection, contig2, sequences)
  sequence_and_variants[0]
end
139
+
140
+ private
141
# Given an Array of sequences (each representing a path), do a MSA and return
# [reference_sequence, variants], where the reference only commits to a base
# in columns where every sequence agrees.
#
# A single input sequence is returned as-is with no variants and without
# running an alignment. Otherwise, for each alignment column:
# * all sequences agree -> that character
# * they disagree and fewer than half are gaps -> 'N'
# * they disagree and at least half are gaps -> '-'
def sequences_to_variants_conservative(sequences)
  # No variants here
  return sequences[0], [] if sequences.length == 1

  # Do alignment
  # Run multiple sequence alignment of each sequence, with the reference sequence first
  log.debug "Running MSA with #{sequences.length} sequences.." if log.debug?
  original_alignments = clustalo(sequences)
  log.debug "Finished running MSA" if log.debug?
  if log.debug?
    log.debug "Alignment found was:"
    original_alignments.each { |align| log.debug align }
  end

  # Work out reference path, one alignment column at a time
  column_count = original_alignments[0].split('').length
  ref = (0...column_count).collect do |i|
    column = original_alignments.collect { |aln| aln[i] }

    if column.uniq.length == 1
      # where all paths agree, use that base
      column[0]
    else
      # otherwise use - or N, depending on how many things have a base at each position.
      column.count('-') < column.length.to_f / 2 ? 'N' : '-'
    end
  end

  # return reference path, and variants
  reference_sequence = ref.join('')
  return reference_sequence, alignment_to_variants(reference_sequence, original_alignments)
end
189
+
190
# Given a MSA (as a single reference and an array of alternates),
# return a condensed set of variants
#
# reference_alignment:: aligned reference String ('-' marks a gap)
# alternate_sequences_alignment:: Array of aligned Strings to compare with it
#
# Positions are counted in reference bases, so '-' columns of the reference
# do not advance the position. Per column, a differing sequence gives an
# INSERT where the reference has a gap, a 1-base DELETION where the sequence
# has a gap, and a SWAP otherwise. The per-sequence lists are then passed
# to condense_variants!.
def alignment_to_variants(reference_alignment, alternate_sequences_alignment)
  return [] if alternate_sequences_alignment.empty?

  # Collect the variants at each sequence at each column. Every sequence
  # gets its own list up front: filling the outer array lazily leaves nil
  # holes for sequences that match the reference, and those holes break
  # the iteration in condense_variants!.
  variants = Array.new(alternate_sequences_alignment.length) { [] }
  reference_position = 0
  reference_alignment.each_char.with_index do |ref_base, column|
    alternate_sequences_alignment.each_with_index do |alignment, sequence_id|
      nonref = alignment[column]
      next if nonref == ref_base

      variant =
        if ref_base == '-'
          Variant.new reference_position, nonref, Variant::INSERT
        elsif nonref == '-'
          Variant.new reference_position, 1, Variant::DELETION
        else
          Variant.new reference_position, nonref, Variant::SWAP
        end
      variants[sequence_id].push variant
    end
    reference_position += 1 unless ref_base == '-'
  end

  # Condense the single column, single species variants into a condensed set
  condense_variants!(variants)
end
222
+
223
# Sometimes several paths will contain the same variant. Remove these duplications.
#
# variant_array_of_arrays:: Array with one entry per sequence, each an Array
#                           of single-column Variant objects in column order
#                           (a nil entry is treated as "no variants")
#
# Within each sequence, runs of adjacent same-type variants are first merged
# into one variant. The Variant objects that start a run are modified in
# place as a result. Returns an Array with one Variant per distinct
# (position, sequence, deletion_length, type) combination.
def condense_variants!(variant_array_of_arrays)
  all_variants = {}

  variant_array_of_arrays.each_with_index do |variant_array, i|
    # A sequence that never differed from the reference may have no list
    next if variant_array.nil?

    last_variant = nil
    current_variants = []
    variant_array.each do |variant|
      # Combine last_variant and this one if
      # their positions are consecutive and their types are the same
      if !last_variant.nil? and last_variant.type == variant.type

        if variant.type == Variant::INSERT and last_variant.position == variant.position
          # inserted bases share one reference position
          last_variant.sequence += variant.sequence

        elsif variant.type == Variant::DELETION and last_variant.position == variant.position - last_variant.deletion_length
          last_variant.deletion_length += 1

        elsif variant.type == Variant::SWAP and last_variant.position + last_variant.sequence.length == variant.position
          last_variant.sequence += variant.sequence

        else
          # Start a new variant
          last_variant = variant
          current_variants.push variant
        end
      else
        last_variant = variant
        current_variants.push variant
      end
    end
    if log.debug?
      log.debug "Found #{current_variants.length} variants in sequence #{i}:"
      current_variants.each do |variant|
        log.debug variant.to_shorthand
      end
    end

    # Multiple paths can have the same variant. Don't duplicate
    current_variants.each do |variant|
      key = [
        variant.position,
        variant.sequence,
        variant.deletion_length,
        variant.type
      ]
      all_variants[key] ||= variant
    end
  end

  all_variants.values
end
275
+
276
+ # # Given an Enumerable of nucleic acid sequences, align them with MAFFT,
277
+ # # and return an Array of the same size as the input
278
+ # def mafft(sequences)
279
+ # i = 0
280
+ # stdin = sequences.collect{|s| i+=1; ">#{i}\n#{s}\n"}.join('')
281
+ # stdout = Bio::Commandeer.run "mafft --retree 1 --quiet --nuc /dev/stdin", {:stdin => stdin, :log => log}
282
+ # to_return = []
283
+ # header = true
284
+ # stdout.each_line do |line|
285
+ # if !header
286
+ # to_return.push line.strip
287
+ # end
288
+ # header = !header
289
+ # end
290
+ # return to_return
291
+ # end
292
+
293
# Align the given sequences by running the external clustalo program.
#
# The sequences are written as FASTA records named 1, 2, 3.. and piped to
# clustalo on stdin; its output is parsed as FASTA. Returns an Array with
# the sequence of each record of that output as a String.
def clustalo(sequences)
  fasta_input = sequences.each_with_index.collect { |s, idx| ">#{idx + 1}\n#{s}\n" }.join('')
  # NOTE: logged at info level on every call; the debug guard is commented out
  log.info "Running clustalo with #{sequences.length} sequences, specifically: #{fasta_input}" #if log.debug?
  raw_alignment = Bio::Commandeer.run "clustalo -t DNA -i - --output-order=input-order", {:stdin => fasta_input, :log => log}

  aligned = []
  Bio::FlatFile.foreach(Bio::FastaFormat, StringIO.new(raw_alignment)) do |entry|
    aligned.push entry.seq.to_s
  end
  aligned
end
305
+
306
+
307
+
308
+
309
# A difference from a reference sequence: an insertion, a deletion, or a
# swap of bases.
class Variant
  #Types:
  INSERT = :insert
  DELETION = :deletion
  SWAP = :swap #n bases swapped for another n bases

  attr_accessor :reference_name

  # 0-based position on the contig
  attr_accessor :position

  # sequence (or nil if variant is a deletion)
  attr_accessor :sequence

  # length of deletion (or nil if not a deletion)
  attr_accessor :deletion_length

  # See constants in this class
  attr_accessor :type

  # The second argument is stored as deletion_length when type is DELETION,
  # and as sequence for every other type.
  def initialize(position=nil, sequence_or_deletion_length=nil, type=nil)
    @position = position
    @type = type
    if type == DELETION
      @deletion_length = sequence_or_deletion_length
    else
      @sequence = sequence_or_deletion_length
    end
  end

  # 1-based position
  def base_number
    @position+1
  end

  # Compact text form: base number, a type letter (D, S or I), a colon, then
  # the deletion length or the upcased sequence. Raises for an unknown type.
  def to_shorthand
    case type
    when DELETION then "#{base_number}D:#{deletion_length}"
    when SWAP then "#{base_number}S:#{sequence.upcase}"
    when INSERT then "#{base_number}I:#{sequence.upcase}"
    else raise
    end
  end

  # The reference sequence has been reverse complemented. Fix this
  # variant so it makes sense again (position aside)
  def reverse!
    return unless [SWAP, INSERT].include?(type)

    @sequence = Bio::Sequence::NA.new(@sequence).reverse_complement.to_s.upcase
  end

  # Fields of a VCF-style record, in the order
  # CHROM POS ID REF ALT QUAL FILTER INFO
  #
  # reference_sequence:: String that REF is sliced from for swaps and deletions
  def vcf_array(reference_sequence)
    leading = [@reference_name, @position+1, '.']

    ref_and_alt =
      case type
      when SWAP
        [reference_sequence[@position...(@position+@sequence.length)], @sequence]
      when INSERT
        ['.', @sequence]
      when DELETION
        [reference_sequence[@position...(@position+@deletion_length)], '.']
      else
        raise
      end

    leading + ref_and_alt + ['20', 'PASS', 'finishm']
  end

  # vcf_array joined with tabs
  def vcf(reference_sequence)
    vcf_array(reference_sequence).join("\t")
  end
end
394
+
395
# Plain holder pairing a reference path with its variants.
class PrintableConnection
  attr_accessor :reference_path
  attr_accessor :variants
end
398
+ end
399
+ end
400
+ end