finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,142 @@
1
# Implements the `finishm sequence` sub-command: given a path through the
# assembly graph (a series of node IDs, optionally with orientations), print
# the DNA sequence of that path in FASTA-like form.
class Bio::FinishM::Sequence
  include Bio::FinishM::Logging

  # One step along a user-specified path: the ID of the node, and which side
  # of that node is encountered first when walking the path.
  class PathSteppingStone
    attr_accessor :node_id, :first_side
  end

  # Register this sub-command's command line options.
  #
  # optparse_object:: the OptionParser-like object to add options to
  # options:: Hash that parsed option values are written into
  #
  # Raises a RuntimeError while parsing if a stepping stone cannot be
  # understood, or if --path and --paths are both given.
  def add_options(optparse_object, options)
    optparse_object.banner = "\nUsage: finishm sequence --assembly-??? --path PATH\n\n" \
      "Given a series of nodes and orientations, print the DNA sequence of the given path\n" \
      "\n\n"

    # Parse an array like ['4s','2s','3e'] into an array of PathSteppingStone
    # objects. A trailing 's' means the start of the node comes first, a
    # trailing 'e' means the end of the node comes first.
    parse_path_string = lambda do |stepping_stones|
      stepping_stones.collect do |str|
        matches = str.match(/\A(\d+)([se])\z/)
        if matches.nil?
          raise "Unable to parse stepping stone along the path: `#{str}'. Entire path was `#{stepping_stones.join(',')}'."
        end

        stone = PathSteppingStone.new
        stone.node_id = matches[1].to_i
        # The regexp only lets 's' or 'e' through, so a two-way choice is enough.
        stone.first_side = if matches[2] == 's'
          Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
        else
          Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
        end
        stone
      end
    end

    optparse_object.separator "\nOne of the following path defining arguments must be defined:\n\n"
    optparse_object.on("--path-ids PATH", "A comma separated list of node IDs - the program attempts to determine the orientations automatically") do |arg|
      options[:path_ids] = arg
    end
    optparse_object.on("--path PATH", Array, "A comma separated list of node IDs and orientations - explore from these probe IDs in the graph e.g. '4s,2s,3e' means start at the start of node 4, connecting to the beginning of node 2 and finally the end of probe 3.") do |arg|
      # Mirror the check done for --paths so the outcome does not depend on
      # the order the two options were given in.
      raise "Only one of --path and --paths can be specified" unless options[:paths].nil?
      options[:paths] = [parse_path_string.call(arg)]
    end
    optparse_object.on("--paths PATHS", "A colon separated list of comma separated lists of node IDs and orientations - e.g. '4s,2s,3e:532s,465s' means print 2 different paths") do |arg|
      raise "Only one of --path and --paths can be specified" unless options[:paths].nil?
      options[:paths] = []
      arg.split(':').each do |path_definition|
        path_definition = path_definition.strip
        next if path_definition == ''
        options[:paths].push parse_path_string.call(path_definition.split(','))
      end
      log.info "Read in #{options[:paths].length} path definitions"
    end

    optparse_object.separator "\nIf an assembly is to be done, there must be some definition of reads:\n\n" #TODO improve this help
    Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

    optparse_object.separator "\nOptional graph-related arguments:\n\n"
    Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
  end

  # Check the parsed options for consistency.
  #
  # Returns a String describing the first problem found, or nil when no
  # problem is detected here (the read-input validator's result is returned
  # as-is when reads are required).
  def validate_options(options, argv)
    #TODO: give a better description of the error that has occurred
    #TODO: require reads options
    return "Dangling argument(s) found e.g. #{argv[0] }" if argv.length != 0

    if options[:path_ids]
      return "Multiple ways to define the path given, one at a time please" if options[:paths]
    elsif options[:paths].nil? || options[:paths].empty?
      return "No path defined, so don't know how to procede through the graph"
    end

    # Need reads unless there is already an assembly
    return nil if options[:previous_assembly] || options[:previously_serialized_parsed_graph_file]
    Bio::FinishM::ReadInput.new.validate_options(options, [])
  end

  # Read in or generate the assembly graph, then print each requested path as
  # a '>' header line (the trail's shorthand) followed by its sequence.
  #
  # Raises a RuntimeError if a node ID is not in the graph, or if two
  # consecutive stepping stones are not connected in the graph.
  def run(options, argv)
    read_input = Bio::FinishM::ReadInput.new
    read_input.parse_options options

    # Generate the assembly graph
    log.info "Reading in or generating the assembly graph"
    finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)

    print_trail = lambda do |oriented_trail|
      print '>'
      puts oriented_trail.to_shorthand
      puts oriented_trail.sequence
    end

    if options[:path_ids]
      trail = Bio::Velvet::Graph::OrientedNodeTrail.create_from_super_shorthand(options[:path_ids], finishm_graph.graph)
      print_trail.call trail
      return
    end

    # Build the oriented node trail
    log.info "Building the trail(s) from the nodes"
    options[:paths].each do |path|
      trail = Bio::Velvet::Graph::OrientedNodeTrail.new
      path.each do |stone|
        log.debug "Adding stone to the trail: #{stone.inspect}"
        node = finishm_graph.graph.nodes[stone.node_id]
        if node.nil?
          raise "Unable to find node ID #{stone.node_id} in the graph, so cannot continue"
        end

        # Check that the path actually connects in the graph, otherwise stop.
        # The first stepping stone has nothing to connect to, so skip it.
        unless trail.length == 0
          is_neighbour = false
          trail.neighbours_of_last_node(finishm_graph.graph).each do |oneigh|
            log.debug "Considering neighbour #{oneigh.inspect}"
            is_neighbour = true if oneigh.node == node && oneigh.first_side == stone.first_side
          end
          unless is_neighbour
            raise "In the graph, the node #{trail.last.to_s} does not connect with #{stone.inspect}"
          end
        end

        # OK, all the checking done. Actually add it to the trail
        trail.add_node node, stone.first_side
      end

      # Print the sequence
      print_trail.call trail
    end
  end
end
@@ -0,0 +1,430 @@
1
# Implements the `finishm visualise` sub-command: read in or generate an
# assembly graph, optionally restrict it to the neighbourhood of some probes,
# nodes or scaffold ends, and write it out via graphviz as PNG, SVG and/or DOT.
class Bio::FinishM::Visualise
  include Bio::FinishM::Logging

  DEFAULT_OPTIONS = {
    :min_adjoining_reads => 2,
    :max_adjoining_node_coverage => 300,
    :graph_search_leash_length => 20000,
    :interesting_probes => nil,
    :max_nodes => 50,
    :contig_end_length => 200
  }

  # Register this sub-command's command line options, after merging
  # DEFAULT_OPTIONS into the given options Hash.
  def add_options(optparse_object, options)
    options.merge! DEFAULT_OPTIONS
    optparse_object.banner = "\nUsage: finishm visualise --assembly-??? <output_visualisation_file>\n\n" \
      "Visualise an assembly graph\n" \
      "\n\n"

    optparse_object.separator "Output visualisation formats (one or more of these must be used)"
    add_visualisation_options(optparse_object, options)

    optparse_object.separator "Input genome information"
    optparse_object.separator "\nIf an assembly is to be done, there must be some definition of reads:\n\n" #TODO improve this help
    Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

    optparse_object.separator "\nOptional graph-exploration arguments:\n\n"
    add_scaffold_options(optparse_object, options)
    add_probe_options(optparse_object, options)

    optparse_object.separator "\nOptional graph-related arguments:\n\n"
    Bio::FinishM::GraphGenerator.new.add_options(optparse_object, options)
  end

  # Run each validator in turn. Returns the first error message (a String)
  # found, or a falsy value when none of the validators reports a problem.
  def validate_options(options, argv)
    #TODO: give a better description of the error that has occurred
    #TODO: require reads options
    validate_argv_length(argv) ||
      validate_visualisation_options(options) ||
      validate_scaffold_options(options) ||
      validate_probe_options(options) ||
      validate_assembly_options(options)
  end

  # Register the output format options (--assembly-svg/png/dot).
  def add_visualisation_options(optparse_object, options)
    optparse_object.on("--assembly-svg PATH", "Output assembly as a SVG file [default: off]") do |arg|
      options[:output_graph_svg] = arg
    end
    optparse_object.on("--assembly-png PATH", "Output assembly as a PNG file [default: off]") do |arg|
      options[:output_graph_png] = arg
    end
    optparse_object.on("--assembly-dot PATH", "Output assembly as a DOT file [default: off]") do |arg|
      options[:output_graph_dot] = arg
    end
  end

  # Returns an error message unless at least one output format was requested.
  def validate_visualisation_options(options)
    if options[:output_graph_png].nil? && options[:output_graph_svg].nil? && options[:output_graph_dot].nil?
      return "No visualisation output format/file given, don't know how to visualise"
    end
  end

  # Register the options for exploring from scaffold ends
  # (--genomes, --scaffolds, --overhang).
  def add_scaffold_options(optparse_object, options)
    optparse_object.on("--genomes FASTA_1[,FASTA_2...]", Array, "Fasta files of genomes used in the assembly. Required if --scaffolds is given [default: unused]") do |arg|
      options[:assembly_files] = arg
    end
    optparse_object.on("--scaffolds SIDE_1[,SIDE_2...]", Array, "explore from these scaffold ends e.g 'contig1s' for the start of contig1, 'contig1e' for the end of contig1, and 'contig1,contig3e' for both sides of contig1 and the end of contig3 [default: unused]") do |arg|
      # A name without a trailing s/e means both ends of that scaffold.
      options[:scaffold_sides] = arg.collect do |side|
        if side.match(/[se]$/)
          side
        else
          ["#{side}s", "#{side}e"]
        end
      end.flatten
    end
    optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{options[:contig_end_length]}]") do |arg|
      options[:contig_end_length] = arg.to_i
    end
  end

  # Returns an error message if --scaffolds was given without --genomes.
  def validate_scaffold_options(options)
    # --scaffolds is stored under :scaffold_sides (see add_scaffold_options)
    if options[:scaffold_sides] && !options[:assembly_files]
      return "If --scaffolds is defined, so then must --genomes"
    end
  end

  # Register the options for exploring from probes or nodes, and for limiting
  # how far the exploration goes (--leash-length, --max-nodes).
  #
  # The option blocks raise a RuntimeError when an ID cannot be parsed as a
  # positive integer.
  def add_probe_options(optparse_object, options)
    optparse_object.on("--probe-ids PROBE_IDS", Array, "explore from these probe IDs in the graph (comma separated). probe ID is the ID in the velvet Sequence file. See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
      options[:interesting_probes] = arg.collect do |read|
        parse_positive_id(read) || raise("Unable to parse probe ID #{read}, from #{arg}, cannot continue")
      end
    end
    optparse_object.on("--probe-ids-file PROBE_IDS_FILE", String, "explore from the probe IDs given in the file (1 probe ID per line). See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
      raise "Cannot specify both --probe-ids and --probe-ids-file sorry" if options[:interesting_probes]
      options[:interesting_probes] = []
      log.info "Reading probe IDs from file: `#{arg}'"
      File.foreach(arg) do |line|
        line.strip!
        next if line == ''
        read_id = parse_positive_id(line)
        if read_id.nil?
          raise "Unable to parse probe ID #{line}, from file #{arg}, cannot continue"
        end
        options[:interesting_probes].push read_id
      end
      log.info "Read #{options[:interesting_probes].length} probes in"
    end
    optparse_object.on("--probe-names-file PROBE_NAMES_FILE", String, "explore from the probe names (i.e. the first word in the fasta/fastq header) given in the file (1 probe name per line). See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
      raise "Cannot specify any two of --probe-names-file, --probe-ids and --probe-ids-file sorry" if options[:interesting_probes]
      options[:interesting_probe_names] = []
      log.info "Reading probe names from file: `#{arg}'"
      File.foreach(arg) do |line|
        line.strip!
        next if line == ''
        # Only the first whitespace-separated word is the probe name
        options[:interesting_probe_names].push line.split(/\s/)[0]
      end
      log.info "Read #{options[:interesting_probe_names].length} probes names in"
    end
    optparse_object.on("--probe-to-node-map FILE", String, "Output a tab separated file containing the read IDs and their respective node IDs [default: no output]") do |arg|
      options[:probe_to_node_map] = arg
    end
    optparse_object.on("--node-ids NODE_IDS", Array, "explore from these nodes in the graph (comma separated). Node IDs are the nodes in the velvet graph. See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
      options[:interesting_nodes] = arg.collect do |read|
        parse_positive_id(read) || raise("Unable to parse node ID #{read}, from #{arg}, cannot continue")
      end
    end
    optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: unused unless --probe-ids or --nodes is specified, otherwise #{options[:graph_search_leash_length] }]") do |arg|
      options[:graph_search_leash_length] = arg
    end
    optparse_object.on("--max-nodes NUM", Integer, "Maximum number of nodes to explore out from each probe node, or 0 for no maximum [default: #{options[:max_nodes] }]") do |arg|
      # 0 on the command line means "no maximum", stored as nil
      options[:max_nodes] = (arg == 0 ? nil : arg)
    end
  end

  # Returns an error message if more than one way of choosing the starting
  # points (probe IDs, probe names, node IDs) was given.
  def validate_probe_options(options)
    # The option blocks only catch --probe-ids given before
    # --probe-names-file, so check the combination here regardless of order.
    if options[:interesting_probes] && options[:interesting_probe_names]
      return "Cannot specify any two of --probe-names-file, --probe-ids and --probe-ids-file sorry"
    end
    if (options[:interesting_probes] || options[:interesting_probe_names]) && options[:interesting_nodes]
      return "Can only be interested in probes or nodes, not both, at least currently"
    end
  end

  # Delegates to the read-input validator unless a previous assembly or a
  # previously serialized graph was specified.
  def validate_assembly_options(options)
    # Need reads unless there is already an assembly
    unless options[:previous_assembly] || options[:previously_serialized_parsed_graph_file]
      return Bio::FinishM::ReadInput.new.validate_options(options, [])
    end
  end

  # Returns an error message if any positional arguments are left over.
  def validate_argv_length(argv)
    if argv.length != 0
      return "Dangling argument(s) found e.g. #{argv[0] }"
    end
  end

  # Entry point: build the graph according to the options, work out which
  # nodes to show, find paired-end links between them, and write the output.
  def run(options, argv)
    read_input = Bio::FinishM::ReadInput.new
    read_input.parse_options options

    # Generate the assembly graph
    log.info "Reading in or generating the assembly graph"

    # Stays nil when the entire graph is being visualised
    interesting_node_ids = nil
    if options[:interesting_probes] || options[:interesting_probe_names]
      finishm_graph, interesting_node_ids = generate_graph_from_probes(read_input, options)

      if options[:probe_to_node_map]
        # Label each probe with its name when names were given, else its ID.
        # NOTE(review): assumes probe_nodes is in the same order as the
        # probes handed to the graph generator - confirm.
        probe_labels = options[:interesting_probe_names] || options[:interesting_probes]
        write_probe_to_node_map(options[:probe_to_node_map], finishm_graph, probe_labels)
      end
    elsif options[:interesting_nodes]
      # generate_graph_from_nodes returns two values, so destructure them
      finishm_graph, interesting_node_ids = generate_graph_from_nodes(read_input, options)
    elsif options[:assembly_files]
      finishm_graph, interesting_node_ids, node_id_to_nickname = generate_graph_from_assembly(read_input, options)
      options[:node_id_to_nickname] = node_id_to_nickname
    else
      # Visualising the entire graph
      finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
    end

    if options[:graph_search_leash_length] && interesting_node_ids
      nodes_within_leash, node_ids_at_leash = get_nodes_within_leash(finishm_graph, interesting_node_ids, options)
      log.info "Found #{node_ids_at_leash.length} nodes at the end of the #{options[:graph_search_leash_length] }bp leash"

      options.merge!({
        :start_node_ids => interesting_node_ids,
        :nodes => nodes_within_leash,
        :end_node_ids => node_ids_at_leash,
      })
    else
      options[:nodes] = finishm_graph.graph.nodes
    end

    # Determine paired-end connections
    log.info "Determining paired-end node connections.."
    paired_end_links = find_paired_end_linkages(finishm_graph, options[:nodes])
    options[:paired_nodes_hash] = paired_end_links

    create_graphviz_output(finishm_graph, options)
  end

  # Convert the graph to a graphviz object and write it in every output
  # format that was requested (PNG, then SVG, then DOT).
  def create_graphviz_output(finishm_graph, options)
    log.info "Converting assembly to a graphviz.."
    gv = Bio::Assembly::ABVisualiser.new.graphviz(finishm_graph.graph, {
      :start_node_ids => options[:start_node_ids],
      :nodes => options[:nodes],
      :end_node_ids => options[:end_node_ids],
      :paired_nodes_hash => options[:paired_nodes_hash],
      :node_id_to_nickname => options[:node_id_to_nickname]
    })

    # Convert gv object to something actually pictorial
    [
      [:png, :output_graph_png],
      [:svg, :output_graph_svg],
      [:dot, :output_graph_dot],
    ].each do |format, option_key|
      output_path = options[option_key]
      next unless output_path
      log.info "Writing #{format.to_s.upcase} #{output_path}"
      gv.output format => output_path, :use => :neato
    end
  end

  # Generate the graph when starting points are given as probe names or probe
  # IDs (names take precedence when both are set).
  #
  # Returns [finishm_graph, node IDs of the probes that were found on a node].
  def generate_graph_from_probes(read_input, options)
    # Looking based on probes
    if options[:interesting_probe_names]
      log_targets(options[:interesting_probe_names], 'probes', 'probes')
      options[:probe_read_names] = options[:interesting_probe_names]
    else
      log_targets(options[:interesting_probes], 'probes', 'probes')
      options[:probe_reads] = options[:interesting_probes]
    end

    finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
    interesting_node_ids = finishm_graph.probe_nodes.reject{|n| n.nil?}.collect{|node| node.node_id}

    return finishm_graph, interesting_node_ids
  end

  # Generate the graph when starting points are given as node IDs.
  #
  # Returns [finishm_graph, the node IDs given in options[:interesting_nodes]].
  def generate_graph_from_nodes(read_input, options)
    # Looking based on nodes
    log_targets(options[:interesting_nodes], 'nodes', 'node(s)')

    finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
    interesting_node_ids = options[:interesting_nodes]

    return finishm_graph, interesting_node_ids
  end

  # Generate the graph using the ends of the scaffolds in the --genomes fasta
  # files as probes.
  #
  # Returns [finishm_graph, node IDs to start from, Hash of node ID to a
  # space-separated string of the scaffold-end names that sit on that node].
  #
  # Raises a RuntimeError if a requested scaffold side is not in the genomes.
  def generate_graph_from_assembly(read_input, options)
    # Parse the genome fasta file in
    genomes = Bio::FinishM::InputGenome.parse_genome_fasta_files(
      options[:assembly_files],
      options[:contig_end_length],
      options
    )

    # Create hash of contig end name to probe index
    contig_name_to_probe = {}
    genomes.each do |genome|
      genome.scaffolds.each_with_index do |_scaffold, scaffold_index|
        probes = [
          genome.first_probe(scaffold_index),
          genome.last_probe(scaffold_index)
        ]
        probes.each do |probe|
          key = nil
          if probe.side == :start
            key = "#{probe.contig.scaffold.name}s"
          elsif probe.side == :end
            key = "#{probe.contig.scaffold.name}e"
          else
            raise "Programming error"
          end

          # NOTE(review): the message says "quitting" but execution carries
          # on and the later probe overwrites the earlier one - confirm
          # whether this was meant to raise.
          if contig_name_to_probe.key?(key)
            log.error "Encountered multiple contigs with the same name, this might cause problems, so quitting #{key}"
          end
          contig_name_to_probe[key] = probe.index
        end
      end
    end

    # Gather a list of probe indexes that are of interest to the user
    interesting_probe_ids = []
    if options[:scaffold_sides]
      # If looking at specified ends
      options[:scaffold_sides].each do |side|
        probe = contig_name_to_probe[side]
        raise "Unable to find scaffold side in given genome: #{side}" unless probe
        interesting_probe_ids << probe
      end
      log.info "Found #{interesting_probe_ids.length} scaffold sides in the assembly of interest"
    else
      # else looking at all the contig ends in all the genomes
      interesting_probe_ids = contig_name_to_probe.values
      log.info "Visualising all #{interesting_probe_ids.length} contig ends in all genomes"
    end

    # Generate the graph
    probe_sequences = genomes.collect{|genome| genome.probe_sequences}.flatten
    finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

    # Convert probe IDs into node IDs
    interesting_node_ids = interesting_probe_ids.collect do |pid|
      finishm_graph.probe_nodes[pid].node_id
    end.uniq

    # create a nickname hash, id of node to name. Include all nodes even if they weren't specified directly (they only get visualised if they are within leash length of another)
    node_id_to_nickname = {}
    contig_name_to_probe.each do |name, probe|
      key = finishm_graph.probe_nodes[probe].node_id
      if node_id_to_nickname.key?(key)
        node_id_to_nickname[key] += " "+name
      else
        node_id_to_nickname[key] = name
      end
    end

    return finishm_graph, interesting_node_ids, node_id_to_nickname
  end

  # Find the nodes reachable from the given node IDs within the leash length
  # and maximum node count given in options.
  #
  # Also sets @finder, which find_paired_end_linkages uses afterwards.
  #
  # Returns [unique nodes within the leash, IDs of those nodes that have at
  # least one next neighbour outside the leash].
  def get_nodes_within_leash(finishm_graph, node_ids, options={})
    log.info "Finding nodes within the leash length of #{options[:graph_search_leash_length] } with maximum node count #{options[:max_nodes] }.."
    dijkstra = Bio::AssemblyGraphAlgorithms::Dijkstra.new

    @finder = Bio::FinishM::PairedEndNeighbourFinder.new(finishm_graph, 500) #TODO: this hard-coded 500 isn't great here
    @finder.min_adjoining_reads = options[:min_adjoining_reads]
    @finder.max_adjoining_node_coverage = options[:max_adjoining_node_coverage]

    nodes_within_leash_hash = dijkstra.min_distances_from_many_nodes_in_both_directions(
      finishm_graph.graph, node_ids.collect{|n| finishm_graph.graph.nodes[n]}, {
        :ignore_directions => true,
        :leash_length => options[:graph_search_leash_length],
        :max_nodes => options[:max_nodes],
        :neighbour_finder => @finder
      })
    # Keys are used below as [node_id, direction] pairs
    nodes_within_leash = nodes_within_leash_hash.keys.collect{|k| finishm_graph.graph.nodes[k[0]]}
    log.info "Found #{nodes_within_leash.collect{|o| o.node_id}.uniq.length} node(s) within the leash length"

    # These nodes are at the end of the leash - a node is in here iff
    # it has a neighbour that is not in the nodes_within_leash
    node_ids_at_leash = Set.new
    nodes_within_leash_hash.keys.each do |node_and_direction|
      # Add it to the set if 1 or more neighbours are not in the original set
      node = finishm_graph.graph.nodes[node_and_direction[0]]
      onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, node_and_direction[1]
      onode.next_neighbours(finishm_graph.graph).each do |oneigh|
        if !nodes_within_leash_hash.key?(oneigh.to_settable)
          node_ids_at_leash << node_and_direction[0]
          break #it only takes one to be listed
        end
      end
    end

    return nodes_within_leash.uniq, node_ids_at_leash.to_a
  end

  # For each node, list the IDs of the nodes that @finder reports as its
  # neighbours, looking in both orientations.
  #
  # Returns a Hash of node ID to Array of neighbouring node IDs, or an empty
  # Hash when @finder has not been set (i.e. get_nodes_within_leash has not
  # been called on this object).
  def find_paired_end_linkages(finishm_graph, node_array)
    return {} if @finder.nil?

    directions = [
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
    ]

    paired_end_links = {}
    node_array.each do |node|
      # IDs are made unique per direction only, as before
      paired_end_links[node.node_id] = directions.flat_map do |direction|
        onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(node, direction)
        @finder.neighbours(onode).collect{|n| n.node.node_id}.uniq
      end
    end
    return paired_end_links
  end

  # Write to a file probe_to_node_map_file a map that shows the
  # probe ID, which node that probe is on, and the name of the probe.
  # Probes that are not on any node get '-' in the node and direction columns.
  #
  # names:: Array indexed in the same way as finishm_graph.probe_nodes
  def write_probe_to_node_map(probe_to_node_map_file, finishm_graph, names)
    log.info "Writing probe-to-node map to #{probe_to_node_map_file}.."
    File.open(probe_to_node_map_file,'w') do |f|
      f.puts %w(probe_number probe node direction).join("\t")
      finishm_graph.probe_nodes.each_with_index do |node, i|
        node_column, direction_column =
          if node.nil?
            ['-', '-']
          else
            [
              node.node_id,
              finishm_graph.probe_node_directions[i] == true ? 'forward' : 'reverse',
            ]
          end
        f.puts [i+1, names[i], node_column, direction_column].join("\t")
      end
    end
  end

  private

  # Parse text as a positive integer ID. Returns the Integer, or nil when
  # text is not exactly the decimal form of an integer that is 1 or greater.
  def parse_positive_id(text)
    id = text.to_i
    (id.to_s == text && id >= 1) ? id : nil
  end

  # Log which items are being targeted, listing only the first 5 when there
  # are more than 5 of them.
  def log_targets(items, many_label, few_label)
    if items.length > 5
      log.info "Targeting #{items.length} #{many_label} #{items[0..4].join(', ') }, ..."
    else
      log.info "Targeting #{items.length} #{few_label} #{items.inspect}"
    end
  end
end