finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,176 @@
1
+ require 'optparse'
2
+ require 'tempfile'
3
+
4
+ require 'bio-logger'
5
+ require 'bio'
6
+ require 'progressbar'
7
+ require 'bio-ipcress'
8
+ $:.unshift File.join(ENV['HOME'],'git','bioruby-primer3','lib')
9
+ require 'bio-primer3'
10
+
11
+ class Bio::FinishM::Primers::Checker
12
+ include Bio::FinishM::Logging
13
+
14
+ def add_options(optparse_object, options)
15
+ optparse_object.banner = "\nUsage: finishm primers_check --primers <primers_file> [options]
16
+
17
+ Check that each pair of primers in the primers fil is compatible.
18
+ \n\n"
19
+
20
+ options.merge!({
21
+ :logger => 'stderr',
22
+ :logger_trace_level => 'info',
23
+ :melting_temperature_optimum => 56,
24
+ :melting_temperature_tolerance => 2,
25
+ :extra_global_primer3_options => {
26
+ 'PRIMER_MAX_POLY_X' => 4,
27
+ 'PRIMER_EXPLAIN_FLAG' => '1',
28
+ },
29
+ :persevere => false,
30
+ })
31
+
32
+ optparse_object.separator "Required arguments:\n\n"
33
+ optparse_object.on("-p", "--primers PRIMERS_FILE", String, "A file of primers, newline separated [required]") do |arg|
34
+ options[:primers_file] = arg
35
+ end
36
+
37
+ optparse_object.separator "\nOptional arguments:\n\n"
38
+ optparse_object.on("--contig-universe FASTA_FILE", String, "All contigs in the mixture [default: unspecified (don't test this)]") do |arg|
39
+ options[:contig_universe] = arg
40
+ end
41
+ optparse_object.on("--optimum-melting-temperature TEMPERATURE", Integer, "Primers aim for this melting temperature [default: default in primer3 (currently #{options[:melting_temperature_optimum]}C)]") do |arg|
42
+ options[:melting_temperature_optimum] = arg.to_i
43
+ raise Exception, " has to be greater than 0, found #{arg}" unless options[:melting_temperature_optimum] > 0
44
+ end
45
+ optparse_object.on("--primer3-options OPTION_LIST", "Give extra instructions to Primer3 [default <none>]. Acceptable values can be found in the primer3 manual e.g. 'PRIMER_MAX_POLY_X=4;PRIMER_MAX_SIZE=22' will specify those 2 parameters to primer3. Argument names are auto-capitalised so 'primer_max_poly_X=4;primer_max_size=22'is equivalent.") do |arg|
46
+ options[:extra_global_primer3_options] = {}
47
+ arg.split(';').each do |a2|
48
+ splits = a2.split('=')
49
+ unless splits.length == 2
50
+ raise "Unexpected format of the --primer3-options flag, specifically couldn't parse this part: '#{a2}'"
51
+ end
52
+ options[:extra_global_primer3_options][splits[0].upcase]=splits[1]
53
+ end
54
+ end
55
+ end
56
+
57
+ def validate_options(options, argv)
58
+ if argv.length != 0
59
+ return "Dangling argument(s) found e.g. #{argv[0] }"
60
+ else
61
+ [
62
+ :primers_file,
63
+ ].each do |sym|
64
+ if options[sym].nil?
65
+ return "No option found to specify #{sym}."
66
+ end
67
+ end
68
+ return nil
69
+ end
70
+ end
71
+
72
+ def run(options, argv)
73
+ Bio::Log::CLI.configure('bio-primer3')
74
+ found_error = false
75
+
76
+ # Read the primers in
77
+ primers = []
78
+ File.foreach(options[:primers_file]) do |line|
79
+ line.strip!
80
+ next if line.empty?
81
+ unless line.match(/^[atgc]+$/i)
82
+ raise "Malformed primer sequence '#{line}' found in the file - I'm just after one primer per line, that's all."
83
+ end
84
+ primers.push line
85
+ end
86
+ log.info "Read in #{primers.length} primers e.g. #{primers[0] }"
87
+ raise "Need at least 2 primers!" unless primers.length >= 2
88
+
89
+
90
+ primer3_options = options[:extra_global_primer3_options]
91
+ primer3_options.merge!({
92
+ 'PRIMER_OPT_TM' => options[:melting_temperature_optimum],
93
+ 'PRIMER_MIN_TM' => options[:melting_temperature_optimum]-options[:melting_temperature_tolerance],
94
+ 'PRIMER_MAX_TM' => options[:melting_temperature_optimum]+options[:melting_temperature_tolerance],
95
+ })
96
+
97
+ # Check to make sure there is no incompatibilities primer vs primer:
98
+ num_compared = check_primer_compatibilities(primers, primer3_options)
99
+ if num_compared == false
100
+ log.error "Found at least one incompatible primer set, giving up."
101
+ exit 1
102
+ end
103
+ log.info "Validated #{num_compared} different pairs of primers, they don't seem to conflict with each other at all, according to primer3's check primers thing"
104
+
105
+
106
+ # Check the contig universe
107
+ if options[:contig_universe]
108
+ log.info "in-silico PCR: testing the contig universe. This could probably be sped up by only making a single call to ipcress, but oh well."
109
+ num_universe_hits = check_contig_universe(primers, options[:contig_universe])
110
+ if num_universe_hits > 0
111
+ log.warn "Found #{results.length} matches between #{primer1} and #{primer2} in the contig universe, expected none."
112
+ else
113
+ log.info "No sequence appears to be amplified with any two primers in the background set of contigs (this is a good thing)."
114
+ end
115
+ else
116
+ log.info "Not checking to see if primers match any other contigs not targeted are picked up by these primers, because no universe of contigs was specified."
117
+ end
118
+
119
+
120
+ if found_error
121
+ log.warn "At least one problem detected in the primer checking process"
122
+ else
123
+ log.warn "No primer issues detected"
124
+ end
125
+ end
126
+
127
+
128
+
129
+ # Each pair of primers should be compatible
130
+ def check_primer_compatibilities(primers, primer3_options)
131
+ # First just make sure that everything is ok here
132
+ log.info "Double checking to make sure there is no incompatibilities between primer pairs"
133
+ num_compared = 0
134
+ primers.to_a.combination(2) do |array|
135
+ primer1 = array[0]
136
+ primer2 = array[1]
137
+
138
+ compatible, result = Bio::Primer3.test_primer_compatibility(primer1, primer2, primer3_options, :return_result => true)
139
+ num_compared += 1
140
+ log.debug "Primer3 returned: #{result.inspect}" if log.debug?
141
+
142
+ if compatible == false
143
+ log.warn "Found an incompatibility between #{primer1} and #{primer2}"
144
+ log.warn "Primer3 output was: #{result.inspect}"
145
+ return false
146
+ end
147
+ end
148
+ return num_compared
149
+ end
150
+
151
+
152
+
153
+
154
+ # Does any primer pairs amplify anything in a large set of background (universe) contigs?
155
+ # They shouldn't.
156
+ def check_contig_universe(primers, contig_universe)
157
+ num_hits = 0
158
+ num_compared = 0
159
+
160
+ primers.to_a.combination(2) do |array|
161
+ primer1 = array[0]
162
+ primer2 = array[1]
163
+ num_compared += 1
164
+
165
+ primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
166
+ results = Bio::Ipcress.run primer_set, options[:contig_universe], ipcress_options
167
+
168
+ if results.length > 0
169
+ num_hits += 1
170
+ log.warn "Found #{results.length} matches between #{primer1} and #{primer2} in the contig universe, expected none."
171
+ end
172
+ end
173
+
174
+ log.debug "Tested #{num_compared} pairs of primers to see if anything else was hit, warnings above if any did so"
175
+ end
176
+ end
@@ -0,0 +1,344 @@
1
+ class Bio::FinishM::RoundUp
2
+ include Bio::FinishM::Logging
3
+
4
+ DEFAULT_OPTIONS = {
5
+ :contig_end_length => 200,
6
+ :graph_search_leash_length => 20000,
7
+ :unscaffold_first => false,
8
+ :recoherence_kmer => 1,
9
+ :debug => false,
10
+ :gapfill_only => false,
11
+ :max_explore_nodes => 10000,
12
+ :max_gapfill_paths => 10,
13
+ }
14
+
15
+ def add_options(optparse_object, options)
16
+ optparse_object.banner = "\nUsage: finishm roundup --genomes <genome1.fasta>[,<genome2.fasta>,...] --fastq-gz <reads..> --output-directory <directory>
17
+
18
+ Takes one or more genomes and tries to improve their quality by reducing the number of
19
+ scaffolds and N characters they contain.
20
+
21
+ Example:
22
+
23
+ finishm roundup --genomes genome1.fasta,genome2.fasta --fastq-gz reads.1.fq.gz,reads.2.fq.gz --output-directory finishm_roundup_results
24
+
25
+ That will create a collapsed de-Bruijn graph from reads.1.fq.gz and reads.2.fq.gz, then try to find connections between
26
+ the starts and the ends of the contigs in genome1.fasta. If any connections between contigs are mutually exclusive,
27
+ then they are incorporated into scaffolds together, and gapfilling is attempted. The final sequences are output in
28
+ the finishm_roundup_results directory in FASTA format. The procedure is then repeated for genome2.fasta.
29
+
30
+ \n\n"
31
+
32
+ options.merge!(DEFAULT_OPTIONS)
33
+
34
+ optparse_object.separator "\nRequired arguments:\n\n"
35
+ optparse_object.on("--genomes FASTA_1[,FASTA_2...]", Array, "fasta files of genomes to be improved [required]") do |arg|
36
+ options[:assembly_files] = arg
37
+ end
38
+ optparse_object.on("--output-directory PATH", "Output results to this directory [required]") do |arg|
39
+ options[:output_directory] = arg
40
+ end
41
+
42
+ optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
43
+ Bio::FinishM::ReadInput.new.add_options(optparse_object, options)
44
+
45
+ optparse_object.separator "\nOptional arguments:\n\n"
46
+ optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{options[:contig_end_length] }]") do |arg|
47
+ options[:contig_end_length] = arg.to_i
48
+ end
49
+ optparse_object.on("--recoherence-kmer NUM", Integer, "Use a kmer longer than the original velvet one, to help remove bubbles and circular paths [default: none]") do |arg|
50
+ options[:recoherence_kmer] = arg
51
+ end
52
+ optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: #{options[:graph_search_leash_length] }]") do |arg|
53
+ options[:graph_search_leash_length] = arg
54
+ end
55
+ optparse_object.on("--unscaffold-first", "Break the scaffolds in the contigs file apart, and then wander between the resultant contigs. This option is only relevant to the wander step; gapfilling is attempted on all gaps by default. [default: #{options[:unscaffold_first] }]") do
56
+ options[:unscaffold_first] = true
57
+ end
58
+ optparse_object.on("--gapfill-only", "Don't wander, just gapfill [default: #{options[:gapfill_only] }]") do
59
+ options[:gapfill_only] = true
60
+ end
61
+ optparse_object.on("--max-gapfill-paths NUM", Integer, "When this number of paths is exceeded, don't gapfill, print as Ns [default: #{options[:max_gapfill_paths] }]") do |arg|
62
+ options[:max_gapfill_paths] = arg
63
+ end
64
+ optparse_object.on("--max-explore-nodes NUM", Integer, "Only explore this many nodes. If max is reached, do not make connections. [default: #{options[:max_explore_nodes] }]") do |arg|
65
+ options[:max_explore_nodes] = arg
66
+ end
67
+ optparse_object.on("--debug", "Build the graph, then drop to a pry console. [default: #{options[:debug] }]") do
68
+ options[:debug] = true
69
+ end
70
+ optparse_object.on("--probe NUM",Integer,"debug mode: explore from this probe number (1-based index), gapfill only, no wander. [default: explore from all probes}]") do |arg|
71
+ options[:interesting_probes] = [arg-1] #convert to 0-based index
72
+ options[:gapfill_only] = true
73
+ end
74
+ #optparse_object.on("--proceed-on-short-contigs", "By default, when overly short contigs are encountered, finishm croaks. This option stops the croaking [default: #{options[:proceed_on_short_contigs] }]") do
75
+ # options[:proceed_on_short_contigs] = true
76
+ #end
77
+
78
+ Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
79
+ end
80
+
81
+ def validate_options(options, argv)
82
+ #TODO: give a better description of the error that has occurred
83
+ #TODO: require reads options
84
+ if argv.length != 0
85
+ return "Dangling argument(s) found e.g. #{argv[0] }"
86
+ else
87
+ [
88
+ :assembly_files,
89
+ :output_directory,
90
+ ].each do |sym|
91
+ if options[sym].nil?
92
+ return "No option found to specify #{sym}."
93
+ end
94
+ end
95
+
96
+ #if return nil from here, options all were parsed successfully
97
+ return Bio::FinishM::ReadInput.new.validate_options(options, [])
98
+ end
99
+ end
100
+
101
+ def run(options, argv=[])
102
+ # Make sure output directory is writeable to avoid late croaking
103
+ output_directory = setup_output_directory options[:output_directory]
104
+
105
+ # Gather the probes from each genome supplied
106
+ genomes = Bio::FinishM::InputGenome.parse_genome_fasta_files(
107
+ options[:assembly_files],
108
+ options[:contig_end_length],
109
+ options
110
+ )
111
+
112
+ # Generate one velvet assembly to rule them all (forging the assembly is hard work..)
113
+ probe_sequences = genomes.collect{|genome| genome.probe_sequences}.flatten
114
+ # Generate the graph with the probe sequences in it.
115
+ read_input = Bio::FinishM::ReadInput.new
116
+ read_input.parse_options options
117
+ master_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)
118
+
119
+ binding.pry if options[:debug]
120
+
121
+ # For each genome, wander, gapfill, then output
122
+ wanderer = Bio::FinishM::Wanderer.new
123
+ gapfiller = Bio::FinishM::GapFiller.new
124
+ printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
125
+ genomes.each do |genome|
126
+ # wander using just the probes on the ends of the scaffolds
127
+ connected_scaffolds = nil
128
+ all_connections = []
129
+ gaps_filled_in_genome = 0
130
+ wandered_probe_indices = nil
131
+
132
+ File.open(File.join(output_directory, File.basename(genome.filename)+".report.txt"),'w') do |report|
133
+ report.puts "#{Time.now} FinishM report for roundup run with: #{options.inspect}"
134
+
135
+ if options[:gapfill_only]
136
+ log.info "Skipping wander, gapfilling only"
137
+ connected_scaffolds = Bio::FinishM::ConnectionInterpreter.new([], (0...genome.scaffolds.length)).scaffolds([])
138
+ else
139
+ log.debug "Wandering.."
140
+ connected_scaffolds, all_connections, wandered_probe_indices = wander_a_genome(wanderer, genome, master_graph, options, report)
141
+ end
142
+
143
+ # Write out all the connections
144
+ File.open(File.join(output_directory, File.basename(genome.filename)+".connections.csv"),'w') do |con_file|
145
+ all_connections.each do |connection|
146
+ con_file.puts connection
147
+ end
148
+ end
149
+
150
+ output_path = File.join(output_directory, File.basename(genome.filename)+".scaffolds.fasta")
151
+ variants_path = File.join(output_directory, File.basename(genome.filename)+".at_least_half_completely_wrong.vcf")
152
+ num_circular_scaffolds = 0
153
+
154
+ File.open(output_path, 'w') do |output_file|
155
+ File.open(variants_path,'w') do |variants_file|
156
+ variants_file.puts %w(#CHROM POS ID REF ALT QUAL FILTER INFO).join("\t")
157
+ # gapfill between
158
+ # (1) interpreted_connections
159
+ # (2) gaps that were present before above wander
160
+ connected_scaffolds.each_with_index do |cross_scaffold_connection, connected_scaffold_index|
161
+ superscaffold_name = "scaffold#{connected_scaffold_index+1}"
162
+
163
+ pretend_contig = cross_scaffold_connection.contigs[0]
164
+ first_scaffold_index = pretend_contig.sequence_index
165
+
166
+ # Gapfill contigs within the scaffold on the extreme LHS
167
+ scaffold_sequence, num_gaps, variants = gapfill_a_scaffold(gapfiller, printer, master_graph, genome, first_scaffold_index, pretend_contig.direction, superscaffold_name, report, variants_file, options)
168
+ gaps_filled_in_genome += num_gaps
169
+
170
+ last_contig = nil
171
+ cross_scaffold_connection.contigs.each_with_index do |contig, superscaffold_index|
172
+ unless last_contig.nil? #skip the first contig - it be done
173
+ last_name = genome.scaffolds[last_contig.sequence_index].name
174
+ current_name = genome.scaffolds[contig.sequence_index].name
175
+ log.debug "Connecting #{last_name} and #{current_name}" if log.debug?
176
+
177
+ # Ready the contig on the RHS of this join
178
+ # Gapfill within the scaffold on the RHS of the new gap
179
+ rhs_sequence, num_gaps, variants = gapfill_a_scaffold(gapfiller, printer, master_graph, genome, contig.sequence_index, contig.direction, superscaffold_name, report, variants_file, options)
180
+ gaps_filled_in_genome += num_gaps
181
+
182
+ # Gapfill across the new gap between scaffolds
183
+ aconn = gapfiller.gapfill(master_graph,
184
+ last_contig.direction == true ? genome.last_probe(last_contig.sequence_index).index : genome.first_probe(last_contig.sequence_index).index,
185
+ contig.direction == true ? genome.first_probe(contig.sequence_index).index : genome.last_probe(contig.sequence_index).index,
186
+ options
187
+ )
188
+ second_sequence = genome.scaffolds[contig.sequence_index].contigs[0].sequence
189
+ log.debug "Found #{aconn.paths.length} connections between #{last_name} and #{current_name}" if log.debug?
190
+ if aconn.paths.length == 0
191
+ # when this occurs, it is due to there being a circuit in the path, so no paths are printed.
192
+ # (at least for now) TODO: this could be improved.
193
+ # Just arbitrarily put in 100 N characters, to denote a join, but no gapfill
194
+ scaffold_sequence = scaffold_sequence+('N'*100)+rhs_sequence
195
+ else
196
+ scaffold_sequence, variants = printer.ready_two_contigs_and_connections(
197
+ master_graph.graph,
198
+ scaffold_sequence,
199
+ aconn,
200
+ rhs_sequence,
201
+ master_graph.velvet_sequences
202
+ )
203
+ # Print variants
204
+ # TODO: need to change coordinates of variants, particularly when >2 contigs are joined?
205
+ variants.each do |variant|
206
+ variant.reference_name = superscaffold_name
207
+ variants_file.puts variant.vcf(scaffold_sequence)
208
+ end
209
+ end
210
+ end
211
+ last_contig = contig
212
+ end
213
+
214
+ #Output the scaffold to the output directory
215
+ descriptor = nil
216
+ if cross_scaffold_connection.circular?
217
+ descriptor = 'circular'
218
+ num_circular_scaffolds += 1
219
+ else
220
+ descriptor = 'scaffold'
221
+ end
222
+ scaffold_names = cross_scaffold_connection.contigs.collect do |contig|
223
+ genome.scaffolds[contig.sequence_index].name
224
+ end
225
+ output_file.puts ">#{superscaffold_name} #{descriptor} #{scaffold_names.join(':') }"
226
+ output_file.puts scaffold_sequence
227
+ end
228
+
229
+ num_connected_scaffolds = genome.scaffolds.length - connected_scaffolds.length
230
+ log.info "Wrote #{connected_scaffolds.length} scaffolds to #{output_path}, after scaffolding #{num_connected_scaffolds} scaffolds together (#{num_circular_scaffolds} were circular) and filling #{gaps_filled_in_genome} gaps."
231
+ end
232
+ end
233
+ end
234
+ end
235
+ end
236
+
237
+ def setup_output_directory(given_directory)
238
+ output_directory = File.absolute_path(given_directory)
239
+ log.debug "Using output directory: #{output_directory}" if log.debug?
240
+
241
+ if File.exist?(output_directory)
242
+ if !File.directory?(output_directory)
243
+ log.error "Specified --output-directory #{output_directory} exists but is a file and not a directory. Cannot continue."
244
+ exit 1
245
+ elsif !File.writable?(output_directory)
246
+ log.error "Specified --output-directory #{output_directory} is not writeable. Cannot continue."
247
+ exit 1
248
+ else
249
+ log.debug "Already existing output directory #{output_directory} seems usable"
250
+ end
251
+ else
252
+ # Creating a new output directory
253
+ Dir.mkdir(output_directory)
254
+ end
255
+
256
+ return output_directory
257
+ end
258
+
259
+ def wander_a_genome(wanderer, genome, master_probed_graph, options, report)
260
+ # Create new finishm_graph with only probes from the ends of the scaffolds of this genome
261
+ probe_indices = []
262
+ genome.each_scaffold_end_numbered_probe{|probe| probe_indices.push(probe.number)}
263
+ genome_graph = master_probed_graph.subgraph(probe_indices)
264
+
265
+ num_scaffolds = genome.scaffolds.length
266
+
267
+ all_connections = wanderer.probed_graph_to_connections(genome_graph, options)
268
+
269
+ interpreter = Bio::FinishM::ConnectionInterpreter.new(all_connections, (0...num_scaffolds))
270
+ connections = interpreter.doubly_single_contig_connections
271
+ report.puts "Found #{connections.length} connections between contigs that can be used for scaffolding"
272
+ unconnected_probes = interpreter.unconnected_probes
273
+ report.puts "Found #{unconnected_probes.length} contig ends that did not connect to any others"
274
+ unconnected_probes.each do |probe|
275
+ report.puts "Did not connect to any other probes: #{probe.inspect}"
276
+ end
277
+
278
+ return interpreter.scaffolds(connections), all_connections, probe_indices
279
+ end
280
+
281
+ def gapfill_a_scaffold(gapfiller, printer, master_graph, genome, scaffold_index, scaffold_direction, superscaffold_name, report, variants_file, options)
282
+ connections = []
283
+ genome.each_gap_probe_pair(scaffold_index) do |probe1, probe2|
284
+ log.info "Gapfilling between probes #{probe1.number+1} and #{probe2.number+1}.."
285
+ next unless options[:interesting_probes].nil? or
286
+ options[:interesting_probes].include?(probe1.number) or
287
+ options[:interesting_probes].include?(probe2.number)
288
+ connections.push gapfiller.gapfill(master_graph, probe1.index, probe2.index, options)
289
+ end
290
+ log.debug "Found #{connections.length} connections" if log.debug?
291
+
292
+ all_variants = []
293
+ num_gapfills = 0
294
+ scaffold = genome.scaffolds[scaffold_index]
295
+ gapfilled_sequence = genome.scaffolds[scaffold_index].contigs[0].sequence
296
+ connections.each_with_index do |aconn, i|
297
+ rhs_sequence = scaffold.contigs[i+1].sequence
298
+ gapfilled_sequence, variants, gapfilled = piece_together_gapfill(
299
+ printer, master_graph, gapfilled_sequence, aconn, rhs_sequence, genome.gap_length(scaffold_index, i),
300
+ options[:max_gapfill_paths]
301
+ )
302
+ if gapfilled
303
+ num_gapfills += 1
304
+ variants.each{|v| all_variants << v}
305
+ to_log = "Filled a gap on genome #{genome.filename}: scaffold #{scaffold.name}: #{scaffold.contigs[i].scaffold_position_end+1}-#{scaffold.contigs[i+1].scaffold_position_start-1}"
306
+ report.puts to_log
307
+ log.info to_log
308
+ end
309
+ end
310
+ if scaffold_direction == false
311
+ gapfilled_sequence = revcom(gapfilled_sequence)
312
+ all_variants.each do |variant|
313
+ variant.position = gapfilled_sequence.length - variant.position
314
+ variant.reverse!
315
+ end
316
+ end
317
+ all_variants.each do |variant|
318
+ variant.reference_name = superscaffold_name
319
+ variants_file.puts variant.vcf(gapfilled_sequence)
320
+ end
321
+ return gapfilled_sequence, num_gapfills, all_variants
322
+ end
323
+
324
+ def piece_together_gapfill(printer, master_graph, first_sequence, aconn, second_sequence, gap_length, max_gapfill_paths)
325
+ scaffold_sequence = nil
326
+ gapfilled = -1
327
+ if aconn.paths.length == 0 or aconn.paths.length > max_gapfill_paths
328
+ # No paths found. Just fill with Ns like it was before
329
+ scaffold_sequence = first_sequence + 'N'*gap_length + second_sequence
330
+ gapfilled = false
331
+ else
332
+ scaffold_sequence, variants = printer.ready_two_contigs_and_connections(
333
+ master_graph.graph, first_sequence, aconn, second_sequence, master_graph.velvet_sequences
334
+ )
335
+ gapfilled = true
336
+ end
337
+ scaffold_sequence.gsub!('-','') #remove gaps i.e. where the consensus is a gap
338
+ return scaffold_sequence, variants, gapfilled
339
+ end
340
+
341
+ def revcom(seq)
342
+ Bio::Sequence::NA.new(seq).reverse_complement.to_s.upcase
343
+ end
344
+ end