bio-velvet_underground 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (286) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +13 -0
  5. data/Gemfile +19 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +53 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/ext/bioruby.patch +60 -0
  11. data/ext/mkrf_conf.rb +50 -0
  12. data/ext/src/Makefile +125 -0
  13. data/ext/src/src/allocArray.c +305 -0
  14. data/ext/src/src/allocArray.h +86 -0
  15. data/ext/src/src/autoOpen.c +107 -0
  16. data/ext/src/src/autoOpen.h +18 -0
  17. data/ext/src/src/binarySequences.c +813 -0
  18. data/ext/src/src/binarySequences.h +125 -0
  19. data/ext/src/src/concatenatedGraph.c +233 -0
  20. data/ext/src/src/concatenatedGraph.h +30 -0
  21. data/ext/src/src/concatenatedPreGraph.c +262 -0
  22. data/ext/src/src/concatenatedPreGraph.h +29 -0
  23. data/ext/src/src/correctedGraph.c +2642 -0
  24. data/ext/src/src/correctedGraph.h +32 -0
  25. data/ext/src/src/dfib.c +509 -0
  26. data/ext/src/src/dfib.h +69 -0
  27. data/ext/src/src/dfibHeap.c +89 -0
  28. data/ext/src/src/dfibHeap.h +39 -0
  29. data/ext/src/src/dfibpriv.h +105 -0
  30. data/ext/src/src/fib.c +628 -0
  31. data/ext/src/src/fib.h +78 -0
  32. data/ext/src/src/fibHeap.c +79 -0
  33. data/ext/src/src/fibHeap.h +41 -0
  34. data/ext/src/src/fibpriv.h +110 -0
  35. data/ext/src/src/globals.h +153 -0
  36. data/ext/src/src/graph.c +3983 -0
  37. data/ext/src/src/graph.h +233 -0
  38. data/ext/src/src/graphReConstruction.c +1472 -0
  39. data/ext/src/src/graphReConstruction.h +30 -0
  40. data/ext/src/src/graphStats.c +2167 -0
  41. data/ext/src/src/graphStats.h +72 -0
  42. data/ext/src/src/kmer.c +652 -0
  43. data/ext/src/src/kmer.h +73 -0
  44. data/ext/src/src/kmerOccurenceTable.c +236 -0
  45. data/ext/src/src/kmerOccurenceTable.h +44 -0
  46. data/ext/src/src/kseq.h +223 -0
  47. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  48. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  49. data/ext/src/src/passageMarker.c +677 -0
  50. data/ext/src/src/passageMarker.h +137 -0
  51. data/ext/src/src/preGraph.c +1717 -0
  52. data/ext/src/src/preGraph.h +106 -0
  53. data/ext/src/src/preGraphConstruction.c +990 -0
  54. data/ext/src/src/preGraphConstruction.h +26 -0
  55. data/ext/src/src/readCoherentGraph.c +557 -0
  56. data/ext/src/src/readCoherentGraph.h +30 -0
  57. data/ext/src/src/readSet.c +1734 -0
  58. data/ext/src/src/readSet.h +67 -0
  59. data/ext/src/src/recycleBin.c +199 -0
  60. data/ext/src/src/recycleBin.h +58 -0
  61. data/ext/src/src/roadMap.c +342 -0
  62. data/ext/src/src/roadMap.h +65 -0
  63. data/ext/src/src/run.c +318 -0
  64. data/ext/src/src/run.h +52 -0
  65. data/ext/src/src/run2.c +712 -0
  66. data/ext/src/src/scaffold.c +1876 -0
  67. data/ext/src/src/scaffold.h +64 -0
  68. data/ext/src/src/shortReadPairs.c +1243 -0
  69. data/ext/src/src/shortReadPairs.h +32 -0
  70. data/ext/src/src/splay.c +259 -0
  71. data/ext/src/src/splay.h +43 -0
  72. data/ext/src/src/splayTable.c +1315 -0
  73. data/ext/src/src/splayTable.h +31 -0
  74. data/ext/src/src/tightString.c +362 -0
  75. data/ext/src/src/tightString.h +82 -0
  76. data/ext/src/src/utility.c +199 -0
  77. data/ext/src/src/utility.h +98 -0
  78. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  79. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  80. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  81. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  82. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  83. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  84. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  85. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  86. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  87. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  88. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  89. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  90. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  91. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  92. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  93. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  94. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  95. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  96. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  97. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  98. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  99. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  100. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  101. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  102. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  103. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  104. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  105. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  106. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  107. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  108. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  109. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  110. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  111. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  112. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  113. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  114. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  115. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  116. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  117. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  118. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  119. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  120. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  218. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  219. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  220. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  221. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  222. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  223. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  224. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  225. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  226. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  227. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  228. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  229. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  230. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  231. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  232. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  233. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  234. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  235. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  236. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  237. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  238. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  239. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  240. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  241. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  242. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  243. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  244. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  245. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  246. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  247. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  248. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  249. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  250. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  251. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  252. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  253. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  254. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  255. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  256. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  257. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  258. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  259. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  260. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  261. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  262. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  263. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  264. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  265. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  266. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  267. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  268. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  269. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  270. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  271. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  272. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  273. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  274. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  275. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  276. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  277. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  278. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  279. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  280. data/lib/bio-velvet_underground.rb +12 -0
  281. data/lib/bio-velvet_underground/external/VERSION +1 -0
  282. data/lib/bio-velvet_underground/velvet_underground.rb +72 -0
  283. data/spec/binary_sequence_store_spec.rb +27 -0
  284. data/spec/data/1/CnyUnifiedSeq +0 -0
  285. data/spec/spec_helper.rb +31 -0
  286. metadata +456 -0
@@ -0,0 +1,65 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #ifndef _ROADMAP_H_
22
+ #define _ROADMAP_H_
23
+
24
+ #include <stdio.h>
25
+
26
+ struct roadMapArray_st { // NOTE(review): presumably the layout behind the RoadMapArray type used by the prototypes below; the typedef is not visible here
27
+ RoadMap *array; // RoadMap storage; presumably what getRoadMapInArray() indexes into - confirm in roadMap.c
28
+ Annotation *annotations; // Annotation storage; presumably shared by the RoadMaps in `array` - confirm
29
+ IDnum length; // presumably the number of entries in `array` - confirm
30
+ int WORDLENGTH; // presumably the k-mer (hash) length the roadmaps were built with - confirm
31
+ boolean double_strand; // presumably whether both strands were hashed - confirm
32
+ IDnum referenceCount; // presumably the number of reference sequences (cf. importReferenceRoadMapArray) - confirm
33
+ };
34
+
35
+ ////////////////////////////////////////////////////////////////////
36
+ // Annotation stuff
37
+ ////////////////////////////////////////////////////////////////////
38
+ IDnum getAnnotSequenceID(Annotation * annot);
39
+ Coordinate getFinish(Annotation * annot);
40
+ Coordinate getStart(Annotation * annot);
41
+ Coordinate getPosition(Annotation * annot);
42
+ Coordinate getAnnotationLength(Annotation * annot);
43
+ void incrementAnnotationCoordinates(Annotation * annot);
44
+
45
+ void setStartID(Annotation * annot, IDnum nodeID);
46
+ IDnum getStartID(Annotation * annot);
47
+ void setFinishID(Annotation * annot, IDnum nodeID);
48
+ IDnum getFinishID(Annotation * annot);
49
+
50
+ Annotation *getNextAnnotation(Annotation * annot);
51
+ Annotation *getAnnotationInArray(Annotation * annot, Coordinate index);
52
+
53
+ ////////////////////////////////////////////////////////////////////
54
+ // RoadMap stuff
55
+ ////////////////////////////////////////////////////////////////////
56
+
57
+ IDnum getAnnotationCount(RoadMap * rdmap);
58
+
59
+ RoadMap *getRoadMapInArray(RoadMapArray * array, IDnum index);
60
+
61
+ // Same thing but for the RoadMap file generated by the hash
62
+ RoadMapArray *importRoadMapArray(char *filename);
63
+ RoadMapArray *importReferenceRoadMapArray(char * filename);
64
+ void destroyRoadMapArray(RoadMapArray * rdmap);
65
+ #endif
data/ext/src/src/run.c ADDED
@@ -0,0 +1,318 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <sys/stat.h>
24
+ #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
25
+ #include <uce-dirent.h>
26
+ #define Arc v_Arc
27
+ #else
28
+ #include <dirent.h>
29
+ #endif
30
+
31
+ #include "run.h"
32
+
33
+ static void printUsage()
34
+ {
35
+ puts("Usage:");
36
+ puts("./velveth directory hash_length {[-file_format][-read_type][-separate|-interleaved] filename1 [filename2 ...]} {...} [options]");
37
+ puts("");
38
+ puts("\tdirectory\t: directory name for output files");
39
+ printf("\thash_length\t: EITHER an odd integer (if even, it will be decremented) <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
40
+ printf("\t\t\t: OR: m,M,s where m and M are odd integers (if not, they will be decremented) with m < M <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
41
+ puts("\t\t\t\tand s is a step (even number). Velvet will then hash from k=m to k=M with a step of s");
42
+ puts("\tfilename\t: path to sequence file or - for standard input");
43
+ puts("");
44
+ puts("File format options:");
45
+ puts("\t-fasta\t-fastq\t-raw\t-fasta.gz\t-fastq.gz\t-raw.gz\t-sam\t-bam\t-fmtAuto");
46
+ puts("\t(Note: -fmtAuto will detect fasta or fastq, and will try the following programs for decompression : gunzip, pbunzip2, bunzip2");
47
+ puts("");
48
+ puts("File layout options for paired reads (only for fasta and fastq formats):");
49
+ puts("\t-interleaved\t: File contains paired reads interleaved in the one file (default)");
50
+ puts("\t-separate\t: Read 2 separate files for paired reads");
51
+ puts("");
52
+ puts("Read type options:");
53
+ puts("\t-short\t-shortPaired");
54
+ #if CATEGORIES <= 5
55
+ Category cat;
56
+ for (cat = 2; cat <= CATEGORIES; cat++)
57
+ printf("\t-short%i\t-shortPaired%i\n", cat, cat);
58
+ #else
59
+ puts("\t...");
60
+ printf("\t-short%i\t-shortPaired%i\n", CATEGORIES - 1, CATEGORIES - 1);
61
+ printf("\t-short%i\t-shortPaired%i\n", CATEGORIES, CATEGORIES);
62
+ #endif
63
+ puts("\t-long\t-longPaired");
64
+ puts("\t-reference");
65
+ puts("");
66
+ puts("Options:");
67
+ puts("\t-strand_specific\t: for strand specific transcriptome sequencing data (default: off)");
68
+ puts("\t-reuse_Sequences\t: reuse Sequences file (or link) already in directory (no need to provide original filenames in this case (default: off)");
69
+ puts("\t-reuse_binary\t: reuse binary sequences file (or link) already in directory (no need to provide original filenames in this case (default: off)");
70
+ puts("\t-noHash\t\t\t: simply prepare Sequences file, do not hash reads or prepare Roadmaps file (default: off)");
71
+ puts("\t-create_binary \t: create binary CnyUnifiedSeq file (default: off)");
72
+ puts("");
73
+ puts("Synopsis:");
74
+ puts("");
75
+ puts("- Short single end reads:");
76
+ puts("\tvelveth Assem 29 -short -fastq s_1_sequence.txt");
77
+ puts("");
78
+ puts("- Paired-end short reads (remember to interleave paired reads):");
79
+ puts("\tvelveth Assem 31 -shortPaired -fasta interleaved.fna");
80
+ puts("");
81
+ puts("- Paired-end short reads (using separate files for the paired reads)");
82
+ puts("\tvelveth Assem 31 -shortPaired -fasta -separate left.fa right.fa");
83
+ puts("");
84
+ puts("- Two channels and some long reads:");
85
+ puts("\tvelveth Assem 43 -short -fastq unmapped.fna -longPaired -fasta SangerReads.fasta");
86
+ puts("");
87
+ puts("- Three channels:");
88
+ puts("\tvelveth Assem 35 -shortPaired -fasta pe_lib1.fasta -shortPaired2 pe_lib2.fasta -short3 se_lib1.fa");
89
+ puts("");
90
+ puts("Output:");
91
+ puts("\tdirectory/Roadmaps");
92
+ puts("\tdirectory/Sequences");
93
+ puts("\t\t[Both files are picked up by graph, so please leave them there]");
94
+ }
95
+
96
+ int main(int argc, char **argv)
97
+ {
98
+ ReadSet *allSequences = NULL;
99
+ SplayTable *splayTable;
100
+ int hashLength, hashLengthStep, hashLengthMax, h;
101
+ char *directory, *filename, *seqFilename, *baseSeqName, *buf;
102
+ char * token;
103
+ boolean double_strand = true;
104
+ boolean noHash = false;
105
+ boolean multiple_kmers = false;
106
+ char buffer[100];
107
+ DIR *dir;
108
+
109
+ setProgramName("velveth");
110
+
111
+ if (argc < 4) {
112
+ printf("velveth - simple hashing program\n");
113
+ printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
114
+ RELEASE_NUMBER, UPDATE_NUMBER);
115
+ printf("\nCopyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)\n");
116
+ printf("This is free software; see the source for copying conditions. There is NO\n");
117
+ printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n");
118
+ printf("Compilation settings:\n");
119
+ printf("CATEGORIES = %i\n", CATEGORIES);
120
+ printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
121
+ #ifdef _OPENMP
122
+ puts("OPENMP");
123
+ #endif
124
+ #ifdef LONGSEQUENCES
125
+ puts("LONGSEQUENCES");
126
+ #endif
127
+ #ifdef BIGASSEMBLY
128
+ puts("BIGASSEMBLY");
129
+ #endif
130
+ #ifdef COLOR
131
+ puts("COLOR");
132
+ #endif
133
+ #ifdef DEBUG
134
+ puts("DEBUG");
135
+ #endif
136
+ printf("\n");
137
+ printUsage();
138
+ return 0;
139
+ }
140
+
141
+ strcpy(buffer, argv[2]);
142
+ token = strtok(buffer, ",");
143
+ hashLength = atoi(token);
144
+ token = strtok(NULL, ",");
145
+ if (token == NULL) {
146
+ multiple_kmers = false;
147
+ hashLengthMax = hashLength + 1;
148
+ } else {
149
+ multiple_kmers = true;
150
+ hashLengthMax = atoi(token);
151
+ }
152
+ token = strtok(NULL, ",");
153
+ if (token == NULL) {
154
+ hashLengthStep = 2;
155
+ } else {
156
+ hashLengthStep = atoi(token);
157
+ }
158
+
159
+ if (hashLength > MAXKMERLENGTH) {
160
+ velvetLog
161
+ ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
162
+ hashLength, MAXKMERLENGTH);
163
+ hashLength = MAXKMERLENGTH;
164
+ }
165
+ if (hashLength <= 0) {
166
+ velvetLog("Invalid hash length: %s\n", argv[2]);
167
+ printUsage();
168
+ return 0;
169
+ }
170
+ if (hashLength % 2 == 0) {
171
+ velvetLog
172
+ ("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
173
+ hashLength, hashLength - 1);
174
+ hashLength--;
175
+ }
176
+
177
+ if (multiple_kmers) {
178
+ if (hashLengthMax > MAXKMERLENGTH + 1) {
179
+ velvetLog
180
+ ("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
181
+ hashLengthMax, MAXKMERLENGTH + 1);
182
+ hashLengthMax = MAXKMERLENGTH + 1;
183
+ }
184
+ if (hashLengthMax <= hashLength) {
185
+ velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]);
186
+ printUsage();
187
+ return 0;
188
+ }
189
+
190
+ if (hashLengthStep <= 0) {
191
+ velvetLog("Non-positive hash length! Setting it to 2\n");
192
+ hashLengthStep = 2;
193
+ }
194
+ if (hashLengthStep % 2 == 1) {
195
+ velvetLog
196
+ ("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n",
197
+ hashLengthStep, hashLengthStep + 1);
198
+ hashLengthStep++;
199
+ }
200
+ }
201
+
202
+ // check if binary sequences should be used
203
+ int argIndex;
204
+ for (argIndex = 3; argIndex < argc; argIndex++)
205
+ if (strcmp(argv[argIndex], "-create_binary") == 0 || strcmp(argv[argIndex], "-reuse_binary") == 0)
206
+ setCreateBinary(true);
207
+
208
+ for (h = hashLength; h < hashLengthMax; h += hashLengthStep) {
209
+
210
+ resetWordFilter(h);
211
+
212
+ buf = mallocOrExit(2 * strlen(argv[1]) + 500, char);
213
+
214
+ if ( multiple_kmers ) {
215
+ sprintf(buf,"%s_%d",argv[1],h);
216
+ directory = mallocOrExit(strlen(buf) + 100, char);
217
+ strcpy(directory,buf);
218
+ } else
219
+ directory = argv[1];
220
+
221
+ filename = mallocOrExit(strlen(directory) + 100, char);
222
+ seqFilename = mallocOrExit(strlen(directory) + 100, char);
223
+ baseSeqName = mallocOrExit(100, char);
224
+
225
+ dir = opendir(directory);
226
+
227
+ if (dir == NULL)
228
+ mkdir(directory, 0777);
229
+ else {
230
+ sprintf(buf, "%s/PreGraph", directory);
231
+ remove(buf);
232
+ sprintf(buf, "%s/Graph", directory);
233
+ remove(buf);
234
+ sprintf(buf, "%s/Graph2", directory);
235
+ remove(buf);
236
+ sprintf(buf, "%s/Graph3", directory);
237
+ remove(buf);
238
+ sprintf(buf, "%s/Graph4", directory);
239
+ remove(buf);
240
+ }
241
+
242
+ logInstructions(argc, argv, directory);
243
+
244
+ strcpy(seqFilename, directory);
245
+ if (isCreateBinary()) {
246
+ // use the CNY unified seq writer
247
+ strcpy(baseSeqName, "/CnyUnifiedSeq");
248
+ // remove other style sequences file
249
+ sprintf(buf, "%s/Sequences", directory);
250
+ remove(buf);
251
+ } else {
252
+ strcpy(baseSeqName, "/Sequences");
253
+ // remove other style sequences file
254
+ sprintf(buf, "%s/CnyUnifiedSeq", directory);
255
+ remove(buf);
256
+ sprintf(buf, "%s/CnyUnifiedSeq.names", directory);
257
+ remove(buf);
258
+ }
259
+ strcat(seqFilename, baseSeqName);
260
+
261
+ if ( h == hashLength ) {
262
+ parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand, &noHash);
263
+ } else {
264
+ sprintf(buf,"rm -f %s",seqFilename);
265
+ if (system(buf)) {
266
+ velvetLog("Command failed!\n");
267
+ velvetLog("%s\n", buf);
268
+ #ifdef DEBUG
269
+ abort();
270
+ #endif
271
+ exit(1);
272
+ }
273
+ if (argv[1][0] == '/')
274
+ sprintf(buf,"ln -s %s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename);
275
+ else
276
+ sprintf(buf,"ln -s `pwd`/%s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename);
277
+ if (system(buf)) {
278
+ velvetLog("Command failed!\n");
279
+ velvetLog("%s\n", buf);
280
+ #ifdef DEBUG
281
+ abort();
282
+ #endif
283
+ exit(1);
284
+ }
285
+ }
286
+
287
+ if (noHash)
288
+ continue;
289
+
290
+ splayTable = newSplayTable(h, double_strand);
291
+ if (isCreateBinary()) {
292
+ allSequences = importCnyReadSet(seqFilename);
293
+ } else {
294
+ allSequences = importReadSet(seqFilename);
295
+ }
296
+ velvetLog("%li sequences in total.\n", (long) allSequences->readCount);
297
+
298
+ strcpy(filename, directory);
299
+ strcat(filename, "/Roadmaps");
300
+ inputSequenceArrayIntoSplayTableAndArchive(allSequences,
301
+ splayTable, filename, seqFilename);
302
+
303
+ destroySplayTable(splayTable);
304
+ if (dir)
305
+ closedir(dir);
306
+ if (directory != argv[1])
307
+ free(directory);
308
+ free(filename);
309
+ free(seqFilename);
310
+ free(baseSeqName);
311
+ free(buf);
312
+ if (allSequences) {
313
+ destroyReadSet(allSequences);
314
+ }
315
+ }
316
+
317
+ return 0;
318
+ }
data/ext/src/src/run.h ADDED
@@ -0,0 +1,52 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ // Compilation
22
+ #include "globals.h"
23
+
24
+ // Utilities
25
+ #include "graphStats.h"
26
+ #include "utility.h"
27
+
28
+ // Datastructures
29
+ #include "kmer.h"
30
+ #include "readSet.h"
31
+ #include "tightString.h"
32
+ #include "roadMap.h"
33
+ #include "splayTable.h"
34
+ #include "graph.h"
35
+ #include "scaffold.h"
36
+ #include "binarySequences.h"
37
+
38
+ // PreGraph operations
39
+ #include "preGraph.h"
40
+ #include "preGraphConstruction.h"
41
+ #include "concatenatedPreGraph.h"
42
+
43
+ // Graph operations
44
+ #include "graph.h"
45
+ #include "graphReConstruction.h"
46
+ #include "concatenatedGraph.h"
47
+ #include "correctedGraph.h"
48
+ #include "locallyCorrectedGraph.h"
49
+
50
+ // Repeat resolution
51
+ #include "readCoherentGraph.h"
52
+ #include "shortReadPairs.h"
@@ -0,0 +1,712 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <unistd.h>
25
+
26
+ #include "run.h"
27
+
28
+ #include "binarySequences.h"
29
+ #include "globals.h"
30
+
31
// Prints the velvetg command-line synopsis to stdout: the positional
// directory argument, the standard and advanced options, and the output
// files written into the working directory.
static void printUsage(void)
{
	// One entry per output line; puts() appends the newline.
	static const char *const usageLines[] = {
		"Usage:",
		"./velvetg directory [options]",
		"",
		"\tdirectory\t\t\t: working directory name",
		"",
		"Standard options:",
		"\t-cov_cutoff <floating-point|auto>\t: removal of low coverage nodes AFTER tour bus or allow the system to infer it",
		"\t\t(default: no removal)",
		"\t-ins_length <integer>\t\t: expected distance between two paired end reads (default: no read pairing)",
		"\t-read_trkg <yes|no>\t\t: tracking of short read positions in assembly (default: no tracking)",
		"\t-min_contig_lgth <integer>\t: minimum contig length exported to contigs.fa file (default: hash length * 2)",
		"\t-amos_file <yes|no>\t\t: export assembly to AMOS file (default: no export)",
		"\t-exp_cov <floating point|auto>\t: expected coverage of unique regions or allow the system to infer it",
		"\t\t(default: no long or paired-end read resolution)",
		"\t-long_cov_cutoff <floating-point>: removal of nodes with low long-read coverage AFTER tour bus",
		"\t\t(default: no removal)",
		"",
		"Advanced options:",
		"\t-ins_length* <integer>\t\t: expected distance between two paired-end reads in the respective short-read dataset (default: no read pairing)",
		"\t-ins_length_long <integer>\t: expected distance between two long paired-end reads (default: no read pairing)",
		"\t-ins_length*_sd <integer>\t: est. standard deviation of respective dataset (default: 10% of corresponding length)",
		"\t\t[replace '*' by nothing, '2' or '_long' as necessary]",
		"\t-scaffolding <yes|no>\t\t: scaffolding of contigs using paired end information (default: on)",
		"\t-max_branch_length <integer>\t: maximum length in base pair of bubble (default: 100)",
		"\t-max_divergence <floating-point>: maximum divergence rate between two branches in a bubble (default: 0.2)",
		"\t-max_gap_count <integer>\t: maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)",
		"\t-min_pair_count <integer>\t: minimum number of paired end connections to justify the scaffolding of two long contigs (default: 5)",
		"\t-max_coverage <floating point>\t: removal of high coverage nodes AFTER tour bus (default: no removal)",
		"\t-coverage_mask <int>\t: minimum coverage required for confident regions of contigs (default: 1)",
		"\t-long_mult_cutoff <int>\t\t: minimum number of long reads required to merge contigs (default: 2)",
		"\t-unused_reads <yes|no>\t\t: export unused reads in UnusedReads.fa file (default: no)",
		"\t-alignments <yes|no>\t\t: export a summary of contig alignment to the reference sequences (default: no)",
		"\t-exportFiltered <yes|no>\t: export the long nodes which were eliminated by the coverage filters (default: no)",
		"\t-clean <yes|no>\t\t\t: remove all the intermediary files which are useless for recalculation (default: no)",
		"\t-very_clean <yes|no>\t\t: remove all the intermediary files (no recalculation possible) (default: no)",
		"\t-paired_exp_fraction <double>\t: remove all the paired end connections which have less than the specified fraction of the expected count (default: 0.1)",
		"\t-shortMatePaired* <yes|no>\t: for mate-pair libraries, indicate that the library might be contaminated with paired-end reads (default: no)",
		"\t-conserveLong <yes|no>\t\t: preserve sequences with long reads in them (default: no)",
		"",
		"Output:",
		"\tdirectory/contigs.fa\t\t: fasta file of contigs longer than twice hash length",
		"\tdirectory/stats.txt\t\t: stats file (tab-spaced) useful for determining appropriate coverage cutoff",
		"\tdirectory/LastGraph\t\t: special formatted file with all the information on the final graph",
		"\tdirectory/velvet_asm.afg\t: (if requested) AMOS compatible assembly file",
	};
	size_t line;

	for (line = 0; line < sizeof usageLines / sizeof usageLines[0]; line++)
		puts(usageLines[line]);
}
78
+
79
+ int main(int argc, char **argv)
80
+ {
81
+ ReadSet *sequences = NULL;
82
+ RoadMapArray *rdmaps;
83
+ PreGraph *preGraph;
84
+ Graph *graph;
85
+ char *directory, *graphFilename, *connectedGraphFilename,
86
+ *preGraphFilename, *seqFilename, *roadmapFilename,
87
+ *lowCovContigsFilename, *highCovContigsFilename;
88
+ double coverageCutoff = -1;
89
+ double longCoverageCutoff = -1;
90
+ double maxCoverageCutoff = -1;
91
+ double expectedCoverage = -1;
92
+ Coordinate minContigLength = -1;
93
+ Coordinate minContigKmerLength;
94
+ boolean *dubious = NULL;
95
+ Coordinate insertLength[CATEGORIES];
96
+ Coordinate insertLengthLong = -1;
97
+ Coordinate std_dev[CATEGORIES];
98
+ Coordinate std_dev_long = -1;
99
+ short int accelerationBits = 24;
100
+ boolean readTracking = false;
101
+ boolean exportAssembly = false;
102
+ boolean unusedReads = false;
103
+ boolean estimateCoverage = false;
104
+ boolean estimateCutoff = false;
105
+ boolean exportAlignments = false;
106
+ FILE *file;
107
+ int arg_index, arg_int;
108
+ double arg_double;
109
+ char *arg;
110
+ ShortLength *sequenceLengths = NULL;
111
+ Category cat;
112
+ boolean scaffolding = true;
113
+ int pebbleRounds = 1;
114
+ long long longlong_var;
115
+ short int short_var;
116
+ boolean exportFilteredNodes = false;
117
+ int clean = 0;
118
+ boolean conserveLong = false;
119
+ boolean shadows[CATEGORIES];
120
+ int coverageMask = 1;
121
+ SequencesReader *seqReadInfo = NULL;
122
+
123
+ setProgramName("velvetg");
124
+
125
+ for (cat = 0; cat < CATEGORIES; cat++) {
126
+ insertLength[cat] = -1;
127
+ std_dev[cat] = -1;
128
+ shadows[cat] = false;
129
+ }
130
+
131
+ // Error message
132
+ if (argc == 1) {
133
+ puts("velvetg - de Bruijn graph construction, error removal and repeat resolution");
134
+ printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
135
+ RELEASE_NUMBER, UPDATE_NUMBER);
136
+ puts("Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)");
137
+ puts("This is free software; see the source for copying conditions. There is NO");
138
+ puts("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.");
139
+ puts("Compilation settings:");
140
+ printf("CATEGORIES = %i\n", CATEGORIES);
141
+ printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
142
+ #ifdef _OPENMP
143
+ puts("OPENMP");
144
+ #endif
145
+ #ifdef LONGSEQUENCES
146
+ puts("LONGSEQUENCES");
147
+ #endif
148
+ #ifdef BIGASSEMBLY
149
+ puts("BIGASSEMBLY");
150
+ #endif
151
+ #ifdef COLOR
152
+ puts("COLOR");
153
+ #endif
154
+ #ifdef DEBUG
155
+ puts("DEBUG");
156
+ #endif
157
+ puts("");
158
+ printUsage();
159
+ return 1;
160
+ }
161
+
162
+ if (strcmp(argv[1], "--help") == 0) {
163
+ printUsage();
164
+ return 0;
165
+ }
166
+
167
+ // Memory allocation
168
+ directory = argv[1];
169
+ graphFilename = mallocOrExit(strlen(directory) + 100, char);
170
+ connectedGraphFilename = mallocOrExit(strlen(directory) + 100, char);
171
+ preGraphFilename =
172
+ mallocOrExit(strlen(directory) + 100, char);
173
+ roadmapFilename = mallocOrExit(strlen(directory) + 100, char);
174
+ seqFilename = mallocOrExit(strlen(directory) + 100, char);
175
+ lowCovContigsFilename = mallocOrExit(strlen(directory) + 100, char);
176
+ highCovContigsFilename = mallocOrExit(strlen(directory) + 100, char);
177
+
178
+ // Argument parsing
179
+ for (arg_index = 2; arg_index < argc; arg_index++) {
180
+ arg = argv[arg_index++];
181
+ if (arg_index >= argc) {
182
+ velvetLog("Unusual number of arguments!\n");
183
+ printUsage();
184
+ #ifdef DEBUG
185
+ abort();
186
+ #endif
187
+ exit(1);
188
+ }
189
+
190
+ if (strcmp(arg, "-cov_cutoff") == 0) {
191
+ if (strcmp(argv[arg_index], "auto") == 0) {
192
+ estimateCutoff = true;
193
+ } else {
194
+ sscanf(argv[arg_index], "%lf", &coverageCutoff);
195
+ }
196
+ } else if (strcmp(arg, "-long_cov_cutoff") == 0) {
197
+ sscanf(argv[arg_index], "%lf", &longCoverageCutoff);
198
+ } else if (strcmp(arg, "-exp_cov") == 0) {
199
+ if (strcmp(argv[arg_index], "auto") == 0) {
200
+ estimateCoverage = true;
201
+ readTracking = true;
202
+ } else {
203
+ sscanf(argv[arg_index], "%lf", &expectedCoverage);
204
+ if (expectedCoverage > 0)
205
+ readTracking = true;
206
+ }
207
+ } else if (strcmp(arg, "-ins_length") == 0) {
208
+ sscanf(argv[arg_index], "%lli", &longlong_var);
209
+ insertLength[0] = (Coordinate) longlong_var;
210
+ if (insertLength[0] < 0) {
211
+ velvetLog("Invalid insert length: %lli\n",
212
+ (long long) insertLength[0]);
213
+ #ifdef DEBUG
214
+ abort();
215
+ #endif
216
+ exit(1);
217
+ }
218
+ } else if (strcmp(arg, "-ins_length_sd") == 0) {
219
+ sscanf(argv[arg_index], "%lli", &longlong_var);
220
+ std_dev[0] = (Coordinate) longlong_var;
221
+ if (std_dev[0] < 0) {
222
+ velvetLog("Invalid std deviation: %lli\n",
223
+ (long long) std_dev[0]);
224
+ #ifdef DEBUG
225
+ abort();
226
+ #endif
227
+ exit(1);
228
+ }
229
+ } else if (strcmp(arg, "-ins_length_long") == 0) {
230
+ sscanf(argv[arg_index], "%lli", &longlong_var);
231
+ insertLengthLong = (Coordinate) longlong_var;
232
+ } else if (strcmp(arg, "-ins_length_long_sd") == 0) {
233
+ sscanf(argv[arg_index], "%lli", &longlong_var);
234
+ std_dev_long = (Coordinate) longlong_var;
235
+ } else if (strncmp(arg, "-ins_length", 11) == 0
236
+ && strchr(arg, 'd') == NULL) {
237
+ sscanf(arg, "-ins_length%hi", &short_var);
238
+ cat = (Category) short_var;
239
+ if (cat < 1 || cat > CATEGORIES) {
240
+ velvetLog("Unknown option: %s\n", arg);
241
+ #ifdef DEBUG
242
+ abort();
243
+ #endif
244
+ exit(1);
245
+ }
246
+ sscanf(argv[arg_index], "%lli", &longlong_var);
247
+ insertLength[cat - 1] = (Coordinate) longlong_var;
248
+ if (insertLength[cat - 1] < 0) {
249
+ velvetLog("Invalid insert length: %lli\n",
250
+ (long long) insertLength[cat - 1]);
251
+ #ifdef DEBUG
252
+ abort();
253
+ #endif
254
+ exit(1);
255
+ }
256
+ } else if (strncmp(arg, "-ins_length", 11) == 0) {
257
+ sscanf(arg, "-ins_length%hi_sd", &short_var);
258
+ cat = (Category) short_var;
259
+ if (cat < 1 || cat > CATEGORIES) {
260
+ velvetLog("Unknown option: %s\n", arg);
261
+ #ifdef DEBUG
262
+ abort();
263
+ #endif
264
+ exit(1);
265
+ }
266
+ sscanf(argv[arg_index], "%lli", &longlong_var);
267
+ std_dev[cat - 1] = (Coordinate) longlong_var;
268
+ if (std_dev[cat - 1] < 0) {
269
+ velvetLog("Invalid std deviation: %lli\n",
270
+ (long long) std_dev[cat - 1]);
271
+ #ifdef DEBUG
272
+ abort();
273
+ #endif
274
+ exit(1);
275
+ }
276
+ } else if (strcmp(arg, "-read_trkg") == 0) {
277
+ readTracking =
278
+ (strcmp(argv[arg_index], "yes") == 0);
279
+ } else if (strcmp(arg, "-scaffolding") == 0) {
280
+ scaffolding =
281
+ (strcmp(argv[arg_index], "yes") == 0);
282
+ } else if (strcmp(arg, "-exportFiltered") == 0) {
283
+ exportFilteredNodes =
284
+ (strcmp(argv[arg_index], "yes") == 0);
285
+ } else if (strcmp(arg, "-amos_file") == 0) {
286
+ exportAssembly =
287
+ (strcmp(argv[arg_index], "yes") == 0);
288
+ } else if (strcmp(arg, "-alignments") == 0) {
289
+ exportAlignments =
290
+ (strcmp(argv[arg_index], "yes") == 0);
291
+ } else if (strcmp(arg, "-min_contig_lgth") == 0) {
292
+ sscanf(argv[arg_index], "%lli", &longlong_var);
293
+ minContigLength = (Coordinate) longlong_var;
294
+ } else if (strcmp(arg, "-coverage_mask") == 0) {
295
+ sscanf(argv[arg_index], "%lli", &longlong_var);
296
+ coverageMask = (IDnum) longlong_var;
297
+ } else if (strcmp(arg, "-accel_bits") == 0) {
298
+ sscanf(argv[arg_index], "%hi", &accelerationBits);
299
+ if (accelerationBits < 0) {
300
+ velvetLog
301
+ ("Illegal acceleration parameter: %s\n",
302
+ argv[arg_index]);
303
+ printUsage();
304
+ return -1;
305
+ }
306
+ } else if (strcmp(arg, "-max_branch_length") == 0) {
307
+ sscanf(argv[arg_index], "%i", &arg_int);
308
+ setMaxReadLength(arg_int);
309
+ setLocalMaxReadLength(arg_int);
310
+ } else if (strcmp(arg, "-max_divergence") == 0) {
311
+ sscanf(argv[arg_index], "%lf", &arg_double);
312
+ setMaxDivergence(arg_double);
313
+ setLocalMaxDivergence(arg_double);
314
+ } else if (strcmp(arg, "-max_gap_count") == 0) {
315
+ sscanf(argv[arg_index], "%i", &arg_int);
316
+ setMaxGaps(arg_int);
317
+ setLocalMaxGaps(arg_int);
318
+ } else if (strcmp(arg, "-min_pair_count") == 0) {
319
+ sscanf(argv[arg_index], "%i", &arg_int);
320
+ setUnreliableConnectionCutoff(arg_int);
321
+ } else if (strcmp(arg, "-max_coverage") == 0) {
322
+ sscanf(argv[arg_index], "%lf", &maxCoverageCutoff);
323
+ } else if (strcmp(arg, "-long_mult_cutoff") == 0) {
324
+ sscanf(argv[arg_index], "%i", &arg_int);
325
+ setMultiplicityCutoff(arg_int);
326
+ } else if (strcmp(arg, "-paired_exp_fraction") == 0) {
327
+ sscanf(argv[arg_index], "%lf", &arg_double);
328
+ setPairedExpFraction(arg_double);
329
+ } else if (strcmp(arg, "-clean") == 0) {
330
+ if (strcmp(argv[arg_index], "yes") == 0)
331
+ clean = 1;
332
+ } else if (strcmp(arg, "-very_clean") == 0) {
333
+ if (strcmp(argv[arg_index], "yes") == 0)
334
+ clean = 2;
335
+ } else if (strcmp(arg, "-conserveLong") == 0) {
336
+ if (strcmp(argv[arg_index], "yes") == 0)
337
+ conserveLong = 2;
338
+ } else if (strcmp(arg, "-unused_reads") == 0) {
339
+ unusedReads =
340
+ (strcmp(argv[arg_index], "yes") == 0);
341
+ if (unusedReads)
342
+ readTracking = true;
343
+ } else if (strcmp(arg, "-shortMatePaired") == 0) {
344
+ shadows[0] = (strcmp(argv[arg_index], "yes") == 0);
345
+ } else if (strncmp(arg, "-shortMatePaired", 16) == 0) {
346
+ sscanf(arg, "-shortMatePaired%hi", &short_var);
347
+ cat = (Category) short_var;
348
+ if (cat < 1 || cat > CATEGORIES) {
349
+ velvetLog("Unknown option: %s\n", arg);
350
+ #ifdef DEBUG
351
+ abort();
352
+ #endif
353
+ exit(1);
354
+ }
355
+ shadows[cat - 1] = (strcmp(argv[arg_index], "yes") == 0);
356
+ } else if (strcmp(arg, "--help") == 0) {
357
+ printUsage();
358
+ return 0;
359
+ } else {
360
+ velvetLog("Unknown option: %s;\n", arg);
361
+ printUsage();
362
+ return 1;
363
+ }
364
+ }
365
+
366
+ // Bookkeeping
367
+ logInstructions(argc, argv, directory);
368
+
369
+ seqReadInfo = callocOrExit(1, SequencesReader);
370
+ strcpy(seqFilename, directory);
371
+ // if binary CnyUnifiedSeq exists, use it. Otherwise try Sequences
372
+ strcat(seqFilename, "/CnyUnifiedSeq");
373
+ if (access(seqFilename, R_OK) == 0) {
374
+ seqReadInfo->m_bIsBinary = true;
375
+ } else {
376
+ seqReadInfo->m_bIsBinary = false;
377
+ strcpy(seqFilename, directory);
378
+ strcat(seqFilename, "/Sequences");
379
+ }
380
+ seqReadInfo->m_seqFilename = seqFilename;
381
+ strcpy(roadmapFilename, directory);
382
+ strcat(roadmapFilename, "/Roadmaps");
383
+
384
+ strcpy(preGraphFilename, directory);
385
+ strcat(preGraphFilename, "/PreGraph");
386
+
387
+ strcpy(connectedGraphFilename, directory);
388
+ strcat(connectedGraphFilename, "/ConnectedGraph");
389
+
390
+ if (!readTracking) {
391
+ strcpy(graphFilename, directory);
392
+ strcat(graphFilename, "/Graph");
393
+ } else {
394
+ strcpy(graphFilename, directory);
395
+ strcat(graphFilename, "/Graph2");
396
+ }
397
+
398
+ strcpy(lowCovContigsFilename, directory);
399
+ strcat(lowCovContigsFilename, "/lowCoverageContigs.fa");
400
+
401
+ strcpy(highCovContigsFilename, directory);
402
+ strcat(highCovContigsFilename, "/highCoverageContigs.fa");
403
+
404
+ // Graph uploading or creation
405
+ if ((file = fopen(graphFilename, "r")) != NULL) {
406
+ fclose(file);
407
+
408
+ graph = importGraph(graphFilename);
409
+
410
+ } else if ((file = fopen(connectedGraphFilename, "r")) != NULL) {
411
+ fclose(file);
412
+ if (seqReadInfo->m_bIsBinary) {
413
+
414
+ sequences = importCnyReadSet(seqFilename);
415
+
416
+ #if 0
417
+ // compare to velvet's version of a seq
418
+ ReadSet *compareSequences = NULL;
419
+ compareSeqFilename = mallocOrExit(strlen(directory) + 100, char);
420
+ strcpy(compareSeqFilename, directory);
421
+ strcat(compareSeqFilename, "/Sequences");
422
+ compareSequences = importReadSet(compareSeqFilename);
423
+ convertSequences(compareSequences);
424
+ if (sequences->readCount != compareSequences->readCount) {
425
+ printf("read count mismatch\n");
426
+ exit(1);
427
+ }
428
+ int i;
429
+ for (i = 0; i < sequences->readCount; i++) {
430
+ TightString *tString = getTightStringInArray(sequences->tSequences, i);
431
+ TightString *tStringCmp = getTightStringInArray(compareSequences->tSequences, i);
432
+ if (getLength(tString) != getLength(tStringCmp)) {
433
+ printf("sequence %d len mismatch\n", i);
434
+ exit(1);
435
+ }
436
+ if (strcmp(readTightString(tString), readTightString(tStringCmp)) != 0) {
437
+ printf("sequence %d cmp mismatch\n", i);
438
+ printf("seq %s != cmp %s\n", readTightString(tString), readTightString(tStringCmp));
439
+ exit(1);
440
+ }
441
+ }
442
+ #endif
443
+ } else {
444
+ sequences = importReadSet(seqFilename);
445
+ convertSequences(sequences);
446
+ }
447
+ seqReadInfo->m_sequences = sequences;
448
+
449
+ graph =
450
+ importConnectedGraph(connectedGraphFilename, sequences,
451
+ roadmapFilename, readTracking, accelerationBits);
452
+
453
+ sequenceLengths =
454
+ getSequenceLengths(sequences, getWordLength(graph));
455
+ correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
456
+ exportGraph(graphFilename, graph, sequences->tSequences);
457
+ } else if ((file = fopen(preGraphFilename, "r")) != NULL) {
458
+ fclose(file);
459
+ if (seqReadInfo->m_bIsBinary) {
460
+ sequences = importCnyReadSet(seqFilename);
461
+ } else {
462
+ sequences = importReadSet(seqFilename);
463
+ convertSequences(sequences);
464
+ }
465
+ seqReadInfo->m_sequences = sequences;
466
+ graph =
467
+ importPreGraph(preGraphFilename, sequences,
468
+ roadmapFilename, readTracking, accelerationBits);
469
+ sequenceLengths =
470
+ getSequenceLengths(sequences, getWordLength(graph));
471
+ correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
472
+ exportGraph(graphFilename, graph, sequences->tSequences);
473
+ } else if ((file = fopen(roadmapFilename, "r")) != NULL) {
474
+ fclose(file);
475
+
476
+ rdmaps = importRoadMapArray(roadmapFilename);
477
+ if (seqReadInfo->m_bIsBinary) {
478
+ // pull in sequences first and use in preGraph
479
+ sequences = importCnyReadSet(seqFilename);
480
+ seqReadInfo->m_sequences = sequences;
481
+ #if 0
482
+ // compare to velvet's version of a seq
483
+ ReadSet *compareSequences = NULL;
484
+ char *compareSeqFilename = mallocOrExit(strlen(directory) + 100, char);
485
+ strcpy(compareSeqFilename, directory);
486
+ strcat(compareSeqFilename, "/Sequences");
487
+ compareSequences = importReadSet(compareSeqFilename);
488
+ convertSequences(compareSequences);
489
+ if (sequences->readCount != compareSequences->readCount) {
490
+ printf("read count mismatch\n");
491
+ exit(1);
492
+ }
493
+ int i;
494
+ for (i = 0; i < sequences->readCount; i++) {
495
+ TightString *tString = getTightStringInArray(sequences->tSequences, i);
496
+ TightString *tStringCmp = getTightStringInArray(compareSequences->tSequences, i);
497
+ if (getLength(tString) != getLength(tStringCmp)) {
498
+ printf("sequence %d len mismatch\n", i);
499
+ exit(1);
500
+ }
501
+ if (strcmp(readTightString(tString), readTightString(tStringCmp)) != 0) {
502
+ printf("sequence %d cmp mismatch\n", i);
503
+ printf("seq %s != cmp %s\n", readTightString(tString), readTightString(tStringCmp));
504
+ exit(1);
505
+ }
506
+ }
507
+ printf("sequence files match!\n");
508
+ #endif
509
+ }
510
+ preGraph = newPreGraph_pg(rdmaps, seqReadInfo);
511
+ concatenatePreGraph_pg(preGraph);
512
+ if (!conserveLong)
513
+ clipTips_pg(preGraph);
514
+ exportPreGraph_pg(preGraphFilename, preGraph);
515
+ destroyPreGraph_pg(preGraph);
516
+ if (!seqReadInfo->m_bIsBinary) {
517
+ sequences = importReadSet(seqFilename);
518
+ convertSequences(sequences);
519
+ seqReadInfo->m_sequences = sequences;
520
+ }
521
+ graph =
522
+ importPreGraph(preGraphFilename, sequences,
523
+ roadmapFilename, readTracking, accelerationBits);
524
+ sequenceLengths =
525
+ getSequenceLengths(sequences, getWordLength(graph));
526
+ correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
527
+ exportGraph(graphFilename, graph, sequences->tSequences);
528
+ } else {
529
+ velvetLog("No Roadmap file to build upon! Please run velveth (see manual)\n");
530
+ #ifdef DEBUG
531
+ abort();
532
+ #endif
533
+ exit(1);
534
+ }
535
+
536
+ // Set insert lengths and their standard deviations
537
+ for (cat = 0; cat < CATEGORIES; cat++) {
538
+ if (insertLength[cat] > -1 && std_dev[cat] < 0)
539
+ std_dev[cat] = insertLength[cat] / 10;
540
+ setInsertLengths(graph, cat,
541
+ insertLength[cat], std_dev[cat]);
542
+ }
543
+
544
+ if (insertLengthLong > -1 && std_dev_long < 0)
545
+ std_dev_long = insertLengthLong / 10;
546
+ setInsertLengths(graph, CATEGORIES,
547
+ insertLengthLong, std_dev_long);
548
+
549
+ // Coverage cutoff
550
+ if (expectedCoverage < 0 && estimateCoverage == true) {
551
+ expectedCoverage = estimated_cov(graph, directory);
552
+ if (coverageCutoff < 0) {
553
+ coverageCutoff = expectedCoverage / 2;
554
+ estimateCutoff = true;
555
+ }
556
+ } else {
557
+ estimateCoverage = false;
558
+ if (coverageCutoff < 0 && estimateCutoff)
559
+ coverageCutoff = estimated_cov(graph, directory) / 2;
560
+ else
561
+ estimateCutoff = false;
562
+ }
563
+
564
+ if (coverageCutoff < 0) {
565
+ velvetLog("WARNING: NO COVERAGE CUTOFF PROVIDED\n");
566
+ velvetLog("Velvet will probably leave behind many detectable errors\n");
567
+ velvetLog("See manual for instructions on how to set the coverage cutoff parameter\n");
568
+ }
569
+
570
+ if (sequences == NULL) {
571
+ if (seqReadInfo->m_bIsBinary) {
572
+ sequences = importCnyReadSet(seqFilename);
573
+ } else {
574
+ sequences = importReadSet(seqFilename);
575
+ convertSequences(sequences);
576
+ }
577
+ seqReadInfo->m_sequences = sequences;
578
+ }
579
+
580
+ if (minContigLength < 2 * getWordLength(graph))
581
+ minContigKmerLength = getWordLength(graph);
582
+ else
583
+ minContigKmerLength = minContigLength - getWordLength(graph) + 1;
584
+
585
+ dubious =
586
+ removeLowCoverageNodesAndDenounceDubiousReads(graph,
587
+ coverageCutoff,
588
+ sequences,
589
+ exportFilteredNodes,
590
+ minContigKmerLength,
591
+ lowCovContigsFilename);
592
+
593
+ removeLowLongCoverageNodesAndDenounceDubiousReads(graph,
594
+ longCoverageCutoff,
595
+ sequences,
596
+ dubious,
597
+ exportFilteredNodes,
598
+ minContigKmerLength,
599
+ lowCovContigsFilename);
600
+
601
+ removeHighCoverageNodes(graph, maxCoverageCutoff, exportFilteredNodes, minContigKmerLength, highCovContigsFilename);
602
+ clipTipsHard(graph, conserveLong);
603
+
604
+ if (sequences->readCount > 0 && sequences->categories[0] == REFERENCE)
605
+ removeLowArcs(graph, coverageCutoff);
606
+
607
+ if (expectedCoverage > 0) {
608
+
609
+ // Mixed length sequencing
610
+ readCoherentGraph(graph, isUniqueSolexa, expectedCoverage,
611
+ sequences);
612
+
613
+ // Paired end resolution
614
+ createReadPairingArray(sequences);
615
+ pebbleRounds += pairedCategories(sequences);
616
+ detachDubiousReads(sequences, dubious);
617
+ activateGapMarkers(graph);
618
+
619
+ for ( ;pebbleRounds > 0; pebbleRounds--)
620
+ exploitShortReadPairs(graph, sequences, dubious, shadows, scaffolding);
621
+
622
+ } else {
623
+ velvetLog("WARNING: NO EXPECTED COVERAGE PROVIDED\n");
624
+ velvetLog("Velvet will be unable to resolve any repeats\n");
625
+ velvetLog("See manual for instructions on how to set the expected coverage parameter\n");
626
+ }
627
+
628
+ if (dubious)
629
+ free(dubious);
630
+
631
+ concatenateGraph(graph);
632
+
633
+ removeLowCoverageReferenceNodes(graph, coverageCutoff, longCoverageCutoff, sequences);
634
+
635
+ strcpy(graphFilename, directory);
636
+ strcat(graphFilename, "/contigs.fa");
637
+ sequenceLengths = getSequenceLengths(sequences, getWordLength(graph));
638
+ exportLongNodeSequences(graphFilename, graph, minContigKmerLength, sequences, sequenceLengths, coverageMask);
639
+
640
+ if (exportAlignments) {
641
+ strcpy(graphFilename, directory);
642
+ strcat(graphFilename, "/contig-alignments.psa");
643
+ exportLongNodeMappings(graphFilename, graph, sequences,
644
+ minContigKmerLength, seqReadInfo);
645
+ }
646
+
647
+ strcpy(graphFilename, directory);
648
+ strcat(graphFilename, "/stats.txt");
649
+ displayGeneralStatistics(graph, graphFilename, sequences);
650
+
651
+ if (clean == 0) {
652
+ strcpy(graphFilename, directory);
653
+ strcat(graphFilename, "/LastGraph");
654
+ exportGraph(graphFilename, graph, sequences->tSequences);
655
+ }
656
+
657
+ if (exportAssembly) {
658
+ strcpy(graphFilename, directory);
659
+ strcat(graphFilename, "/velvet_asm.afg");
660
+ exportAMOSContigs(graphFilename, graph, minContigKmerLength, sequences);
661
+ }
662
+
663
+ if (unusedReads)
664
+ exportUnusedReads(graph, sequences, minContigKmerLength, directory);
665
+
666
+ if (estimateCoverage)
667
+ velvetLog("Estimated Coverage = %f\n", expectedCoverage);
668
+ if (estimateCutoff)
669
+ velvetLog("Estimated Coverage cutoff = %f\n", coverageCutoff);
670
+
671
+ logFinalStats(graph, minContigKmerLength, directory);
672
+
673
+ if (clean > 0) {
674
+ strcpy(graphFilename, directory);
675
+ strcat(graphFilename, "/Roadmaps");
676
+ remove(graphFilename);
677
+
678
+ strcpy(graphFilename, directory);
679
+ strcat(graphFilename, "/LastGraph");
680
+ remove(graphFilename);
681
+ }
682
+
683
+ if (clean > 1) {
684
+ strcpy(graphFilename, directory);
685
+ strcat(graphFilename, "/Sequences");
686
+ remove(graphFilename);
687
+
688
+ strcpy(graphFilename, directory);
689
+ strcat(graphFilename, "/Graph2");
690
+ remove(graphFilename);
691
+
692
+ strcpy(graphFilename, directory);
693
+ strcat(graphFilename, "/Graph");
694
+ remove(graphFilename);
695
+ }
696
+
697
+ free(sequenceLengths);
698
+ destroyGraph(graph);
699
+ free(graphFilename);
700
+ free(connectedGraphFilename);
701
+ free(preGraphFilename);
702
+ free(seqFilename);
703
+ free(roadmapFilename);
704
+ free(lowCovContigsFilename);
705
+ free(highCovContigsFilename);
706
+ destroyReadSet(sequences);
707
+ if (seqReadInfo) {
708
+ free(seqReadInfo);
709
+ }
710
+
711
+ return 0;
712
+ }