finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,86 @@
1
+ /*
2
+ Copyright 2009 Sylvain Foret (sylvain.foret@anu.edu.au)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+
22
+ #ifndef _ALLOC_ARRAY_H_
23
+ #define _ALLOC_ARRAY_H_
24
+
25
+ #ifdef _OPENMP
26
+ #include <omp.h>
27
+ #endif
28
+
29
+ #include "globals.h"
30
+
31
typedef struct AllocArray_st AllocArray;
typedef struct AllocArrayFreeElement_st AllocArrayFreeElement;

/*
 * Bookkeeping record for a block-based element allocator.
 *
 * Elements are addressed by a 1-based ArrayIdx; the accessors generated by
 * DECLARE_FAST_ACCESSORS map index idx to
 * blocks[(idx - 1) / maxElements][(idx - 1) % maxElements].
 * The remaining fields are maintained by allocArray.c, which is not visible
 * from this header; their descriptions below are inferred from the names
 * only -- TODO confirm against the implementation.
 */
struct AllocArray_st {
	void **blocks;                       /* table of element blocks (indexed by the accessors) */
	AllocArrayFreeElement *freeElements; /* presumably the list of recycled elements */
	size_t elementSize;                  /* presumably the size of one element, in bytes */
	size_t blockSize;                    /* presumably the size of one block */
	size_t maxBlocks;                    /* presumably the capacity of the blocks table */
	size_t currentBlocks;                /* presumably the number of blocks in use */
	size_t maxElements;                  /* elements per block (divisor used by the accessors) */
	size_t currentElements;              /* presumably the fill level of the current block */
#ifdef DEBUG
	char *name;                          /* debug builds only */
	size_t elementsRecycled;             /* debug builds only */
	size_t elementsAllocated;            /* debug builds only */
#endif
#ifdef _OPENMP
	int nbThreads;                       /* OpenMP builds only */
#endif
};
53
+
54
+ AllocArray* newAllocArray (size_t elementSize, char *name);
55
+ void destroyAllocArray (AllocArray *array);
56
+ ArrayIdx allocArrayAllocate (AllocArray *array);
57
+ void allocArrayFree (AllocArray *array, ArrayIdx idx);
58
+
59
/*
 * DECLARE_FAST_ACCESSORS(name, type, array)
 *
 * Expands to two static inline index-to-pointer converters for the
 * AllocArray pointer expression `array`, whose elements are of type `type`:
 *
 *   name##_FI2P(idx)  - no NULL_IDX check; idx is 1-based, so element
 *                       (idx - 1) is looked up in block
 *                       (idx - 1) / maxElements at offset
 *                       (idx - 1) % maxElements.
 *   name##_I2P(idx)   - same, but returns NULL when idx == NULL_IDX.
 *
 * `array` is parenthesised on every use so that any pointer-valued
 * expression (not just a plain identifier) expands correctly.
 */
#define DECLARE_FAST_ACCESSORS(name, type, array) \
/* Fast version, without null pointer checks */ \
static inline type* name##_FI2P(ArrayIdx idx) \
{ \
	const ArrayIdx i = idx - 1; \
	const ArrayIdx blockIdx = i / (array)->maxElements; \
	const ArrayIdx elementIdx = i % (array)->maxElements; \
	return &((type*)((array)->blocks[blockIdx]))[elementIdx]; \
} \
/* Slower version, with null pointer checks */ \
static inline type* name##_I2P(ArrayIdx idx) \
{ \
	if (idx != NULL_IDX) \
		return name##_FI2P(idx); \
	return NULL; \
}
75
+
76
+ #ifdef _OPENMP
77
+ // For multithreading: thread-specific alloc arrays
78
+ AllocArray *newAllocArrayArray(unsigned int n,
79
+ size_t elementSize,
80
+ char * name);
81
+ void destroyAllocArrayArray(AllocArray * allocArray);
82
+ ArrayIdx allocArrayArrayAllocate (AllocArray *array);
83
+ void allocArrayArrayFree (AllocArray *array, ArrayIdx idx);
84
+ #endif
85
+
86
+ #endif /* _ALLOC_ARRAY_H_ */
@@ -0,0 +1,107 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <unistd.h>
4
+ #include <sys/wait.h>
5
+ #include <string.h>
6
+
7
+ #include "autoOpen.h"
8
+
9
/*
 * Implementation of "popen" that ignores stderr.
 *
 * Forks and runs `exe` (looked up through PATH by execvp) with the
 * NULL-terminated argument vector `argv`. The child's stdout is connected to
 * a pipe; its stdin and stderr are closed.
 *
 * Returns a read stream on the child's stdout and stores the child's pid in
 * *retPid. Returns NULL (leaving *retPid untouched) if the pipe, the fork or
 * the stream creation fails; in the latter case the child is reaped here.
 * A failing exec is not reported directly: the child exits with status 1 and
 * the caller simply reads EOF.
 */
static FILE* popenNoStderr(const char *exe, const char *const argv[], int* retPid)
{
	int out[2];
	int pid;
	FILE *stream;

	if (pipe(out) < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(out[0]);
		close(out[1]);
		return NULL;
	}

	if (pid == 0) { // child
		close(out[0]);

		// dup2 targets fd 1 explicitly instead of relying on "lowest free fd"
		if (dup2(out[1], 1) < 0)
			_exit(1);
		if (out[1] != 1)
			close(out[1]);

		close(0); // Don't let child inherit stdin, nor stderr
		close(2);

		execvp(exe, (char**)argv);
		// _exit: do not flush stdio buffers inherited from the parent
		_exit(1);
	}

	// parent
	close(out[1]);
	stream = fdopen(out[0], "r");
	if (stream == NULL) {
		// closing the read end makes the child's writes fail, so the wait cannot hang on a full pipe
		close(out[0]);
		waitpid(pid, NULL, 0);
		return NULL;
	}

	*retPid = pid;
	return stream;
}
46
+
47
/*
 * Counterpart of popenNoStderr: closes the stream `out` and waits for the
 * child process `pid` to terminate.
 *
 * Returns the raw wait status filled in by waitpid (to be decoded with the
 * WIFEXITED / WEXITSTATUS macros), or -1 if waitpid itself fails, instead of
 * an uninitialised value.
 */
static int pcloseNoStderr(int pid, FILE* out)
{
	int status = 0;

	fclose(out);
	if (waitpid(pid, &status, 0) < 0)
		return -1;
	return status;
}
54
+
55
+
56
// Readers tried in order by openFileAuto; "" means "open the file directly".
// NULL-terminated. Both the strings and the table itself are read-only.
static const char *const decompressors[] = {"","pigz", "gunzip", "pbunzip2", "bunzip2", NULL};
57
+
58
+ AutoFile* openFileAuto(char*filename)
59
+ {
60
+ AutoFile* seqFile = calloc(1, sizeof(AutoFile));
61
+ int i;
62
+
63
+ if (strcmp(filename, "-")==0)
64
+ exitErrorf(EXIT_FAILURE, false, "Cannot read from stdin in auto mode\n");
65
+
66
+ for (i=0; decompressors[i] ; i++) {
67
+ if (strlen(decompressors[i])==0) {
68
+ seqFile->file = fopen(filename, "r");
69
+ seqFile->pid = 0;
70
+ seqFile->decompressor = "Raw read";
71
+ } else {
72
+ //printf("Trying : %s\n", decompressors[i]);
73
+ char const* args[] = {decompressors[i], "-c", "-d", filename, NULL};
74
+ seqFile->file = popenNoStderr(args[0], args, &(seqFile->pid));
75
+ seqFile->decompressor = decompressors[i];
76
+ }
77
+
78
+ if (!seqFile->file)
79
+ continue;
80
+
81
+ int c = fgetc(seqFile->file);
82
+ if (c=='>' || c=='@') {
83
+ // Ok, looks like FASTA or FASTQ
84
+ ungetc(c, seqFile->file);
85
+ seqFile->first_char = c;
86
+ return seqFile;
87
+ } else {
88
+ if (seqFile->pid)
89
+ pcloseNoStderr(seqFile->pid, seqFile->file);
90
+ else
91
+ fclose(seqFile->file);
92
+ }
93
+ }
94
+ //printf("Unable to determine file type\n");
95
+ return NULL;
96
+ }
97
+
98
+ void closeFileAuto(AutoFile* seqFile)
99
+ {
100
+ if (!seqFile)
101
+ return;
102
+
103
+ if (seqFile->pid)
104
+ pcloseNoStderr(seqFile->pid, seqFile->file);
105
+ else
106
+ fclose(seqFile->file);
107
+ }
@@ -0,0 +1,18 @@
1
+ #ifndef AUTOOPEN_H_
2
+ #define AUTOOPEN_H_
3
+
4
+ #include "globals.h"
5
+ #include "utility.h"
6
+
7
/* Handle returned by openFileAuto and released with closeFileAuto. */
typedef struct {
	int pid;                  /* pid of the decompressor child, 0 for a plain fopen */
	FILE *file;               /* stream the sequence data is read from */
	const char *decompressor; /* name of the reader that was used */
	int first_char;           /* first character of the stream ('>' or '@') */
} AutoFile;
13
+
14
+ AutoFile* openFileAuto(char*filename);
15
+
16
+ void closeFileAuto(AutoFile* autoFile);
17
+
18
+ #endif
@@ -0,0 +1,813 @@
1
+ /*
2
+ Copyright 2011 Convey Computer Corporation (info@conveycomputer.com)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <math.h>
25
+ #include <time.h>
26
+ #include <limits.h>
27
+
28
+ #include "globals.h"
29
+ #include "tightString.h"
30
+ #include "readSet.h"
31
+ #include "binarySequences.h"
32
+ #include "utility.h"
33
+
34
+ #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
35
+ #include "../third-party/zlib-1.2.3/Win32/include/zlib.h"
36
+ #else
37
+ #include "../third-party/zlib-1.2.3/zlib.h"
38
+ #endif
39
+
40
+ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
41
+ # include <fcntl.h>
42
+ # include <io.h>
43
+ # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
44
+ #else
45
+ # define SET_BINARY_MODE(file)
46
+ #endif
47
+
48
+ // write defines, typedefs, and protos
49
+ #define WRITE_BUF_SHFT 16 // byte shift and mask
50
+ #define WRITE_BUF_SIZE (1<<WRITE_BUF_SHFT)
51
+ #define WRITE_BUF_MASK (WRITE_BUF_SIZE-1)
52
+ #define SHORT_NUCL_LENGTH 128 // Nucleotide length (2-bits each)
53
+
54
+ void computeSecondInPair(ReadSet * reads);
55
+
56
+ FILE *openCnySeqForRead(const char *fileName, CnyUnifiedSeqFileHeader *seqFileHeader)
57
+ {
58
+ FILE *pFile;
59
+ if ((pFile = fopen(fileName, "rb")) == 0) {
60
+ velvetLog("Unable to open %s for reading\n", fileName);
61
+ return NULL;
62
+ }
63
+
64
+ if (fread(seqFileHeader, sizeof(*seqFileHeader), 1, pFile) != 1) {
65
+ velvetLog("Unable to read file %s\n", fileName);
66
+ fclose(pFile);
67
+ return NULL;
68
+ }
69
+
70
+ if (strncmp((char *)&seqFileHeader->m_magic, "CSQ0", 4) != 0) {
71
+ velvetLog("Unknown format for file %s\n", fileName);
72
+ fclose(pFile);
73
+ return NULL;
74
+ }
75
+
76
+ if (seqFileHeader->m_bFileWriteCompleted == false) {
77
+ velvetLog("Corrupted file, %s\n", fileName);
78
+ fclose(pFile);
79
+ return NULL;
80
+ }
81
+
82
+ if (seqFileHeader->m_numCategories > CATEGORIES) {
83
+ velvetLog("File %s has %d categories, please rebuild velvet to match\n", fileName, seqFileHeader->m_numCategories);
84
+ fclose(pFile);
85
+ return NULL;
86
+ }
87
+
88
+ #ifdef COLOR
89
+ if (!seqFileHeader->m_bColor) {
90
+ velvetLog("File %s does not specify color, please rebuild velvet to match\n", fileName);
91
+ fclose(pFile);
92
+ return NULL;
93
+ }
94
+ #else
95
+ if (seqFileHeader->m_bColor) {
96
+ velvetLog("File %s specifies color, please rebuild velvet to match\n", fileName);
97
+ fclose(pFile);
98
+ return NULL;
99
+ }
100
+ #endif
101
+ return pFile;
102
+ }
103
+
104
+ static boolean refillCnySeqReadBuffer(SequencesReader *seqReadInfo)
105
+ {
106
+ uint64_t readLen = (USF_READ_BUF_SIZE < seqReadInfo->m_unifiedSeqFileHeader.m_seqNuclStoreSize - seqReadInfo->m_readBufPos) ?
107
+ USF_READ_BUF_SIZE : seqReadInfo->m_unifiedSeqFileHeader.m_seqNuclStoreSize - seqReadInfo->m_readBufPos;
108
+
109
+ if (readLen == 0)
110
+ return false;
111
+
112
+ if (fread(seqReadInfo->m_pReadBuffer, (uint32_t)readLen, 1, seqReadInfo->m_pFile) != 1) {
113
+ velvetLog("Unable to read file\n");
114
+ exit (1);
115
+ }
116
+
117
+ seqReadInfo->m_pCurrentReadPtr = seqReadInfo->m_pReadBuffer;
118
+ seqReadInfo->m_pReadBufEnd = seqReadInfo->m_pReadBuffer + readLen;
119
+ seqReadInfo->m_readBufPos += readLen;
120
+
121
+ if (seqReadInfo->m_pNextReadPtr >= seqReadInfo->m_pReadBufEnd) {
122
+ seqReadInfo->m_pNextReadPtr -= USF_READ_BUF_SIZE;
123
+ }
124
+
125
+ return true;
126
+ }
127
+
128
+ static int32_t readCnySeqUint8(SequencesReader *seqReadInfo)
129
+ {
130
+ if (seqReadInfo->m_pCurrentReadPtr == seqReadInfo->m_pReadBufEnd && !refillCnySeqReadBuffer(seqReadInfo))
131
+ {
132
+ return -1;
133
+ }
134
+
135
+ // printf("m_pCurrentReadPtr %llx\n", (long long) pReadInfo->m_pCurrentReadPtr);
136
+ return *seqReadInfo->m_pCurrentReadPtr++;
137
+ }
138
+
139
+ uint32_t readCnySeqUint32(SequencesReader *seqReadInfo)
140
+ {
141
+ uint32_t data;
142
+ data = 0;
143
+ int i;
144
+ for (i = 0; i < 4; i += 1)
145
+ data |= readCnySeqUint8(seqReadInfo) << (i*8);
146
+ return data;
147
+ }
148
+
149
+ boolean advanceCnySeqCurrentRead(SequencesReader *seqReadInfo)
150
+ {
151
+ // Perform consistency check, unused bits of previous sequence should have a fixed pattern
152
+ uint32_t finalNuclOffset = 1;
153
+ if (seqReadInfo->m_bIsRef) {
154
+ finalNuclOffset += (sizeof(seqReadInfo->m_refCnt));
155
+ finalNuclOffset += (sizeof(RefInfo) * seqReadInfo->m_refCnt);
156
+ }
157
+ if ((seqReadInfo->m_currentReadLength & 3) != 0 && ((seqReadInfo->m_pNextReadPtr - finalNuclOffset) >= seqReadInfo->m_pReadBuffer)) {
158
+ uint8_t mask = 0xFF << (seqReadInfo->m_currentReadLength & 3) * 2;
159
+ if ((*(seqReadInfo->m_pNextReadPtr - finalNuclOffset) & mask) != (0xAA & mask)) {
160
+ velvetLog("Cny seq consistency check failed in advance\n");
161
+ #ifdef DEBUG
162
+ abort();
163
+ #endif
164
+ exit(1);
165
+ }
166
+ }
167
+
168
+ seqReadInfo->m_pCurrentReadPtr = seqReadInfo->m_pNextReadPtr;
169
+ seqReadInfo->m_currentNuclReadIdx = 0;
170
+
171
+ // clear ref flag before each code check
172
+ seqReadInfo->m_bIsRef = false;
173
+ seqReadInfo->m_refCnt = 0;
174
+
175
+ for(;;) {
176
+ int32_t code = readCnySeqUint8(seqReadInfo);
177
+ // printf("checking code %d\n", code);
178
+ switch (code & 0xc0) {
179
+ case 0x00: // short sequence
180
+ case 0x40:
181
+ seqReadInfo->m_currentReadLength = code & 0x7f;
182
+ seqReadInfo->m_pNextReadPtr = seqReadInfo->m_pCurrentReadPtr + ((seqReadInfo->m_currentReadLength + 3) >> 2);
183
+ break;
184
+ case 0x80: // long sequence
185
+ seqReadInfo->m_currentReadLength = readCnySeqUint32(seqReadInfo);
186
+ seqReadInfo->m_pNextReadPtr = seqReadInfo->m_pCurrentReadPtr + ((seqReadInfo->m_currentReadLength + 3) >> 2);
187
+ if (code & 0x20) {
188
+ // ref info present
189
+ seqReadInfo->m_bIsRef = true;
190
+ seqReadInfo->m_pNextReadPtr += (sizeof(seqReadInfo->m_refCnt));
191
+ // length is updated once count is read
192
+ }
193
+ break;
194
+ case 0xc0: // new file / category
195
+ if (code == EOF) {
196
+ return false;
197
+ }
198
+ seqReadInfo->m_currCategory = (Category) readCnySeqUint32(seqReadInfo);
199
+ if (seqReadInfo->m_currCategory < 0 || seqReadInfo->m_currCategory > REFERENCE) {
200
+ velvetLog("Illegal category %d\n", (int32_t) seqReadInfo->m_currCategory);
201
+ exit(1);
202
+ }
203
+ continue;
204
+ }
205
+
206
+ if (seqReadInfo->m_currentReadLength > seqReadInfo->m_maxSeqLen ||
207
+ seqReadInfo->m_currentReadLength < seqReadInfo->m_minSeqLen) {
208
+ velvetLog("Cny seq consistency check failed, len mismatch\n");
209
+ #ifdef DEBUG
210
+ abort();
211
+ #endif
212
+ exit(1);
213
+ }
214
+
215
+ return true;
216
+
217
+ }
218
+ }
219
+
220
+ void resetCnySeqCurrentRead(SequencesReader *seqReadInfo)
221
+ {
222
+ seqReadInfo->m_pReadBufEnd = seqReadInfo->m_pReadBuffer;
223
+ seqReadInfo->m_pNextReadPtr = seqReadInfo->m_pReadBuffer;
224
+ seqReadInfo->m_pCurrentReadPtr = seqReadInfo->m_pReadBuffer;
225
+ seqReadInfo->m_currentReadLength = 0;
226
+ seqReadInfo->m_readBufPos = 0;
227
+
228
+ if (fseek(seqReadInfo->m_pFile, sizeof(CnyUnifiedSeqFileHeader), SEEK_SET) < 0) {
229
+ perror("Unable to seek\n");
230
+ exit(1);
231
+ }
232
+
233
+ advanceCnySeqCurrentRead(seqReadInfo);
234
+ }
235
+
236
+ void getCnySeqNucl(SequencesReader *seqReadInfo, uint8_t *sequence) {
237
+ uint32_t nuclIdx;
238
+ for (nuclIdx = 0; nuclIdx < seqReadInfo->m_currentReadLength; nuclIdx += 4) {
239
+ sequence[nuclIdx / 4] = (uint8_t)readCnySeqUint8(seqReadInfo);
240
+ }
241
+ }
242
+
243
+ ReadSet *importCnyReadSet(char *filename)
244
+ {
245
+ IDnum sequenceCount, sequenceIndex;
246
+ ReadSet *reads;
247
+ uint8_t *tmp;
248
+ Coordinate totalLength = 0;
249
+ int arrayLength;
250
+ SequencesReader seqReadInfo;
251
+ memset(&seqReadInfo, 0, sizeof(seqReadInfo));
252
+
253
+ seqReadInfo.m_pFile = openCnySeqForRead(filename, &seqReadInfo.m_unifiedSeqFileHeader);
254
+ seqReadInfo.m_numCategories = seqReadInfo.m_unifiedSeqFileHeader.m_numCategories;
255
+ seqReadInfo.m_minSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_minSeqLen;
256
+ seqReadInfo.m_maxSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_maxSeqLen;
257
+ seqReadInfo.m_bIsRef = false;
258
+
259
+ if (seqReadInfo.m_pFile != NULL)
260
+ velvetLog("Reading CNY read set file %s\n", filename);
261
+ else
262
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
263
+
264
+ // readInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, sizeof(*readInfo.m_pReadBuffer));
265
+ seqReadInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, uint8_t );
266
+ seqReadInfo.m_pCurrentReadPtr = seqReadInfo.m_pReadBufEnd = 0;
267
+
268
+ reads = newReadSet();
269
+
270
+ resetCnySeqCurrentRead(&seqReadInfo);
271
+ sequenceCount = seqReadInfo.m_unifiedSeqFileHeader.m_sequenceCnt;
272
+
273
+ velvetLog("%li sequences found\n", (long) sequenceCount);
274
+
275
+ reads->readCount = sequenceCount;
276
+
277
+ if (reads->readCount == 0) {
278
+ reads->sequences = NULL;
279
+ reads->categories = NULL;
280
+ free(seqReadInfo.m_pReadBuffer);
281
+ return reads;
282
+ }
283
+
284
+ reads->sequences = NULL;
285
+ reads->categories = callocOrExit(sequenceCount, Category);
286
+ reads->tSequences = mallocOrExit(sequenceCount, TightString);
287
+ // note there is some overhead with the seq store
288
+ reads->tSeqMem = callocOrExit (seqReadInfo.m_unifiedSeqFileHeader.m_seqNuclStoreSize, char);
289
+ tmp = (uint8_t *) reads->tSeqMem;
290
+ uint8_t * arrayEnd = tmp + seqReadInfo.m_unifiedSeqFileHeader.m_seqNuclStoreSize;
291
+ // read all sequence and category info in one pass
292
+ for (sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex += 1) {
293
+ reads->categories[sequenceIndex] = seqReadInfo.m_currCategory;
294
+ if (sizeof(ShortLength) == sizeof(int16_t) && seqReadInfo.m_currentReadLength > SHRT_MAX) {
295
+ velvetLog("Read %li of length %lli, longer than limit %i\n",
296
+ (long) sequenceIndex + 1, (long long) seqReadInfo.m_currentReadLength, SHRT_MAX);
297
+ velvetLog("You should recompile Velvet with the LONGSEQUENCES option.\n");
298
+ exit(1);
299
+ }
300
+ // only use tString to reduce memory use
301
+ reads->tSequences[sequenceIndex].length = seqReadInfo.m_currentReadLength;
302
+ arrayLength = (reads->tSequences[sequenceIndex].length + 3) / 4;
303
+ if ((tmp + arrayLength) > arrayEnd) {
304
+ velvetLog("array location 0x%lx for seq %ld beyond end 0x%lx\n", (uint64_t) tmp, (uint64_t) sequenceIndex, (uint64_t) arrayEnd);
305
+ exit(1);
306
+ }
307
+ totalLength += arrayLength;
308
+ reads->tSequences[sequenceIndex].sequence = tmp;
309
+ getCnySeqNucl(&seqReadInfo, tmp);
310
+ if (seqReadInfo.m_bIsRef) {
311
+ seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
312
+ // now the next ptr is advanced
313
+ seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
314
+ RefInfo refElem;
315
+ uint32_t refIdx;
316
+ for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
317
+ // not actually used so just read past refs
318
+ refElem.m_referenceID = readCnySeqUint32(&seqReadInfo);
319
+ refElem.m_pos = readCnySeqUint32(&seqReadInfo);
320
+ }
321
+ }
322
+ tmp += arrayLength;
323
+ if (sequenceIndex < sequenceCount) {
324
+ advanceCnySeqCurrentRead(&seqReadInfo);
325
+ }
326
+ }
327
+
328
+ fclose(seqReadInfo.m_pFile);
329
+ computeSecondInPair(reads);
330
+
331
+ free(seqReadInfo.m_pReadBuffer);
332
+ velvetLog("Done\n");
333
+ return reads;
334
+
335
+ }
336
+
337
+ // write routines
338
+ #define ADENINE 0
339
+ #define CYTOSINE 1
340
+ #define GUANINE 2
341
+ #define THYMINE 3
342
+ #define INVALID 5
343
+
344
+ static void cnySeqHostBufferFull(SequencesWriter *seqWriteInfo)
345
+ {
346
+ // The current Host buffer is full
347
+ switch (seqWriteInfo->m_hostBuffersInUse) {
348
+ case 1: // buf[0] is full, start using buf[1]
349
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[1];
350
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pHostBufPtr + WRITE_BUF_SIZE;
351
+ seqWriteInfo->m_hostBufferFilePos[1] = seqWriteInfo->m_hostBufferFilePos[0] + WRITE_BUF_SIZE;
352
+ seqWriteInfo->m_hostBuffersInUse = 2;
353
+ break;
354
+ case 2: // buf[0] and buf[1] are full, start using buf[2]
355
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[2];
356
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pHostBufPtr + WRITE_BUF_SIZE;
357
+ seqWriteInfo->m_hostBufferFilePos[2] = seqWriteInfo->m_hostBufferFilePos[1] + WRITE_BUF_SIZE;
358
+ seqWriteInfo->m_hostBuffersInUse = 3;
359
+ break;
360
+ case 3: // all three buffers are full, write out buf[2] and reuse
361
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[2], SEEK_SET) < 0) {
362
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
363
+ exit(1);
364
+ }
365
+
366
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[2], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1) {
367
+ velvetLog("Unable to write CnyUnifiedSeq\n");
368
+ exit(1);
369
+ }
370
+
371
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[2];
372
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pHostBufPtr + WRITE_BUF_SIZE;
373
+ seqWriteInfo->m_hostBufferFilePos[2] = seqWriteInfo->m_hostBufferFilePos[2] + WRITE_BUF_SIZE;
374
+ break;
375
+ default:
376
+ velvetLog("Unknown CnySeq host buffer state %d\n", seqWriteInfo->m_hostBuffersInUse);
377
+ exit(1);
378
+ break;
379
+ }
380
+ }
381
+
382
+ static void moveCnySeqNucleotides(SequencesWriter *seqWriteInfo)
383
+ {
384
+ // move nucleotides in buffer to allow a four byte length value
385
+ // the current sequence may span two buffers
386
+
387
+ uint64_t bufIdx = (seqWriteInfo->m_hostBuffersInUse == 2) ? (seqWriteInfo->m_pHostBufPtr - seqWriteInfo->m_pWriteBuffer[1] + WRITE_BUF_SIZE) : (seqWriteInfo->m_pHostBufPtr - seqWriteInfo->m_pWriteBuffer[0]);
388
+ if (bufIdx + 4 > 2 * WRITE_BUF_SIZE) {
389
+ velvetLog("CnySeq bufIdx %ld too large\n", bufIdx);
390
+ exit(1);
391
+ }
392
+
393
+ if (bufIdx + 4 >= WRITE_BUF_SIZE) {
394
+ // continue writing to buf[1]
395
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[1] + (bufIdx + 4 - WRITE_BUF_SIZE);
396
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pWriteBuffer[1] + WRITE_BUF_SIZE;
397
+ seqWriteInfo->m_hostBufferFilePos[1] = seqWriteInfo->m_hostBufferFilePos[0] + WRITE_BUF_SIZE;
398
+ seqWriteInfo->m_hostBuffersInUse = 2;
399
+ } else
400
+ seqWriteInfo->m_pHostBufPtr += 4;
401
+
402
+ seqWriteInfo->m_insertCurrentIndex += 16;
403
+
404
+ uint64_t cnt;
405
+ for (cnt = (seqWriteInfo->m_insertLength+3)>>2; cnt > 0; cnt -= 1) {
406
+ seqWriteInfo->m_pWriteBuffer[(bufIdx+4) >> WRITE_BUF_SHFT][(bufIdx+4) & WRITE_BUF_MASK] = seqWriteInfo->m_pWriteBuffer[bufIdx >> WRITE_BUF_SHFT][bufIdx & WRITE_BUF_MASK];
407
+ bufIdx -= 1;
408
+ }
409
+ }
410
+
411
+ static void writeCnySeqNucleotide(uint8_t nucleotide, SequencesWriter *seqWriteInfo)
412
+ {
413
+ if (seqWriteInfo->m_insertLength == SHORT_NUCL_LENGTH-1) {
414
+ moveCnySeqNucleotides(seqWriteInfo);
415
+ }
416
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) == 0)
417
+ *seqWriteInfo->m_pHostBufPtr = 0;
418
+
419
+ *seqWriteInfo->m_pHostBufPtr = *seqWriteInfo->m_pHostBufPtr | (nucleotide << ((seqWriteInfo->m_insertCurrentIndex & 0x3) * 2));
420
+
421
+ seqWriteInfo->m_insertLength += 1;
422
+ seqWriteInfo->m_insertCurrentIndex += 1;
423
+
424
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) == 0) {
425
+ seqWriteInfo->m_pHostBufPtr += 1;
426
+
427
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
428
+ cnySeqHostBufferFull(seqWriteInfo);
429
+ }
430
+ }
431
+
432
+ void cnySeqInsertNucleotideString(const char *pReadBuf, SequencesWriter *seqWriteInfo) {
433
+ uint8_t nucleotide;
434
+
435
+ static boolean bInit = false;
436
+ static uint8_t charMap[256];
437
+
438
+ if (!bInit) {
439
+ bInit = true;
440
+ // anything unusual defaults to A
441
+ memset(charMap, ADENINE, 256);
442
+ charMap[(int)'C'] = charMap[(int)'c'] = CYTOSINE;
443
+ charMap[(int)'G'] = charMap[(int)'g'] = GUANINE;
444
+ charMap[(int)'T'] = charMap[(int)'t'] = THYMINE;
445
+ charMap[(int)'\0'] = 4;
446
+ }
447
+
448
+ for (;;) {
449
+ nucleotide = charMap[(int)*pReadBuf];
450
+ if (nucleotide < 4) {
451
+ writeCnySeqNucleotide(nucleotide, seqWriteInfo);
452
+ pReadBuf += 1;
453
+ continue;
454
+ } else if (nucleotide == 4) {
455
+ return;
456
+ } else {
457
+ velvetLog("CnySeq unexpected char %c (%d)\n", *pReadBuf, (int) *pReadBuf);
458
+ exit(1);
459
+ }
460
+ }
461
+ }
462
+
463
+ SequencesWriter * openCnySeqForWrite(const char *unifiedSeqFileName)
464
+ {
465
+ SequencesWriter *seqWriteInfo = callocOrExit(1, SequencesWriter);
466
+ seqWriteInfo->m_pWriteBuffer[0] = NULL;
467
+ seqWriteInfo->m_pWriteBuffer[1] = NULL;
468
+ seqWriteInfo->m_pWriteBuffer[2] = NULL;
469
+ char seqNamesFileName[5000];
470
+
471
+ strcpy(seqNamesFileName, unifiedSeqFileName);
472
+ strcat(seqNamesFileName, ".names");
473
+
474
+ #ifdef COLOR
475
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bColor = true;
476
+ #else
477
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bColor = false;
478
+ #endif
479
+
480
+ if ((seqWriteInfo->m_pFile = fopen(unifiedSeqFileName, "wb")) == 0) {
481
+ velvetLog("Unable to open %s for writing\n", unifiedSeqFileName);
482
+ exit(1);
483
+ }
484
+
485
+ if ((seqWriteInfo->m_nameFile = fopen(seqNamesFileName, "w")) == 0) {
486
+ velvetLog("Unable to open %s for writing\n", seqNamesFileName);
487
+ exit(1);
488
+ }
489
+
490
+ memcpy(&seqWriteInfo->m_unifiedSeqFileHeader.m_magic, "CSQ0", 4);
491
+ seqWriteInfo->m_unifiedSeqFileHeader.m_timeStamp = time(0);
492
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bFileWriteCompleted = false;
493
+
494
+ if (fwrite(&seqWriteInfo->m_unifiedSeqFileHeader, sizeof(CnyUnifiedSeqFileHeader), 1, seqWriteInfo->m_pFile) != 1) {
495
+ velvetLog("Unable to write file %s\n", unifiedSeqFileName);
496
+ exit(1);
497
+ }
498
+
499
+ seqWriteInfo->m_insertCurrentIndex = 0;
500
+ seqWriteInfo->m_pWriteBuffer[0] = mallocOrExit(WRITE_BUF_SIZE, uint8_t);
501
+ seqWriteInfo->m_pWriteBuffer[1] = mallocOrExit(WRITE_BUF_SIZE, uint8_t);
502
+ seqWriteInfo->m_pWriteBuffer[2] = mallocOrExit(WRITE_BUF_SIZE, uint8_t);
503
+
504
+ seqWriteInfo->m_hostBufferFilePos[0] = sizeof(CnyUnifiedSeqFileHeader);
505
+
506
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[0];
507
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pWriteBuffer[0] + WRITE_BUF_SIZE;
508
+ seqWriteInfo->m_hostBuffersInUse = 1;
509
+ seqWriteInfo->m_fileSegmentWriteIdx = 0; // file segment currently being written
510
+ seqWriteInfo->m_unifiedSeqFileHeader.m_sequenceCnt = 0;
511
+ seqWriteInfo->m_unifiedSeqFileHeader.m_minSeqLen = ~0LL;
512
+ seqWriteInfo->m_unifiedSeqFileHeader.m_maxSeqLen = 0;
513
+ seqWriteInfo->m_unifiedSeqFileHeader.m_totalSeqLen = 0;
514
+ return seqWriteInfo;
515
+ }
516
+
517
+ static void alignCnySeqToNextByteBoundary(SequencesWriter *seqWriteInfo)
518
+ {
519
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) != 0)
520
+ seqWriteInfo->m_pHostBufPtr += 1;
521
+
522
+ seqWriteInfo->m_insertCurrentIndex = (seqWriteInfo->m_insertCurrentIndex + 3) & ~0x3LL;
523
+
524
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax) {
525
+ cnySeqHostBufferFull(seqWriteInfo);
526
+ }
527
+ }
528
+
529
+ static void writeCnySeqUint8(uint8_t uint8, SequencesWriter *seqWriteInfo)
530
+ {
531
+ *seqWriteInfo->m_pHostBufPtr++ = uint8;
532
+ seqWriteInfo->m_insertCurrentIndex += 4;
533
+
534
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax) {
535
+ cnySeqHostBufferFull(seqWriteInfo);
536
+ }
537
+ }
538
+
539
+ static void writeCnySeqUint32(uint32_t uint32, SequencesWriter *seqWriteInfo)
540
+ {
541
+ int i;
542
+ for (i = 0; i < 4; i += 1)
543
+ writeCnySeqUint8((uint8_t)(uint32 >> (i*8)), seqWriteInfo);
544
+ }
545
+
546
+ void inputCnySeqFileStart(Category category, SequencesWriter *seqWriteInfo)
547
+ {
548
+ if (category > REFERENCE) {
549
+ velvetLog("Found category %d beyond max of %d\n", category, REFERENCE);
550
+ exit(1);
551
+ }
552
+
553
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
554
+ writeCnySeqUint8(0xc0, seqWriteInfo);
555
+ writeCnySeqUint32(category, seqWriteInfo);
556
+ }
557
+
558
+ void cnySeqInsertStart(SequencesWriter *seqWriteInfo)
559
+ {
560
+ seqWriteInfo->m_unifiedSeqFileHeader.m_sequenceCnt += 1;
561
+
562
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
563
+
564
+ seqWriteInfo->m_insertLength = 0;
565
+ seqWriteInfo->m_pHostLengthBufPtr = seqWriteInfo->m_pHostBufPtr;
566
+ seqWriteInfo->m_pHostLengthBufPtrMax = seqWriteInfo->m_pHostBufPtrMax;
567
+ seqWriteInfo->m_pHostBufPtr += 1;
568
+ seqWriteInfo->m_insertLengthIndex = seqWriteInfo->m_insertCurrentIndex >> 2; // byte index
569
+ seqWriteInfo->m_insertCurrentIndex += 4; // allow for single byte header
570
+ seqWriteInfo->m_insertStartIndex = seqWriteInfo->m_insertCurrentIndex;
571
+
572
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
573
+ {
574
+ cnySeqHostBufferFull(seqWriteInfo);
575
+ }
576
+
577
+ seqWriteInfo->m_position = 0;
578
+ seqWriteInfo->m_openMask = false;
579
+ }
580
+
581
+ void cnySeqInsertSequenceName(const char *name, IDnum readID, SequencesWriter *seqWriteInfo, Category cat) {
582
+ if (fprintf(seqWriteInfo->m_nameFile, "%s\t%li\t%li\n", name, (long) readID, (long) cat) < 0) {
583
+ velvetLog("Unable to write in name file\n");
584
+ exit(1);
585
+ }
586
+ }
587
+
588
+ void cnySeqInsertReferenceMask(SequencesWriter *seqWriteInfo, Mask *referenceMask) {
589
+ Mask *tmp;
590
+ for (tmp = referenceMask; tmp; tmp = tmp->next) {
591
+ if (fprintf(seqWriteInfo->m_nameFile, "%li\t%li\n", (long) tmp->start, (long) tmp->finish) < 0) {
592
+ velvetLog("Unable to write ref in name file\n");
593
+ exit(1);
594
+ }
595
+ }
596
+ }
597
+
598
+ void cnySeqInsertEnd(SequencesWriter *seqWriteInfo)
599
+ {
600
+ uint8_t *tmp;
601
+
602
+ // fill last few empty nucleotides with a fixed pattern for consistency checking
603
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) != 0) {
604
+ *seqWriteInfo->m_pHostBufPtr |= 0xAA << ((seqWriteInfo->m_insertCurrentIndex & 0x3)*2);
605
+ }
606
+
607
+ // collect read length statistics
608
+ if (seqWriteInfo->m_unifiedSeqFileHeader.m_minSeqLen > seqWriteInfo->m_insertLength)
609
+ seqWriteInfo->m_unifiedSeqFileHeader.m_minSeqLen = seqWriteInfo->m_insertLength;
610
+ if (seqWriteInfo->m_unifiedSeqFileHeader.m_maxSeqLen < seqWriteInfo->m_insertLength)
611
+ seqWriteInfo->m_unifiedSeqFileHeader.m_maxSeqLen = seqWriteInfo->m_insertLength;
612
+
613
+ seqWriteInfo->m_unifiedSeqFileHeader.m_totalSeqLen += seqWriteInfo->m_insertLength;
614
+
615
+ if (seqWriteInfo->m_insertLength >= SHORT_NUCL_LENGTH || seqWriteInfo->m_bIsRef) {
616
+ if (seqWriteInfo->m_bIsRef) {
617
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
618
+
619
+ if (seqWriteInfo->m_insertLength < SHORT_NUCL_LENGTH) {
620
+ // the align above points to next byte,
621
+ // need to back up to last byte of nucl seq
622
+ seqWriteInfo->m_pHostBufPtr -= 1;
623
+ moveCnySeqNucleotides(seqWriteInfo);
624
+ // move to next byte
625
+ seqWriteInfo->m_pHostBufPtr += 1;
626
+ }
627
+
628
+ // write out map info
629
+ int idx;
630
+ for (idx = 0; idx < 4; idx += 1) {
631
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
632
+ cnySeqHostBufferFull(seqWriteInfo);
633
+ *seqWriteInfo->m_pHostBufPtr++ = (seqWriteInfo->m_refCnt >> (idx*8)) & 0xff;
634
+ seqWriteInfo->m_insertCurrentIndex += 4; // single byte
635
+ }
636
+ int refIdx;
637
+ RefInfoList *refElem = seqWriteInfo->m_refInfoHead;
638
+ RefInfoList *prev = NULL;
639
+ for (refIdx = 0; refIdx < seqWriteInfo->m_refCnt; refIdx++) {
640
+
641
+ if (refElem == NULL) {
642
+ velvetLog("reference but element %d NULL\n", refIdx);
643
+ exit(1);
644
+ }
645
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
646
+
647
+ for (idx = 0; idx < 4; idx += 1) {
648
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
649
+ cnySeqHostBufferFull(seqWriteInfo);
650
+ *seqWriteInfo->m_pHostBufPtr++ = (refElem->m_elem.m_referenceID >> (idx*8)) & 0xff;
651
+ seqWriteInfo->m_insertCurrentIndex += 4; // single byte
652
+ }
653
+ for (idx = 0; idx < 4; idx += 1) {
654
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
655
+ cnySeqHostBufferFull(seqWriteInfo);
656
+ *seqWriteInfo->m_pHostBufPtr++ = (refElem->m_elem.m_pos >> (idx*8)) & 0xff;
657
+ seqWriteInfo->m_insertCurrentIndex += 4; // single byte
658
+ }
659
+
660
+ prev = refElem;
661
+ refElem = refElem->next;
662
+ free(prev);
663
+ }
664
+ if (refElem != NULL) {
665
+ velvetLog("more than %d elements in ref\n", seqWriteInfo->m_refCnt);
666
+ exit(1);
667
+ }
668
+ seqWriteInfo->m_bIsRef = false;
669
+ seqWriteInfo->m_refInfoHead = NULL;
670
+ seqWriteInfo->m_refCnt = 0; // set ref bit
671
+ *(seqWriteInfo->m_pHostLengthBufPtr++) = 0xa0 | ((seqWriteInfo->m_insertLength >> 32) & 0x1f);
672
+ } else {
673
+ // one byte control, four byte length
674
+ *(seqWriteInfo->m_pHostLengthBufPtr++) = 0x80 | ((seqWriteInfo->m_insertLength >> 32) & 0x1f);
675
+ }
676
+ int idx;
677
+ for (idx = 0; idx < 4; idx += 1) {
678
+ if (seqWriteInfo->m_pHostLengthBufPtr == seqWriteInfo->m_pHostLengthBufPtrMax) {
679
+ if (seqWriteInfo->m_hostBuffersInUse < 2) {
680
+ velvetLog("CnySeq m_hostBuffersInUse %d\n", seqWriteInfo->m_hostBuffersInUse);
681
+ exit(1);
682
+ }
683
+ seqWriteInfo->m_pHostLengthBufPtr = seqWriteInfo->m_pWriteBuffer[1];
684
+ }
685
+ *seqWriteInfo->m_pHostLengthBufPtr++ = (seqWriteInfo->m_insertLength >> (idx*8)) & 0xff;
686
+ }
687
+
688
+ } else {
689
+ // one byte length;
690
+ *seqWriteInfo->m_pHostLengthBufPtr = (uint8_t)seqWriteInfo->m_insertLength;
691
+ }
692
+
693
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
694
+
695
+ switch (seqWriteInfo->m_hostBuffersInUse) {
696
+ case 1: // buf[0] is being written
697
+ break;
698
+ case 2: // buf[0] and buf[1] are being written, write buf[0] to disk
699
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[0], SEEK_SET) < 0) {
700
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
701
+ exit(1);
702
+ }
703
+
704
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[0], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1) {
705
+ velvetLog("Unable to write CnyUnifiedSeq\n");
706
+ exit(1);
707
+ }
708
+
709
+ // swap buf[0] and buf[1]
710
+ tmp = seqWriteInfo->m_pWriteBuffer[0];
711
+ seqWriteInfo->m_pWriteBuffer[0] = seqWriteInfo->m_pWriteBuffer[1];
712
+ seqWriteInfo->m_pWriteBuffer[1] = tmp;
713
+
714
+ seqWriteInfo->m_hostBufferFilePos[0] = seqWriteInfo->m_hostBufferFilePos[1];
715
+
716
+ seqWriteInfo->m_hostBuffersInUse = 1;
717
+ break;
718
+ case 3: // buf[0], [1] and [2] are in use, write buf[0] and [1] to disk
719
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[0], SEEK_SET) < 0) {
720
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
721
+ exit(1);
722
+ }
723
+
724
+
725
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[0], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1){
726
+ velvetLog("Unable to write CnyUnifiedSeq\n");
727
+ exit(1);
728
+ }
729
+
730
+
731
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[1], SEEK_SET) < 0) {
732
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
733
+ exit(1);
734
+ }
735
+
736
+
737
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[1], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1){
738
+ velvetLog("Unable to write CnyUnifiedSeq\n");
739
+ exit(1);
740
+ }
741
+
742
+ // swap buf[0] and buf[2]
743
+ tmp = seqWriteInfo->m_pWriteBuffer[0];
744
+ seqWriteInfo->m_pWriteBuffer[0] = seqWriteInfo->m_pWriteBuffer[2];
745
+ seqWriteInfo->m_pWriteBuffer[2] = tmp;
746
+
747
+ seqWriteInfo->m_hostBufferFilePos[0] = seqWriteInfo->m_hostBufferFilePos[2];
748
+
749
+ seqWriteInfo->m_hostBuffersInUse = 1;
750
+ break;
751
+ }
752
+
753
+ // if ref masks, write mapping info to the names file
754
+ if (seqWriteInfo->m_referenceMask && *(seqWriteInfo->m_referenceMask)) {
755
+ cnySeqInsertReferenceMask(seqWriteInfo, *(seqWriteInfo->m_referenceMask));
756
+ // free memory and clear list
757
+ if (seqWriteInfo->m_maskMemory) {
758
+ destroyRecycleBin(seqWriteInfo->m_maskMemory);
759
+ seqWriteInfo->m_maskMemory = NULL;
760
+ }
761
+ *(seqWriteInfo->m_referenceMask) = NULL;
762
+ }
763
+ }
764
+
765
+ void closeCnySeqForWrite(SequencesWriter *seqWriteInfo)
766
+ {
767
+ // should be only one buffer in use
768
+ if (seqWriteInfo->m_hostBuffersInUse != 1) {
769
+ velvetLog("CnySeq host buffers in use %d\n", seqWriteInfo->m_hostBuffersInUse);
770
+ exit(1);
771
+ }
772
+
773
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[0], SEEK_SET) < 0) {
774
+ velvetLog("Unable to seek CnySeq\n");
775
+ exit(1);
776
+ }
777
+
778
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[0], (uint32_t)(seqWriteInfo->m_pHostBufPtr - seqWriteInfo->m_pWriteBuffer[0]), 1, seqWriteInfo->m_pFile) != 1) {
779
+ velvetLog("Unable to write CnySeq\n");
780
+ exit(1);
781
+ }
782
+
783
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bFileWriteCompleted = true;
784
+ seqWriteInfo->m_unifiedSeqFileHeader.m_seqNuclStoreSize = seqWriteInfo->m_insertCurrentIndex >> 2;
785
+ seqWriteInfo->m_unifiedSeqFileHeader.m_numCategories = CATEGORIES;
786
+
787
+ if (fseek(seqWriteInfo->m_pFile, 0, SEEK_SET) < 0) {
788
+ velvetLog("Unable to seek CnySeq\n");
789
+ exit(1);
790
+ }
791
+
792
+ if (fwrite(&seqWriteInfo->m_unifiedSeqFileHeader, sizeof(CnyUnifiedSeqFileHeader), 1, seqWriteInfo->m_pFile) != 1) {
793
+ velvetLog("Unable to write CnySeq\n");
794
+ exit(1);
795
+ }
796
+
797
+ if (fclose(seqWriteInfo->m_pFile) < 0) {
798
+ velvetLog("Unable to close CnySeq\n");
799
+ exit(1);
800
+ }
801
+
802
+ if (fclose(seqWriteInfo->m_nameFile) < 0) {
803
+ velvetLog("Unable to close names file\n");
804
+ exit(1);
805
+ }
806
+
807
+ if (seqWriteInfo->m_pWriteBuffer[0])
808
+ free(seqWriteInfo->m_pWriteBuffer[0]);
809
+ if (seqWriteInfo->m_pWriteBuffer[1])
810
+ free(seqWriteInfo->m_pWriteBuffer[1]);
811
+ if (seqWriteInfo->m_pWriteBuffer[2])
812
+ free(seqWriteInfo->m_pWriteBuffer[2]);
813
+ }