finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,86 @@
1
+ /*
2
+ Copyright 2009 Sylvain Foret (sylvain.foret@anu.edu.au)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+
22
+ #ifndef _ALLOC_ARRAY_H_
23
+ #define _ALLOC_ARRAY_H_
24
+
25
+ #ifdef _OPENMP
26
+ #include <omp.h>
27
+ #endif
28
+
29
+ #include "globals.h"
30
+
31
+ typedef struct AllocArray_st AllocArray;
32
+ typedef struct AllocArrayFreeElement_st AllocArrayFreeElement;
33
+
34
+ struct AllocArray_st /* Bookkeeping for a pool of fixed-size elements stored in blocks and addressed by ArrayIdx */
35
+ {
36
+ void **blocks; /* blocks[b] is cast to an element array by the *_FI2P accessors generated in this header */
37
+ AllocArrayFreeElement *freeElements; /* presumably the list of freed elements awaiting reuse -- confirm in allocArray.c */
38
+ size_t elementSize; /* presumably the size in bytes of one element (cf. newAllocArray's elementSize argument) */
39
+ size_t blockSize; /* NOTE(review): units (bytes vs. elements) are not visible in this header -- confirm */
40
+ size_t maxBlocks; /* presumably the capacity of blocks[] -- confirm */
41
+ size_t currentBlocks; /* presumably the number of blocks allocated so far -- confirm */
42
+ size_t maxElements; /* elements per block: the accessors split (idx - 1) by division / modulo with this value */
43
+ size_t currentElements; /* presumably the number of elements handed out from the newest block -- confirm */
44
+ #ifdef DEBUG
45
+ char *name; /* debug-only label (cf. newAllocArray's name argument) */
46
+ size_t elementsRecycled; /* debug-only counter; exact meaning not visible here */
47
+ size_t elementsAllocated; /* debug-only counter; exact meaning not visible here */
48
+ #endif
49
+ #ifdef _OPENMP
50
+ int nbThreads; /* only present in OpenMP builds; presumably the number of per-thread arrays -- confirm */
51
+ #endif
52
+ };
53
+
54
+ AllocArray* newAllocArray (size_t elementSize, char *name); /* creates an allocator for elements of elementSize; name is presumably only a debug label -- confirm */
55
+ void destroyAllocArray (AllocArray *array); /* releases an allocator obtained from newAllocArray */
56
+ ArrayIdx allocArrayAllocate (AllocArray *array); /* returns the index of a new element; indices are 1-based as far as the accessors are concerned (they use idx - 1) */
57
+ void allocArrayFree (AllocArray *array, ArrayIdx idx); /* gives element idx back to the allocator; presumably for later reuse -- confirm */
58
+
59
/*
 * DECLARE_FAST_ACCESSORS(name, type, array)
 *
 * Generates two static inline index-to-pointer converters for the AllocArray
 * designated by `array`:
 *   name##_FI2P(idx): no validity check; idx must be a live 1-based index.
 *   name##_I2P(idx):  returns NULL when idx is NULL_IDX, otherwise as above.
 *
 * `array` is parenthesised in the expansion so that any pointer-valued
 * expression can be passed, not just a plain identifier.
 */
#define DECLARE_FAST_ACCESSORS(name, type, array) \
/* Fast version, without null pointer checks */ \
static inline type* name##_FI2P(ArrayIdx idx) \
{ \
	/* Indices are 1-based; 0-based position is split into block and slot */ \
	const ArrayIdx i = idx - 1; \
	const ArrayIdx blockIdx = i / (array)->maxElements; \
	const ArrayIdx elementIdx = i % (array)->maxElements; \
	return &((type*)((array)->blocks[blockIdx]))[elementIdx]; \
} \
/* Slower version, with null pointer checks */ \
static inline type* name##_I2P(ArrayIdx idx) \
{ \
	if (idx != NULL_IDX) \
		return name##_FI2P(idx); \
	return NULL; \
}
75
+
76
+ #ifdef _OPENMP
77
+ // For multithreading: thread-specific alloc arrays (only declared when building with OpenMP)
78
+ AllocArray *newAllocArrayArray(unsigned int n, /* presumably the number of per-thread arrays to create -- confirm */
79
+ size_t elementSize,
80
+ char * name);
81
+ void destroyAllocArrayArray(AllocArray * allocArray); /* counterpart of newAllocArrayArray */
82
+ ArrayIdx allocArrayArrayAllocate (AllocArray *array); /* NOTE(review): how the calling thread selects its array is not visible here */
83
+ void allocArrayArrayFree (AllocArray *array, ArrayIdx idx);
84
+ #endif
85
+
86
+ #endif /* _ALLOC_ARRAY_H_ */
@@ -0,0 +1,107 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <unistd.h>
4
+ #include <sys/wait.h>
5
+ #include <string.h>
6
+
7
+ #include "autoOpen.h"
8
+
9
/*
 * Implementation of "popen" that ignores stderr.
 *
 * Forks and runs `exe` (resolved through PATH by execvp) with the given
 * NULL-terminated argument vector. The child's stdout is connected to a pipe;
 * its stdin and stderr are closed.
 *
 * exe:    program to execute
 * argv:   NULL-terminated argument vector, argv[0] included
 * retPid: receives the child's pid once the fork has succeeded
 *
 * Returns a read stream on the child's stdout, or NULL when the pipe, the
 * fork or the stream could not be created. On success the caller closes the
 * stream and reaps the child (see pcloseNoStderr).
 */
static FILE* popenNoStderr(const char *exe, const char *const argv[], int* retPid)
{
	int out[2];
	int pid;
	FILE *stream;

	if (pipe(out) < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(out[0]);
		close(out[1]);
		return NULL;
	}

	if (pid == 0) { // child
		close(out[0]);
		// Target fd 1 explicitly instead of relying on dup() returning the lowest free descriptor
		if (dup2(out[1], 1) < 0)
			_exit(1);
		if (out[1] != 1)
			close(out[1]);

		close(0); // Don't let child inherit stdin, nor stderr
		close(2);

		execvp(exe, (char**)argv);
		// Only reached when exec failed; _exit() does not flush stdio buffers copied from the parent
		_exit(1);
	}

	// parent
	close(out[1]);
	*retPid = pid;
	stream = fdopen(out[0], "r");
	if (stream == NULL) {
		// Nobody will read the pipe: release it and reap the child so it does not linger as a zombie
		close(out[0]);
		waitpid(pid, NULL, 0);
	}
	return stream;
}
46
+
47
/*
 * Closes a stream obtained from popenNoStderr and waits for the child.
 *
 * pid: child process id reported by popenNoStderr
 * out: stream returned by popenNoStderr
 *
 * Returns the raw wait status filled in by waitpid, or -1 when waitpid itself
 * failed (previously an uninitialised value was returned in that case).
 */
static int pcloseNoStderr(int pid, FILE* out)
{
	int status = 0;

	fclose(out);
	if (waitpid(pid, &status, 0) < 0)
		return -1;
	return status;
}
54
+
55
+
56
/*
 * Readers tried in order by openFileAuto. The empty string stands for
 * "read the file directly"; every other entry is a program invoked with
 * "-c -d <filename>". The list is NULL-terminated.
 * Both the strings and the table itself are read-only.
 */
static const char *const decompressors[] = {"","pigz", "gunzip", "pbunzip2", "bunzip2", NULL};
57
+
58
+ AutoFile* openFileAuto(char*filename)
59
+ {
60
+ AutoFile* seqFile = calloc(1, sizeof(AutoFile));
61
+ int i;
62
+
63
+ if (strcmp(filename, "-")==0)
64
+ exitErrorf(EXIT_FAILURE, false, "Cannot read from stdin in auto mode\n");
65
+
66
+ for (i=0; decompressors[i] ; i++) {
67
+ if (strlen(decompressors[i])==0) {
68
+ seqFile->file = fopen(filename, "r");
69
+ seqFile->pid = 0;
70
+ seqFile->decompressor = "Raw read";
71
+ } else {
72
+ //printf("Trying : %s\n", decompressors[i]);
73
+ char const* args[] = {decompressors[i], "-c", "-d", filename, NULL};
74
+ seqFile->file = popenNoStderr(args[0], args, &(seqFile->pid));
75
+ seqFile->decompressor = decompressors[i];
76
+ }
77
+
78
+ if (!seqFile->file)
79
+ continue;
80
+
81
+ int c = fgetc(seqFile->file);
82
+ if (c=='>' || c=='@') {
83
+ // Ok, looks like FASTA or FASTQ
84
+ ungetc(c, seqFile->file);
85
+ seqFile->first_char = c;
86
+ return seqFile;
87
+ } else {
88
+ if (seqFile->pid)
89
+ pcloseNoStderr(seqFile->pid, seqFile->file);
90
+ else
91
+ fclose(seqFile->file);
92
+ }
93
+ }
94
+ //printf("Unable to determine file type\n");
95
+ return NULL;
96
+ }
97
+
98
+ void closeFileAuto(AutoFile* seqFile)
99
+ {
100
+ if (!seqFile)
101
+ return;
102
+
103
+ if (seqFile->pid)
104
+ pcloseNoStderr(seqFile->pid, seqFile->file);
105
+ else
106
+ fclose(seqFile->file);
107
+ }
@@ -0,0 +1,18 @@
1
+ #ifndef AUTOOPEN_H_
2
+ #define AUTOOPEN_H_
3
+
4
+ #include "globals.h"
5
+ #include "utility.h"
6
+
7
+ typedef struct { /* Handle on a sequence file opened by openFileAuto, possibly through a decompressor process */
8
+ int pid; /* pid of the decompressor child, or 0 when the file is read directly */
9
+ FILE* file; /* stream to read sequence data from */
10
+ char const* decompressor; /* name of the program used, or "Raw read"; not owned by this struct */
11
+ int first_char; /* first character of the data: '>' or '@' */
12
+ } AutoFile;
13
+
14
+ AutoFile* openFileAuto(char*filename); /* returns NULL when the file type cannot be determined */
15
+
16
+ void closeFileAuto(AutoFile* autoFile); /* accepts NULL; closes the stream and waits for any decompressor child */
17
+
18
+ #endif
@@ -0,0 +1,813 @@
1
+ /*
2
+ Copyright 2011 Convey Computer Corporation (info@conveycomputer.com)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <math.h>
25
+ #include <time.h>
26
+ #include <limits.h>
27
+
28
+ #include "globals.h"
29
+ #include "tightString.h"
30
+ #include "readSet.h"
31
+ #include "binarySequences.h"
32
+ #include "utility.h"
33
+
34
+ #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
35
+ #include "../third-party/zlib-1.2.3/Win32/include/zlib.h"
36
+ #else
37
+ #include "../third-party/zlib-1.2.3/zlib.h"
38
+ #endif
39
+
40
+ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
41
+ # include <fcntl.h>
42
+ # include <io.h>
43
+ # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
44
+ #else
45
+ # define SET_BINARY_MODE(file)
46
+ #endif
47
+
48
+ // write defines, typedefs, and protos
49
+ #define WRITE_BUF_SHFT 16 // byte shift and mask: log2 of the size of one write buffer
50
+ #define WRITE_BUF_SIZE (1<<WRITE_BUF_SHFT) /* bytes per write buffer */
51
+ #define WRITE_BUF_MASK (WRITE_BUF_SIZE-1) /* extracts the offset inside one write buffer from a combined index */
52
+ #define SHORT_NUCL_LENGTH 128 // Nucleotide length (2-bits each); reads of at least this length get a four-byte length field
53
+
54
+ void computeSecondInPair(ReadSet * reads); /* defined elsewhere; called once a read set has been imported */
55
+
56
+ FILE *openCnySeqForRead(const char *fileName, CnyUnifiedSeqFileHeader *seqFileHeader)
57
+ {
58
+ FILE *pFile;
59
+ if ((pFile = fopen(fileName, "rb")) == 0) {
60
+ velvetLog("Unable to open %s for reading\n", fileName);
61
+ return NULL;
62
+ }
63
+
64
+ if (fread(seqFileHeader, sizeof(*seqFileHeader), 1, pFile) != 1) {
65
+ velvetLog("Unable to read file %s\n", fileName);
66
+ fclose(pFile);
67
+ return NULL;
68
+ }
69
+
70
+ if (strncmp((char *)&seqFileHeader->m_magic, "CSQ0", 4) != 0) {
71
+ velvetLog("Unknown format for file %s\n", fileName);
72
+ fclose(pFile);
73
+ return NULL;
74
+ }
75
+
76
+ if (seqFileHeader->m_bFileWriteCompleted == false) {
77
+ velvetLog("Corrupted file, %s\n", fileName);
78
+ fclose(pFile);
79
+ return NULL;
80
+ }
81
+
82
+ if (seqFileHeader->m_numCategories > CATEGORIES) {
83
+ velvetLog("File %s has %d categories, please rebuild velvet to match\n", fileName, seqFileHeader->m_numCategories);
84
+ fclose(pFile);
85
+ return NULL;
86
+ }
87
+
88
+ #ifdef COLOR
89
+ if (!seqFileHeader->m_bColor) {
90
+ velvetLog("File %s does not specify color, please rebuild velvet to match\n", fileName);
91
+ fclose(pFile);
92
+ return NULL;
93
+ }
94
+ #else
95
+ if (seqFileHeader->m_bColor) {
96
+ velvetLog("File %s specifies color, please rebuild velvet to match\n", fileName);
97
+ fclose(pFile);
98
+ return NULL;
99
+ }
100
+ #endif
101
+ return pFile;
102
+ }
103
+
104
+ static boolean refillCnySeqReadBuffer(SequencesReader *seqReadInfo)
105
+ {
106
+ uint64_t readLen = (USF_READ_BUF_SIZE < seqReadInfo->m_unifiedSeqFileHeader.m_seqNuclStoreSize - seqReadInfo->m_readBufPos) ?
107
+ USF_READ_BUF_SIZE : seqReadInfo->m_unifiedSeqFileHeader.m_seqNuclStoreSize - seqReadInfo->m_readBufPos;
108
+
109
+ if (readLen == 0)
110
+ return false;
111
+
112
+ if (fread(seqReadInfo->m_pReadBuffer, (uint32_t)readLen, 1, seqReadInfo->m_pFile) != 1) {
113
+ velvetLog("Unable to read file\n");
114
+ exit (1);
115
+ }
116
+
117
+ seqReadInfo->m_pCurrentReadPtr = seqReadInfo->m_pReadBuffer;
118
+ seqReadInfo->m_pReadBufEnd = seqReadInfo->m_pReadBuffer + readLen;
119
+ seqReadInfo->m_readBufPos += readLen;
120
+
121
+ if (seqReadInfo->m_pNextReadPtr >= seqReadInfo->m_pReadBufEnd) {
122
+ seqReadInfo->m_pNextReadPtr -= USF_READ_BUF_SIZE;
123
+ }
124
+
125
+ return true;
126
+ }
127
+
128
+ static int32_t readCnySeqUint8(SequencesReader *seqReadInfo)
129
+ {
130
+ if (seqReadInfo->m_pCurrentReadPtr == seqReadInfo->m_pReadBufEnd && !refillCnySeqReadBuffer(seqReadInfo))
131
+ {
132
+ return -1;
133
+ }
134
+
135
+ // printf("m_pCurrentReadPtr %llx\n", (long long) pReadInfo->m_pCurrentReadPtr);
136
+ return *seqReadInfo->m_pCurrentReadPtr++;
137
+ }
138
+
139
+ uint32_t readCnySeqUint32(SequencesReader *seqReadInfo)
140
+ {
141
+ uint32_t data;
142
+ data = 0;
143
+ int i;
144
+ for (i = 0; i < 4; i += 1)
145
+ data |= readCnySeqUint8(seqReadInfo) << (i*8);
146
+ return data;
147
+ }
148
+
149
+ boolean advanceCnySeqCurrentRead(SequencesReader *seqReadInfo)
150
+ {
151
+ // Perform consistency check, unused bits of previous sequence should have a fixed pattern
152
+ uint32_t finalNuclOffset = 1;
153
+ if (seqReadInfo->m_bIsRef) {
154
+ finalNuclOffset += (sizeof(seqReadInfo->m_refCnt));
155
+ finalNuclOffset += (sizeof(RefInfo) * seqReadInfo->m_refCnt);
156
+ }
157
+ if ((seqReadInfo->m_currentReadLength & 3) != 0 && ((seqReadInfo->m_pNextReadPtr - finalNuclOffset) >= seqReadInfo->m_pReadBuffer)) {
158
+ uint8_t mask = 0xFF << (seqReadInfo->m_currentReadLength & 3) * 2;
159
+ if ((*(seqReadInfo->m_pNextReadPtr - finalNuclOffset) & mask) != (0xAA & mask)) {
160
+ velvetLog("Cny seq consistency check failed in advance\n");
161
+ #ifdef DEBUG
162
+ abort();
163
+ #endif
164
+ exit(1);
165
+ }
166
+ }
167
+
168
+ seqReadInfo->m_pCurrentReadPtr = seqReadInfo->m_pNextReadPtr;
169
+ seqReadInfo->m_currentNuclReadIdx = 0;
170
+
171
+ // clear ref flag before each code check
172
+ seqReadInfo->m_bIsRef = false;
173
+ seqReadInfo->m_refCnt = 0;
174
+
175
+ for(;;) {
176
+ int32_t code = readCnySeqUint8(seqReadInfo);
177
+ // printf("checking code %d\n", code);
178
+ switch (code & 0xc0) {
179
+ case 0x00: // short sequence
180
+ case 0x40:
181
+ seqReadInfo->m_currentReadLength = code & 0x7f;
182
+ seqReadInfo->m_pNextReadPtr = seqReadInfo->m_pCurrentReadPtr + ((seqReadInfo->m_currentReadLength + 3) >> 2);
183
+ break;
184
+ case 0x80: // long sequence
185
+ seqReadInfo->m_currentReadLength = readCnySeqUint32(seqReadInfo);
186
+ seqReadInfo->m_pNextReadPtr = seqReadInfo->m_pCurrentReadPtr + ((seqReadInfo->m_currentReadLength + 3) >> 2);
187
+ if (code & 0x20) {
188
+ // ref info present
189
+ seqReadInfo->m_bIsRef = true;
190
+ seqReadInfo->m_pNextReadPtr += (sizeof(seqReadInfo->m_refCnt));
191
+ // length is updated once count is read
192
+ }
193
+ break;
194
+ case 0xc0: // new file / category
195
+ if (code == EOF) {
196
+ return false;
197
+ }
198
+ seqReadInfo->m_currCategory = (Category) readCnySeqUint32(seqReadInfo);
199
+ if (seqReadInfo->m_currCategory < 0 || seqReadInfo->m_currCategory > REFERENCE) {
200
+ velvetLog("Illegal category %d\n", (int32_t) seqReadInfo->m_currCategory);
201
+ exit(1);
202
+ }
203
+ continue;
204
+ }
205
+
206
+ if (seqReadInfo->m_currentReadLength > seqReadInfo->m_maxSeqLen ||
207
+ seqReadInfo->m_currentReadLength < seqReadInfo->m_minSeqLen) {
208
+ velvetLog("Cny seq consistency check failed, len mismatch\n");
209
+ #ifdef DEBUG
210
+ abort();
211
+ #endif
212
+ exit(1);
213
+ }
214
+
215
+ return true;
216
+
217
+ }
218
+ }
219
+
220
+ void resetCnySeqCurrentRead(SequencesReader *seqReadInfo)
221
+ {
222
+ seqReadInfo->m_pReadBufEnd = seqReadInfo->m_pReadBuffer;
223
+ seqReadInfo->m_pNextReadPtr = seqReadInfo->m_pReadBuffer;
224
+ seqReadInfo->m_pCurrentReadPtr = seqReadInfo->m_pReadBuffer;
225
+ seqReadInfo->m_currentReadLength = 0;
226
+ seqReadInfo->m_readBufPos = 0;
227
+
228
+ if (fseek(seqReadInfo->m_pFile, sizeof(CnyUnifiedSeqFileHeader), SEEK_SET) < 0) {
229
+ perror("Unable to seek\n");
230
+ exit(1);
231
+ }
232
+
233
+ advanceCnySeqCurrentRead(seqReadInfo);
234
+ }
235
+
236
+ void getCnySeqNucl(SequencesReader *seqReadInfo, uint8_t *sequence) {
237
+ uint32_t nuclIdx;
238
+ for (nuclIdx = 0; nuclIdx < seqReadInfo->m_currentReadLength; nuclIdx += 4) {
239
+ sequence[nuclIdx / 4] = (uint8_t)readCnySeqUint8(seqReadInfo);
240
+ }
241
+ }
242
+
243
+ ReadSet *importCnyReadSet(char *filename)
244
+ {
245
+ IDnum sequenceCount, sequenceIndex;
246
+ ReadSet *reads;
247
+ uint8_t *tmp;
248
+ Coordinate totalLength = 0;
249
+ int arrayLength;
250
+ SequencesReader seqReadInfo;
251
+ memset(&seqReadInfo, 0, sizeof(seqReadInfo));
252
+
253
+ seqReadInfo.m_pFile = openCnySeqForRead(filename, &seqReadInfo.m_unifiedSeqFileHeader);
254
+ seqReadInfo.m_numCategories = seqReadInfo.m_unifiedSeqFileHeader.m_numCategories;
255
+ seqReadInfo.m_minSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_minSeqLen;
256
+ seqReadInfo.m_maxSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_maxSeqLen;
257
+ seqReadInfo.m_bIsRef = false;
258
+
259
+ if (seqReadInfo.m_pFile != NULL)
260
+ velvetLog("Reading CNY read set file %s\n", filename);
261
+ else
262
+ exitErrorf(EXIT_FAILURE, true, "Could not open %s", filename);
263
+
264
+ // readInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, sizeof(*readInfo.m_pReadBuffer));
265
+ seqReadInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, uint8_t );
266
+ seqReadInfo.m_pCurrentReadPtr = seqReadInfo.m_pReadBufEnd = 0;
267
+
268
+ reads = newReadSet();
269
+
270
+ resetCnySeqCurrentRead(&seqReadInfo);
271
+ sequenceCount = seqReadInfo.m_unifiedSeqFileHeader.m_sequenceCnt;
272
+
273
+ velvetLog("%li sequences found\n", (long) sequenceCount);
274
+
275
+ reads->readCount = sequenceCount;
276
+
277
+ if (reads->readCount == 0) {
278
+ reads->sequences = NULL;
279
+ reads->categories = NULL;
280
+ free(seqReadInfo.m_pReadBuffer);
281
+ return reads;
282
+ }
283
+
284
+ reads->sequences = NULL;
285
+ reads->categories = callocOrExit(sequenceCount, Category);
286
+ reads->tSequences = mallocOrExit(sequenceCount, TightString);
287
+ // note there is some overhead with the seq store
288
+ reads->tSeqMem = callocOrExit (seqReadInfo.m_unifiedSeqFileHeader.m_seqNuclStoreSize, char);
289
+ tmp = (uint8_t *) reads->tSeqMem;
290
+ uint8_t * arrayEnd = tmp + seqReadInfo.m_unifiedSeqFileHeader.m_seqNuclStoreSize;
291
+ // read all sequence and category info in one pass
292
+ for (sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex += 1) {
293
+ reads->categories[sequenceIndex] = seqReadInfo.m_currCategory;
294
+ if (sizeof(ShortLength) == sizeof(int16_t) && seqReadInfo.m_currentReadLength > SHRT_MAX) {
295
+ velvetLog("Read %li of length %lli, longer than limit %i\n",
296
+ (long) sequenceIndex + 1, (long long) seqReadInfo.m_currentReadLength, SHRT_MAX);
297
+ velvetLog("You should recompile Velvet with the LONGSEQUENCES option.\n");
298
+ exit(1);
299
+ }
300
+ // only use tString to reduce memory use
301
+ reads->tSequences[sequenceIndex].length = seqReadInfo.m_currentReadLength;
302
+ arrayLength = (reads->tSequences[sequenceIndex].length + 3) / 4;
303
+ if ((tmp + arrayLength) > arrayEnd) {
304
+ velvetLog("array location 0x%lx for seq %ld beyond end 0x%lx\n", (uint64_t) tmp, (uint64_t) sequenceIndex, (uint64_t) arrayEnd);
305
+ exit(1);
306
+ }
307
+ totalLength += arrayLength;
308
+ reads->tSequences[sequenceIndex].sequence = tmp;
309
+ getCnySeqNucl(&seqReadInfo, tmp);
310
+ if (seqReadInfo.m_bIsRef) {
311
+ seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
312
+ // now the next ptr is advanced
313
+ seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
314
+ RefInfo refElem;
315
+ uint32_t refIdx;
316
+ for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
317
+ // not actually used so just read past refs
318
+ refElem.m_referenceID = readCnySeqUint32(&seqReadInfo);
319
+ refElem.m_pos = readCnySeqUint32(&seqReadInfo);
320
+ }
321
+ }
322
+ tmp += arrayLength;
323
+ if (sequenceIndex < sequenceCount) {
324
+ advanceCnySeqCurrentRead(&seqReadInfo);
325
+ }
326
+ }
327
+
328
+ fclose(seqReadInfo.m_pFile);
329
+ computeSecondInPair(reads);
330
+
331
+ free(seqReadInfo.m_pReadBuffer);
332
+ velvetLog("Done\n");
333
+ return reads;
334
+
335
+ }
336
+
337
+ // write routines
338
+ #define ADENINE 0 /* nucleotide codes; the writer packs each one into 2 bits */
339
+ #define CYTOSINE 1
340
+ #define GUANINE 2
341
+ #define THYMINE 3
342
+ #define INVALID 5 /* NOTE(review): not referenced in the visible part of this file */
343
+
344
+ static void cnySeqHostBufferFull(SequencesWriter *seqWriteInfo)
345
+ {
346
+ // The current Host buffer is full
347
+ switch (seqWriteInfo->m_hostBuffersInUse) {
348
+ case 1: // buf[0] is full, start using buf[1]
349
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[1];
350
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pHostBufPtr + WRITE_BUF_SIZE;
351
+ seqWriteInfo->m_hostBufferFilePos[1] = seqWriteInfo->m_hostBufferFilePos[0] + WRITE_BUF_SIZE;
352
+ seqWriteInfo->m_hostBuffersInUse = 2;
353
+ break;
354
+ case 2: // buf[0] and buf[1] are full, start using buf[2]
355
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[2];
356
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pHostBufPtr + WRITE_BUF_SIZE;
357
+ seqWriteInfo->m_hostBufferFilePos[2] = seqWriteInfo->m_hostBufferFilePos[1] + WRITE_BUF_SIZE;
358
+ seqWriteInfo->m_hostBuffersInUse = 3;
359
+ break;
360
+ case 3: // all three buffers are full, write out buf[2] and reuse
361
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[2], SEEK_SET) < 0) {
362
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
363
+ exit(1);
364
+ }
365
+
366
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[2], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1) {
367
+ velvetLog("Unable to write CnyUnifiedSeq\n");
368
+ exit(1);
369
+ }
370
+
371
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[2];
372
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pHostBufPtr + WRITE_BUF_SIZE;
373
+ seqWriteInfo->m_hostBufferFilePos[2] = seqWriteInfo->m_hostBufferFilePos[2] + WRITE_BUF_SIZE;
374
+ break;
375
+ default:
376
+ velvetLog("Unknown CnySeq host buffer state %d\n", seqWriteInfo->m_hostBuffersInUse);
377
+ exit(1);
378
+ break;
379
+ }
380
+ }
381
+
382
+ static void moveCnySeqNucleotides(SequencesWriter *seqWriteInfo)
383
+ {
384
+ // move nucleotides in buffer to allow a four byte length value
385
+ // the current sequence may span two buffers
386
+
387
+ uint64_t bufIdx = (seqWriteInfo->m_hostBuffersInUse == 2) ? (seqWriteInfo->m_pHostBufPtr - seqWriteInfo->m_pWriteBuffer[1] + WRITE_BUF_SIZE) : (seqWriteInfo->m_pHostBufPtr - seqWriteInfo->m_pWriteBuffer[0]);
388
+ if (bufIdx + 4 > 2 * WRITE_BUF_SIZE) {
389
+ velvetLog("CnySeq bufIdx %ld too large\n", bufIdx);
390
+ exit(1);
391
+ }
392
+
393
+ if (bufIdx + 4 >= WRITE_BUF_SIZE) {
394
+ // continue writing to buf[1]
395
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[1] + (bufIdx + 4 - WRITE_BUF_SIZE);
396
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pWriteBuffer[1] + WRITE_BUF_SIZE;
397
+ seqWriteInfo->m_hostBufferFilePos[1] = seqWriteInfo->m_hostBufferFilePos[0] + WRITE_BUF_SIZE;
398
+ seqWriteInfo->m_hostBuffersInUse = 2;
399
+ } else
400
+ seqWriteInfo->m_pHostBufPtr += 4;
401
+
402
+ seqWriteInfo->m_insertCurrentIndex += 16;
403
+
404
+ uint64_t cnt;
405
+ for (cnt = (seqWriteInfo->m_insertLength+3)>>2; cnt > 0; cnt -= 1) {
406
+ seqWriteInfo->m_pWriteBuffer[(bufIdx+4) >> WRITE_BUF_SHFT][(bufIdx+4) & WRITE_BUF_MASK] = seqWriteInfo->m_pWriteBuffer[bufIdx >> WRITE_BUF_SHFT][bufIdx & WRITE_BUF_MASK];
407
+ bufIdx -= 1;
408
+ }
409
+ }
410
+
411
+ static void writeCnySeqNucleotide(uint8_t nucleotide, SequencesWriter *seqWriteInfo)
412
+ {
413
+ if (seqWriteInfo->m_insertLength == SHORT_NUCL_LENGTH-1) {
414
+ moveCnySeqNucleotides(seqWriteInfo);
415
+ }
416
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) == 0)
417
+ *seqWriteInfo->m_pHostBufPtr = 0;
418
+
419
+ *seqWriteInfo->m_pHostBufPtr = *seqWriteInfo->m_pHostBufPtr | (nucleotide << ((seqWriteInfo->m_insertCurrentIndex & 0x3) * 2));
420
+
421
+ seqWriteInfo->m_insertLength += 1;
422
+ seqWriteInfo->m_insertCurrentIndex += 1;
423
+
424
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) == 0) {
425
+ seqWriteInfo->m_pHostBufPtr += 1;
426
+
427
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
428
+ cnySeqHostBufferFull(seqWriteInfo);
429
+ }
430
+ }
431
+
432
+ void cnySeqInsertNucleotideString(const char *pReadBuf, SequencesWriter *seqWriteInfo) {
433
+ uint8_t nucleotide;
434
+
435
+ static boolean bInit = false;
436
+ static uint8_t charMap[256];
437
+
438
+ if (!bInit) {
439
+ bInit = true;
440
+ // anything unusual defaults to A
441
+ memset(charMap, ADENINE, 256);
442
+ charMap[(int)'C'] = charMap[(int)'c'] = CYTOSINE;
443
+ charMap[(int)'G'] = charMap[(int)'g'] = GUANINE;
444
+ charMap[(int)'T'] = charMap[(int)'t'] = THYMINE;
445
+ charMap[(int)'\0'] = 4;
446
+ }
447
+
448
+ for (;;) {
449
+ nucleotide = charMap[(int)*pReadBuf];
450
+ if (nucleotide < 4) {
451
+ writeCnySeqNucleotide(nucleotide, seqWriteInfo);
452
+ pReadBuf += 1;
453
+ continue;
454
+ } else if (nucleotide == 4) {
455
+ return;
456
+ } else {
457
+ velvetLog("CnySeq unexpected char %c (%d)\n", *pReadBuf, (int) *pReadBuf);
458
+ exit(1);
459
+ }
460
+ }
461
+ }
462
+
463
+ SequencesWriter * openCnySeqForWrite(const char *unifiedSeqFileName)
464
+ {
465
+ SequencesWriter *seqWriteInfo = callocOrExit(1, SequencesWriter);
466
+ seqWriteInfo->m_pWriteBuffer[0] = NULL;
467
+ seqWriteInfo->m_pWriteBuffer[1] = NULL;
468
+ seqWriteInfo->m_pWriteBuffer[2] = NULL;
469
+ char seqNamesFileName[5000];
470
+
471
+ strcpy(seqNamesFileName, unifiedSeqFileName);
472
+ strcat(seqNamesFileName, ".names");
473
+
474
+ #ifdef COLOR
475
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bColor = true;
476
+ #else
477
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bColor = false;
478
+ #endif
479
+
480
+ if ((seqWriteInfo->m_pFile = fopen(unifiedSeqFileName, "wb")) == 0) {
481
+ velvetLog("Unable to open %s for writing\n", unifiedSeqFileName);
482
+ exit(1);
483
+ }
484
+
485
+ if ((seqWriteInfo->m_nameFile = fopen(seqNamesFileName, "w")) == 0) {
486
+ velvetLog("Unable to open %s for writing\n", seqNamesFileName);
487
+ exit(1);
488
+ }
489
+
490
+ memcpy(&seqWriteInfo->m_unifiedSeqFileHeader.m_magic, "CSQ0", 4);
491
+ seqWriteInfo->m_unifiedSeqFileHeader.m_timeStamp = time(0);
492
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bFileWriteCompleted = false;
493
+
494
+ if (fwrite(&seqWriteInfo->m_unifiedSeqFileHeader, sizeof(CnyUnifiedSeqFileHeader), 1, seqWriteInfo->m_pFile) != 1) {
495
+ velvetLog("Unable to write file %s\n", unifiedSeqFileName);
496
+ exit(1);
497
+ }
498
+
499
+ seqWriteInfo->m_insertCurrentIndex = 0;
500
+ seqWriteInfo->m_pWriteBuffer[0] = mallocOrExit(WRITE_BUF_SIZE, uint8_t);
501
+ seqWriteInfo->m_pWriteBuffer[1] = mallocOrExit(WRITE_BUF_SIZE, uint8_t);
502
+ seqWriteInfo->m_pWriteBuffer[2] = mallocOrExit(WRITE_BUF_SIZE, uint8_t);
503
+
504
+ seqWriteInfo->m_hostBufferFilePos[0] = sizeof(CnyUnifiedSeqFileHeader);
505
+
506
+ seqWriteInfo->m_pHostBufPtr = seqWriteInfo->m_pWriteBuffer[0];
507
+ seqWriteInfo->m_pHostBufPtrMax = seqWriteInfo->m_pWriteBuffer[0] + WRITE_BUF_SIZE;
508
+ seqWriteInfo->m_hostBuffersInUse = 1;
509
+ seqWriteInfo->m_fileSegmentWriteIdx = 0; // file segment currently being written
510
+ seqWriteInfo->m_unifiedSeqFileHeader.m_sequenceCnt = 0;
511
+ seqWriteInfo->m_unifiedSeqFileHeader.m_minSeqLen = ~0LL;
512
+ seqWriteInfo->m_unifiedSeqFileHeader.m_maxSeqLen = 0;
513
+ seqWriteInfo->m_unifiedSeqFileHeader.m_totalSeqLen = 0;
514
+ return seqWriteInfo;
515
+ }
516
+
517
+ static void alignCnySeqToNextByteBoundary(SequencesWriter *seqWriteInfo)
518
+ {
519
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) != 0)
520
+ seqWriteInfo->m_pHostBufPtr += 1;
521
+
522
+ seqWriteInfo->m_insertCurrentIndex = (seqWriteInfo->m_insertCurrentIndex + 3) & ~0x3LL;
523
+
524
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax) {
525
+ cnySeqHostBufferFull(seqWriteInfo);
526
+ }
527
+ }
528
+
529
+ static void writeCnySeqUint8(uint8_t uint8, SequencesWriter *seqWriteInfo)
530
+ {
531
+ *seqWriteInfo->m_pHostBufPtr++ = uint8;
532
+ seqWriteInfo->m_insertCurrentIndex += 4;
533
+
534
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax) {
535
+ cnySeqHostBufferFull(seqWriteInfo);
536
+ }
537
+ }
538
+
539
+ static void writeCnySeqUint32(uint32_t uint32, SequencesWriter *seqWriteInfo)
540
+ {
541
+ int i;
542
+ for (i = 0; i < 4; i += 1)
543
+ writeCnySeqUint8((uint8_t)(uint32 >> (i*8)), seqWriteInfo);
544
+ }
545
+
546
+ void inputCnySeqFileStart(Category category, SequencesWriter *seqWriteInfo)
547
+ {
548
+ if (category > REFERENCE) {
549
+ velvetLog("Found category %d beyond max of %d\n", category, REFERENCE);
550
+ exit(1);
551
+ }
552
+
553
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
554
+ writeCnySeqUint8(0xc0, seqWriteInfo);
555
+ writeCnySeqUint32(category, seqWriteInfo);
556
+ }
557
+
558
+ void cnySeqInsertStart(SequencesWriter *seqWriteInfo)
559
+ {
560
+ seqWriteInfo->m_unifiedSeqFileHeader.m_sequenceCnt += 1;
561
+
562
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
563
+
564
+ seqWriteInfo->m_insertLength = 0;
565
+ seqWriteInfo->m_pHostLengthBufPtr = seqWriteInfo->m_pHostBufPtr;
566
+ seqWriteInfo->m_pHostLengthBufPtrMax = seqWriteInfo->m_pHostBufPtrMax;
567
+ seqWriteInfo->m_pHostBufPtr += 1;
568
+ seqWriteInfo->m_insertLengthIndex = seqWriteInfo->m_insertCurrentIndex >> 2; // byte index
569
+ seqWriteInfo->m_insertCurrentIndex += 4; // allow for single byte header
570
+ seqWriteInfo->m_insertStartIndex = seqWriteInfo->m_insertCurrentIndex;
571
+
572
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
573
+ {
574
+ cnySeqHostBufferFull(seqWriteInfo);
575
+ }
576
+
577
+ seqWriteInfo->m_position = 0;
578
+ seqWriteInfo->m_openMask = false;
579
+ }
580
+
581
+ void cnySeqInsertSequenceName(const char *name, IDnum readID, SequencesWriter *seqWriteInfo, Category cat) {
582
+ if (fprintf(seqWriteInfo->m_nameFile, "%s\t%li\t%li\n", name, (long) readID, (long) cat) < 0) {
583
+ velvetLog("Unable to write in name file\n");
584
+ exit(1);
585
+ }
586
+ }
587
+
588
+ void cnySeqInsertReferenceMask(SequencesWriter *seqWriteInfo, Mask *referenceMask) {
589
+ Mask *tmp;
590
+ for (tmp = referenceMask; tmp; tmp = tmp->next) {
591
+ if (fprintf(seqWriteInfo->m_nameFile, "%li\t%li\n", (long) tmp->start, (long) tmp->finish) < 0) {
592
+ velvetLog("Unable to write ref in name file\n");
593
+ exit(1);
594
+ }
595
+ }
596
+ }
597
+
598
+ void cnySeqInsertEnd(SequencesWriter *seqWriteInfo)
599
+ {
600
+ uint8_t *tmp;
601
+
602
+ // fill last few empty nucleotides with a fixed pattern for consistency checking
603
+ if ((seqWriteInfo->m_insertCurrentIndex & 0x3) != 0) {
604
+ *seqWriteInfo->m_pHostBufPtr |= 0xAA << ((seqWriteInfo->m_insertCurrentIndex & 0x3)*2);
605
+ }
606
+
607
+ // collect read length statistics
608
+ if (seqWriteInfo->m_unifiedSeqFileHeader.m_minSeqLen > seqWriteInfo->m_insertLength)
609
+ seqWriteInfo->m_unifiedSeqFileHeader.m_minSeqLen = seqWriteInfo->m_insertLength;
610
+ if (seqWriteInfo->m_unifiedSeqFileHeader.m_maxSeqLen < seqWriteInfo->m_insertLength)
611
+ seqWriteInfo->m_unifiedSeqFileHeader.m_maxSeqLen = seqWriteInfo->m_insertLength;
612
+
613
+ seqWriteInfo->m_unifiedSeqFileHeader.m_totalSeqLen += seqWriteInfo->m_insertLength;
614
+
615
+ if (seqWriteInfo->m_insertLength >= SHORT_NUCL_LENGTH || seqWriteInfo->m_bIsRef) {
616
+ if (seqWriteInfo->m_bIsRef) {
617
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
618
+
619
+ if (seqWriteInfo->m_insertLength < SHORT_NUCL_LENGTH) {
620
+ // the align above points to next byte,
621
+ // need to back up to last byte of nucl seq
622
+ seqWriteInfo->m_pHostBufPtr -= 1;
623
+ moveCnySeqNucleotides(seqWriteInfo);
624
+ // move to next byte
625
+ seqWriteInfo->m_pHostBufPtr += 1;
626
+ }
627
+
628
+ // write out map info
629
+ int idx;
630
+ for (idx = 0; idx < 4; idx += 1) {
631
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
632
+ cnySeqHostBufferFull(seqWriteInfo);
633
+ *seqWriteInfo->m_pHostBufPtr++ = (seqWriteInfo->m_refCnt >> (idx*8)) & 0xff;
634
+ seqWriteInfo->m_insertCurrentIndex += 4; // single byte
635
+ }
636
+ int refIdx;
637
+ RefInfoList *refElem = seqWriteInfo->m_refInfoHead;
638
+ RefInfoList *prev = NULL;
639
+ for (refIdx = 0; refIdx < seqWriteInfo->m_refCnt; refIdx++) {
640
+
641
+ if (refElem == NULL) {
642
+ velvetLog("reference but element %d NULL\n", refIdx);
643
+ exit(1);
644
+ }
645
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
646
+
647
+ for (idx = 0; idx < 4; idx += 1) {
648
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
649
+ cnySeqHostBufferFull(seqWriteInfo);
650
+ *seqWriteInfo->m_pHostBufPtr++ = (refElem->m_elem.m_referenceID >> (idx*8)) & 0xff;
651
+ seqWriteInfo->m_insertCurrentIndex += 4; // single byte
652
+ }
653
+ for (idx = 0; idx < 4; idx += 1) {
654
+ if (seqWriteInfo->m_pHostBufPtr == seqWriteInfo->m_pHostBufPtrMax)
655
+ cnySeqHostBufferFull(seqWriteInfo);
656
+ *seqWriteInfo->m_pHostBufPtr++ = (refElem->m_elem.m_pos >> (idx*8)) & 0xff;
657
+ seqWriteInfo->m_insertCurrentIndex += 4; // single byte
658
+ }
659
+
660
+ prev = refElem;
661
+ refElem = refElem->next;
662
+ free(prev);
663
+ }
664
+ if (refElem != NULL) {
665
+ velvetLog("more than %d elements in ref\n", seqWriteInfo->m_refCnt);
666
+ exit(1);
667
+ }
668
+ seqWriteInfo->m_bIsRef = false;
669
+ seqWriteInfo->m_refInfoHead = NULL;
670
+ seqWriteInfo->m_refCnt = 0; // set ref bit
671
+ *(seqWriteInfo->m_pHostLengthBufPtr++) = 0xa0 | ((seqWriteInfo->m_insertLength >> 32) & 0x1f);
672
+ } else {
673
+ // one byte control, four byte length
674
+ *(seqWriteInfo->m_pHostLengthBufPtr++) = 0x80 | ((seqWriteInfo->m_insertLength >> 32) & 0x1f);
675
+ }
676
+ int idx;
677
+ for (idx = 0; idx < 4; idx += 1) {
678
+ if (seqWriteInfo->m_pHostLengthBufPtr == seqWriteInfo->m_pHostLengthBufPtrMax) {
679
+ if (seqWriteInfo->m_hostBuffersInUse < 2) {
680
+ velvetLog("CnySeq m_hostBuffersInUse %d\n", seqWriteInfo->m_hostBuffersInUse);
681
+ exit(1);
682
+ }
683
+ seqWriteInfo->m_pHostLengthBufPtr = seqWriteInfo->m_pWriteBuffer[1];
684
+ }
685
+ *seqWriteInfo->m_pHostLengthBufPtr++ = (seqWriteInfo->m_insertLength >> (idx*8)) & 0xff;
686
+ }
687
+
688
+ } else {
689
+ // one byte length;
690
+ *seqWriteInfo->m_pHostLengthBufPtr = (uint8_t)seqWriteInfo->m_insertLength;
691
+ }
692
+
693
+ alignCnySeqToNextByteBoundary(seqWriteInfo);
694
+
695
+ switch (seqWriteInfo->m_hostBuffersInUse) {
696
+ case 1: // buf[0] is being written
697
+ break;
698
+ case 2: // buf[0] and buf[1] are being written, write buf[0] to disk
699
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[0], SEEK_SET) < 0) {
700
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
701
+ exit(1);
702
+ }
703
+
704
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[0], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1) {
705
+ velvetLog("Unable to write CnyUnifiedSeq\n");
706
+ exit(1);
707
+ }
708
+
709
+ // swap buf[0] and buf[1]
710
+ tmp = seqWriteInfo->m_pWriteBuffer[0];
711
+ seqWriteInfo->m_pWriteBuffer[0] = seqWriteInfo->m_pWriteBuffer[1];
712
+ seqWriteInfo->m_pWriteBuffer[1] = tmp;
713
+
714
+ seqWriteInfo->m_hostBufferFilePos[0] = seqWriteInfo->m_hostBufferFilePos[1];
715
+
716
+ seqWriteInfo->m_hostBuffersInUse = 1;
717
+ break;
718
+ case 3: // buf[0], [1] and [2] are in use, write buf[0] and [1] to disk
719
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[0], SEEK_SET) < 0) {
720
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
721
+ exit(1);
722
+ }
723
+
724
+
725
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[0], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1){
726
+ velvetLog("Unable to write CnyUnifiedSeq\n");
727
+ exit(1);
728
+ }
729
+
730
+
731
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[1], SEEK_SET) < 0) {
732
+ velvetLog("Unable to seek in CnyUnifiedSeq\n");
733
+ exit(1);
734
+ }
735
+
736
+
737
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[1], WRITE_BUF_SIZE, 1, seqWriteInfo->m_pFile) != 1){
738
+ velvetLog("Unable to write CnyUnifiedSeq\n");
739
+ exit(1);
740
+ }
741
+
742
+ // swap buf[0] and buf[2]
743
+ tmp = seqWriteInfo->m_pWriteBuffer[0];
744
+ seqWriteInfo->m_pWriteBuffer[0] = seqWriteInfo->m_pWriteBuffer[2];
745
+ seqWriteInfo->m_pWriteBuffer[2] = tmp;
746
+
747
+ seqWriteInfo->m_hostBufferFilePos[0] = seqWriteInfo->m_hostBufferFilePos[2];
748
+
749
+ seqWriteInfo->m_hostBuffersInUse = 1;
750
+ break;
751
+ }
752
+
753
+ // if ref masks, write mapping info to the names file
754
+ if (seqWriteInfo->m_referenceMask && *(seqWriteInfo->m_referenceMask)) {
755
+ cnySeqInsertReferenceMask(seqWriteInfo, *(seqWriteInfo->m_referenceMask));
756
+ // free memory and clear list
757
+ if (seqWriteInfo->m_maskMemory) {
758
+ destroyRecycleBin(seqWriteInfo->m_maskMemory);
759
+ seqWriteInfo->m_maskMemory = NULL;
760
+ }
761
+ *(seqWriteInfo->m_referenceMask) = NULL;
762
+ }
763
+ }
764
+
765
+ void closeCnySeqForWrite(SequencesWriter *seqWriteInfo)
766
+ {
767
+ // should be only one buffer in use
768
+ if (seqWriteInfo->m_hostBuffersInUse != 1) {
769
+ velvetLog("CnySeq host buffers in use %d\n", seqWriteInfo->m_hostBuffersInUse);
770
+ exit(1);
771
+ }
772
+
773
+ if (fseek(seqWriteInfo->m_pFile, seqWriteInfo->m_hostBufferFilePos[0], SEEK_SET) < 0) {
774
+ velvetLog("Unable to seek CnySeq\n");
775
+ exit(1);
776
+ }
777
+
778
+ if (fwrite(seqWriteInfo->m_pWriteBuffer[0], (uint32_t)(seqWriteInfo->m_pHostBufPtr - seqWriteInfo->m_pWriteBuffer[0]), 1, seqWriteInfo->m_pFile) != 1) {
779
+ velvetLog("Unable to write CnySeq\n");
780
+ exit(1);
781
+ }
782
+
783
+ seqWriteInfo->m_unifiedSeqFileHeader.m_bFileWriteCompleted = true;
784
+ seqWriteInfo->m_unifiedSeqFileHeader.m_seqNuclStoreSize = seqWriteInfo->m_insertCurrentIndex >> 2;
785
+ seqWriteInfo->m_unifiedSeqFileHeader.m_numCategories = CATEGORIES;
786
+
787
+ if (fseek(seqWriteInfo->m_pFile, 0, SEEK_SET) < 0) {
788
+ velvetLog("Unable to seek CnySeq\n");
789
+ exit(1);
790
+ }
791
+
792
+ if (fwrite(&seqWriteInfo->m_unifiedSeqFileHeader, sizeof(CnyUnifiedSeqFileHeader), 1, seqWriteInfo->m_pFile) != 1) {
793
+ velvetLog("Unable to write CnySeq\n");
794
+ exit(1);
795
+ }
796
+
797
+ if (fclose(seqWriteInfo->m_pFile) < 0) {
798
+ velvetLog("Unable to close CnySeq\n");
799
+ exit(1);
800
+ }
801
+
802
+ if (fclose(seqWriteInfo->m_nameFile) < 0) {
803
+ velvetLog("Unable to close names file\n");
804
+ exit(1);
805
+ }
806
+
807
+ if (seqWriteInfo->m_pWriteBuffer[0])
808
+ free(seqWriteInfo->m_pWriteBuffer[0]);
809
+ if (seqWriteInfo->m_pWriteBuffer[1])
810
+ free(seqWriteInfo->m_pWriteBuffer[1]);
811
+ if (seqWriteInfo->m_pWriteBuffer[2])
812
+ free(seqWriteInfo->m_pWriteBuffer[2]);
813
+ }