finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554)
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,106 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #ifndef _PREGRAPH_H_
22
+ #define _PREGRAPH_H_
23
+
24
+ ////////////////////////////////////////////////////////////
25
+ // PreNode functions
26
+ ////////////////////////////////////////////////////////////
27
+
28
+ void destroyPreNode_pg(IDnum preNode, PreGraph * preGraph);
29
+
30
+ // Locator
31
+ PreNode *getPreNodeInPreGraph_pg(PreGraph * preGraph, IDnum preNodeID);
32
+
33
+ // PreArc info
34
+ PreArcI getPreArc_pg(IDnum preNodeID, PreGraph * preGraph);
35
+ boolean hasSinglePreArc_pg(IDnum preNodeID, PreGraph * graph);
36
+ char simplePreArcCount_pg(IDnum preNodeID, PreGraph * preGraph);
37
+
38
+ // Descriptor
39
+ Coordinate getPreNodeLength_pg(IDnum preNodeID, PreGraph * preGraph);
40
+ void setPreNodeDescriptor_pg(Descriptor * descr, Coordinate length, IDnum preNodeID, PreGraph * preGraph);
41
+ void appendDescriptors_pg(Descriptor ** start, int * writeOffset, IDnum preNodeID, PreGraph* preGraph, boolean initial);
42
+
43
+ ////////////////////////////////////////////////////////////
44
+ // PreMarker functions
45
+ ////////////////////////////////////////////////////////////
46
+
47
+ boolean referenceMarkersAreActivated_pg(PreGraph * preGraph);
48
+ void allocatePreMarkerCountSpace_pg(PreGraph * preGraph);
49
+ void incrementNodeReferenceMarkerCount_pg(PreGraph * preGraph, IDnum preNodeID);
50
+ void allocatePreMarkerSpace_pg(PreGraph * preGraph);
51
+ PreMarker * addPreMarker_pg(PreGraph * preGraph, IDnum nodeID, IDnum seqID, Coordinate * start, PreMarker * previous);
52
+ void concatenateReferenceMarkers_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset);
53
+ boolean hasPreMarkers(IDnum nodeID, PreGraph * preGraph);
54
+
55
+ ////////////////////////////////////////////////////////////
56
+ // PreArc functions
57
+ ////////////////////////////////////////////////////////////
58
+
59
+ // Creators/destructor
60
+ PreArcI createPreArc_pg(IDnum originID, IDnum destinationID,
61
+ PreGraph * preGraph);
62
+ void createAnalogousPreArc_pg(IDnum originID, IDnum destinationID,
63
+ PreArcI refPreArc, PreGraph * preGraph);
64
+ void destroyPreArc_pg(PreArcI preArc, PreGraph * preGraph);
65
+
66
+ // Multiplicity
67
+ void setMultiplicity_pg(PreArcI preArc, IDnum mult);
68
+ IDnum getMultiplicity_pg(PreArcI preArc);
69
+
70
+ // Extremities
71
+ IDnum getDestination_pg(PreArcI preArc, IDnum nodeID);
72
+ IDnum getOtherEnd_pg(PreArcI preArc, IDnum preNodeID);
73
+
74
+ // Finding preArcs
75
+ PreArcI getPreArcBetweenPreNodes_pg(IDnum originID, IDnum destinationID,
76
+ PreGraph * preGraph);
77
+ PreArcI getNextPreArc_pg(PreArcI preArc, IDnum originPreNodeID);
78
+
79
+ // Misc
80
+ boolean isLoop_pg(PreArcI preArc);
81
+
82
+ ////////////////////////////////////////////////////////////
83
+ // PreGraph functions
84
+ ////////////////////////////////////////////////////////////
85
+
86
+ // Memory allocation
87
+ PreGraph *emptyPreGraph_pg(IDnum sequenceCount, IDnum referenceCount, int wordLength, boolean double_strand);
88
+ void allocatePreNodeSpace_pg(PreGraph * preGraph, IDnum preNodeCount);
89
+ void addPreNodeToPreGraph_pg(PreGraph * preGraph, Coordinate start,
90
+ Coordinate stop, SequencesReader *seqReadInfo,
91
+ Kmer * initialKmer, IDnum ID);
92
+
93
+ // Deallocation
94
+ void destroyPreGraph_pg(PreGraph * preGraph);
95
+
96
+ // Dimensions
97
+ IDnum preNodeCount_pg(PreGraph * preGraph);
98
+ IDnum sequenceCount_pg(PreGraph * preGraph);
99
+ void renumberPreNodes_pg(PreGraph * preGraph);
100
+
101
+ // File IO
102
+ void exportPreGraph_pg(char *filename, PreGraph * preGraph);
103
+
104
+ int getWordLength_pg(PreGraph * preGraph);
105
+
106
+ #endif
@@ -0,0 +1,990 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <ctype.h>
25
+
26
+ #ifdef _OPENMP
27
+ #include <omp.h>
28
+ #endif
29
+
30
+ #include "globals.h"
31
+ #include "preGraph.h"
32
+ #include "recycleBin.h"
33
+ #include "roadMap.h"
34
+ #include "readSet.h"
35
+ #include "concatenatedPreGraph.h"
36
+ #include "utility.h"
37
+ #include "kmer.h"
38
+ #include "tightString.h"
39
+ #include "binarySequences.h"
40
+ #define ADENINE 0 /* code passed to pushNucleotide() for 'A' (and for 'N', which this file treats as 'A') */
41
+ #define CYTOSINE 1 /* code passed to pushNucleotide() for 'C' */
42
+ #define GUANINE 2 /* code passed to pushNucleotide() for 'G' */
43
+ #define THYMINE 3 /* code passed to pushNucleotide() for 'T' */
44
+
45
+ #ifdef _OPENMP
46
+
47
+ Coordinate *annotationOffset = NULL; /* OpenMP builds only: running sum of per-sequence annotation counts, filled and freed by connectPreNodes(); entry [i] is the index of the first annotation of sequence i+1. NOTE(review): has external linkage - confirm whether another file needs it before making it static. */
48
+
49
+ static omp_lock_t *nodeLocks = NULL; /* OpenMP builds only: one lock per index in [0, preNodeCount], allocated by createNodeLocks() */
50
+
51
+ static void createNodeLocks(PreGraph *preGraph)
52
+ {
53
+ IDnum nbNodes;
54
+ IDnum nodeIndex;
55
+
56
+ nbNodes = preNodeCount_pg(preGraph) + 1;
57
+ if (nodeLocks)
58
+ free (nodeLocks);
59
+ nodeLocks = mallocOrExit(nbNodes, omp_lock_t);
60
+
61
+ #pragma omp parallel for
62
+ for (nodeIndex = 0; nodeIndex < nbNodes; nodeIndex++)
63
+ omp_init_lock(nodeLocks + nodeIndex);
64
+ }
65
+
66
+ static void lockNode(IDnum preNodeID)
67
+ {
68
+ omp_set_lock(nodeLocks + preNodeID);
69
+ }
70
+
71
+ static void unLockNode(IDnum preNodeID)
72
+ {
73
+ omp_unset_lock(nodeLocks + preNodeID);
74
+ }
75
+
76
+ static void lockTwoNodes(IDnum preNodeID, IDnum preNode2ID)
77
+ {
78
+ if (preNodeID < 0)
79
+ preNodeID = -preNodeID;
80
+ if (preNode2ID < 0)
81
+ preNode2ID = -preNode2ID;
82
+
83
+ /* Lock lowest ID first to avoid deadlocks */
84
+ if (preNodeID == preNode2ID)
85
+ omp_set_lock (nodeLocks + preNodeID);
86
+ else if (preNodeID < preNode2ID)
87
+ {
88
+ omp_set_lock (nodeLocks + preNodeID);
89
+ omp_set_lock (nodeLocks + preNode2ID);
90
+ }
91
+ else
92
+ {
93
+ omp_set_lock (nodeLocks + preNode2ID);
94
+ omp_set_lock (nodeLocks + preNodeID);
95
+ }
96
+ }
97
+
98
+ static void unLockTwoNodes(IDnum preNodeID, IDnum preNode2ID)
99
+ {
100
+ if (preNodeID < 0)
101
+ preNodeID = -preNodeID;
102
+ if (preNode2ID < 0)
103
+ preNode2ID = -preNode2ID;
104
+
105
+ omp_unset_lock (nodeLocks + preNodeID);
106
+ if (preNodeID != preNode2ID)
107
+ omp_unset_lock (nodeLocks + preNode2ID);
108
+ }
109
+ #endif
110
+
111
+ // Internal structure used to mark one end (start or finish) of an Annotation
112
+ struct insertionMarker_st {
113
+ Annotation *annot; // annotation whose boundary this marker records
114
+ boolean isStart; // true: position is getStart(annot); false: position is getFinish(annot)
115
+ } ATTRIBUTE_PACKED;
116
+
117
+ Coordinate getInsertionMarkerPosition(InsertionMarker * marker)
118
+ {
119
+ if (marker->isStart)
120
+ return getStart(marker->annot);
121
+ else
122
+ return getFinish(marker->annot);
123
+ }
124
+
125
+ int compareInsertionMarkers(const void *A, const void *B)
126
+ {
127
+ Coordinate Apos =
128
+ getInsertionMarkerPosition((InsertionMarker *) A);
129
+ Coordinate Bpos =
130
+ getInsertionMarkerPosition((InsertionMarker *) B);
131
+
132
+ if (Apos < Bpos)
133
+ return -1;
134
+ else if (Apos == Bpos)
135
+ return 0;
136
+ else
137
+ return 1;
138
+ }
139
+
140
+ // Applies mergeSort to each insertion marker list (in order of position)
141
+ static void
142
+ orderInsertionMarkers(InsertionMarker ** insMarkers,
143
+ IDnum * markerCounters, RoadMapArray * rdmaps)
144
+ {
145
+ IDnum sequenceIndex;
146
+ IDnum sequenceCounter = rdmaps->length;
147
+
148
+ velvetLog("Ordering insertion markers\n");
149
+ #ifdef _OPENMP
150
+ #pragma omp parallel for
151
+ #endif
152
+ for (sequenceIndex = 1; sequenceIndex <= sequenceCounter;
153
+ sequenceIndex++) {
154
+ qsort(insMarkers[sequenceIndex],
155
+ markerCounters[sequenceIndex],
156
+ sizeof(InsertionMarker), compareInsertionMarkers);
157
+ }
158
+ }
159
+
160
+ // Creates insertion marker lists
161
+ static void
162
+ setInsertionMarkers(RoadMapArray * rdmaps,
163
+ IDnum * markerCounters,
164
+ InsertionMarker ** veryLastMarker,
165
+ InsertionMarker ** insertionMarkers)
166
+ {
167
+ IDnum sequenceCounter = rdmaps->length;
168
+ IDnum sequenceIndex, sequenceIndex2;
169
+ Coordinate totalCount = 0;
170
+ RoadMap *rdmap;
171
+ Annotation *annot = rdmaps->annotations;
172
+ InsertionMarker *nextMarker, *newMarker;
173
+ IDnum annotIndex, lastAnnotIndex;
174
+ InsertionMarker **insMarkers =
175
+ callocOrExit(rdmaps->length + 1, InsertionMarker *);
176
+ // Counting insertion markers
177
+ for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
178
+ sequenceIndex++) {
179
+ //velvetLog("Going through sequence %d\n", sequenceIndex);
180
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
181
+ lastAnnotIndex = getAnnotationCount(rdmap);
182
+
183
+ // Set insertion markers in previous sequences :
184
+
185
+ for (annotIndex = 0; annotIndex < lastAnnotIndex;
186
+ annotIndex++) {
187
+ if (getAnnotSequenceID(annot) > 0) {
188
+ markerCounters[getAnnotSequenceID(annot)]
189
+ += 2;
190
+ } else {
191
+ markerCounters[-getAnnotSequenceID(annot)]
192
+ += 2;
193
+ }
194
+ totalCount += 2;
195
+ annot = getNextAnnotation(annot);
196
+ }
197
+ }
198
+
199
+ // Allocating space
200
+ *insertionMarkers = callocOrExit(totalCount, InsertionMarker);
201
+ *veryLastMarker = *insertionMarkers + totalCount;
202
+
203
+ // Pointing each node to its space
204
+ nextMarker = *insertionMarkers;
205
+ for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
206
+ sequenceIndex++) {
207
+ insMarkers[sequenceIndex] = nextMarker;
208
+ nextMarker = nextMarker + markerCounters[sequenceIndex];
209
+ markerCounters[sequenceIndex] = 0;
210
+ }
211
+
212
+ // Filling up space with data
213
+ annot = rdmaps->annotations;
214
+ for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
215
+ sequenceIndex++) {
216
+ //velvetLog("Going through sequence %d\n", sequenceIndex);
217
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
218
+ lastAnnotIndex = getAnnotationCount(rdmap);
219
+
220
+ // Set insertion markers in previous sequences :
221
+
222
+ for (annotIndex = 0; annotIndex < lastAnnotIndex;
223
+ annotIndex++) {
224
+ sequenceIndex2 = getAnnotSequenceID(annot);
225
+ if (sequenceIndex2 > 0) {
226
+ newMarker =
227
+ insMarkers[sequenceIndex2] +
228
+ (markerCounters[sequenceIndex2])++;
229
+ newMarker->annot = annot;
230
+ newMarker->isStart = true;
231
+
232
+ newMarker =
233
+ insMarkers[sequenceIndex2] +
234
+ (markerCounters[sequenceIndex2])++;
235
+ newMarker->annot = annot;
236
+ newMarker->isStart = false;
237
+ } else {
238
+ incrementAnnotationCoordinates(annot);
239
+
240
+ newMarker =
241
+ insMarkers[-sequenceIndex2] +
242
+ (markerCounters[-sequenceIndex2])++;
243
+ newMarker->annot = annot;
244
+ newMarker->isStart = true;
245
+
246
+ newMarker =
247
+ insMarkers[-sequenceIndex2] +
248
+ (markerCounters[-sequenceIndex2])++;
249
+ newMarker->annot = annot;
250
+ newMarker->isStart = false;
251
+ }
252
+ annot = getNextAnnotation(annot);
253
+ }
254
+ }
255
+
256
+ orderInsertionMarkers(insMarkers, markerCounters, rdmaps);
257
+ free(insMarkers);
258
+ }
259
+
260
+ // Counts how many preNodes are to be created to allocate appropriate memory
261
+ static void
262
+ countPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
263
+ IDnum * markerCounters, InsertionMarker * insertionMarkers,
264
+ InsertionMarker * veryLastMarker)
265
+ {
266
+ Annotation *annot = rdmaps->annotations;
267
+ InsertionMarker *currentMarker = insertionMarkers;
268
+ IDnum markerIndex, lastMarkerIndex;
269
+ IDnum sequenceIndex;
270
+ Coordinate currentPosition, nextStop;
271
+ IDnum preNodeCounter = 0;
272
+ RoadMap *rdmap;
273
+ IDnum annotIndex, lastAnnotIndex;
274
+
275
+ // Now that we have read all of the annotations, we go on to create the preNodes and tie them up
276
+ for (sequenceIndex = 1;
277
+ sequenceIndex <= sequenceCount_pg(preGraph);
278
+ sequenceIndex++) {
279
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
280
+ annotIndex = 0;
281
+ lastAnnotIndex = getAnnotationCount(rdmap);
282
+ markerIndex = 0;
283
+ lastMarkerIndex = markerCounters[sequenceIndex];
284
+ currentPosition = 0;
285
+
286
+
287
+ while (annotIndex < lastAnnotIndex) {
288
+ if (markerIndex == lastMarkerIndex
289
+ || getPosition(annot) <=
290
+ getInsertionMarkerPosition(currentMarker))
291
+ nextStop = getPosition(annot);
292
+ else
293
+ nextStop =
294
+ getInsertionMarkerPosition
295
+ (currentMarker);
296
+
297
+ if (currentPosition != nextStop) {
298
+ preNodeCounter++;
299
+ currentPosition = nextStop;
300
+ }
301
+
302
+ while (markerIndex < lastMarkerIndex
303
+ && getInsertionMarkerPosition(currentMarker)
304
+ == currentPosition) {
305
+ currentMarker++;
306
+ markerIndex++;
307
+ }
308
+
309
+ while (annotIndex < lastAnnotIndex
310
+ && getPosition(annot) == currentPosition) {
311
+ annot = getNextAnnotation(annot);
312
+ annotIndex++;
313
+ }
314
+
315
+ }
316
+
317
+ while (markerIndex < lastMarkerIndex) {
318
+ if (currentPosition ==
319
+ getInsertionMarkerPosition(currentMarker)) {
320
+ currentMarker++;
321
+ markerIndex++;
322
+ } else {
323
+ preNodeCounter++;
324
+ currentPosition =
325
+ getInsertionMarkerPosition
326
+ (currentMarker);
327
+ }
328
+ }
329
+ }
330
+
331
+ allocatePreNodeSpace_pg(preGraph, preNodeCounter);
332
+ }
333
+
334
+ static void convertInsertionMarkers(InsertionMarker * insertionMarkers,
335
+ InsertionMarker * veryLastMarker,
336
+ IDnum * chains)
337
+ {
338
+ InsertionMarker *marker;
339
+ Annotation *annot;
340
+
341
+ for (marker = insertionMarkers; marker != veryLastMarker; marker++) {
342
+ annot = marker->annot;
343
+
344
+ if (getAnnotSequenceID(annot) > 0) {
345
+ if (marker->isStart) {
346
+ if (getStartID(annot) == 0)
347
+ setStartID(annot,
348
+ chains
349
+ [getAnnotSequenceID
350
+ (annot)]);
351
+ else
352
+ setStartID(annot,
353
+ getStartID(annot) + 1);
354
+ }
355
+ } else {
356
+ if (marker->isStart)
357
+ setStartID(annot, -getStartID(annot));
358
+ else {
359
+ if (getFinishID(annot) == 0)
360
+ setFinishID(annot,
361
+ -chains
362
+ [-getAnnotSequenceID
363
+ (annot)]);
364
+ else
365
+ setFinishID(annot,
366
+ -getFinishID(annot) -
367
+ 1);
368
+ }
369
+ }
370
+ }
371
+
372
+ free(insertionMarkers);
373
+ }
374
+
375
+ static void convertMarker(InsertionMarker * marker, IDnum nodeID)
376
+ {
377
+ if (marker->isStart)
378
+ setStartID(marker->annot, nodeID);
379
+ else
380
+ setFinishID(marker->annot, nodeID);
381
+ }
382
+
383
+ // Creates the preNode using insertion marker and annotation lists for each sequence
384
+ static void
385
+ // Creates the preNode using insertion marker and annotation lists for each sequence
386
+ createPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
387
+ IDnum * markerCounters, InsertionMarker * insertionMarkers,
388
+ InsertionMarker * veryLastMarker, IDnum * chains,
389
+ SequencesReader *seqReadInfo, int WORDLENGTH)
390
+ {
391
+ char *sequenceFilename = seqReadInfo->m_seqFilename;
392
+ Annotation *annot = rdmaps->annotations;
393
+ IDnum latestPreNodeID;
394
+ InsertionMarker *currentMarker = insertionMarkers;
395
+ IDnum sequenceIndex;
396
+ Coordinate currentPosition, nextStop;
397
+ IDnum preNodeCounter = 1;
398
+ FILE *file = NULL;
399
+ char line[50000];
400
+ int lineLength = 50000;
401
+ Coordinate readIndex;
402
+ boolean tooShort;
403
+ Kmer initialKmer;
404
+ char c;
405
+ RoadMap *rdmap;
406
+ IDnum annotIndex, lastAnnotIndex;
407
+ IDnum markerIndex, lastMarkerIndex;
408
+
409
+ if (!seqReadInfo->m_bIsBinary) {
410
+ file = fopen(sequenceFilename, "r");
411
+ if (file == NULL)
412
+ exitErrorf(EXIT_FAILURE, true, "Could not read %s", sequenceFilename);
413
+ // Reading sequence descriptor in first line
414
+ if (sequenceCount_pg(preGraph) > 0 && !fgets(line, lineLength, file))
415
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
416
+ seqReadInfo->m_pFile = file;
417
+ }
418
+
419
+ // Now that we have read all of the annotations, we go on to create the preNodes and tie them up
420
+ for (sequenceIndex = 1;
421
+ sequenceIndex <= sequenceCount_pg(preGraph);
422
+ sequenceIndex++) {
423
+ if (sequenceIndex % 1000000 == 0)
424
+ velvetLog("Sequence %li / %li\n", (long) sequenceIndex,
425
+ (long) sequenceCount_pg(preGraph));
426
+
427
+ if (!seqReadInfo->m_bIsBinary) {
428
+ while (line[0] != '>')
429
+ if (!fgets(line, lineLength, file))
430
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
431
+ }
432
+
433
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
434
+ annotIndex = 0;
435
+ lastAnnotIndex = getAnnotationCount(rdmap);
436
+ markerIndex = 0;
437
+ lastMarkerIndex = markerCounters[sequenceIndex];
438
+ currentPosition = 0;
439
+
440
+ // Reading first (k-1) nucleotides
441
+ tooShort = false;
442
+ clearKmer(&initialKmer);
443
+ //velvetLog("Initial kmer: ");
444
+ TightString *tString = NULL;
445
+ char *strString = NULL;
446
+ if (seqReadInfo->m_bIsBinary) {
447
+ tString = getTightStringInArray(seqReadInfo->m_sequences->tSequences, sequenceIndex - 1);
448
+ strString = readTightString(tString);
449
+ }
450
+ for (readIndex = 0; readIndex < WORDLENGTH - 1;
451
+ readIndex++) {
452
+ if (seqReadInfo->m_bIsBinary) {
453
+ if (readIndex >= tString->length) {
454
+ tooShort = true;
455
+ break;
456
+ }
457
+
458
+ c = strString[readIndex];
459
+ } else {
460
+ c = getc(file);
461
+ while (c == '\n' || c == '\r')
462
+ c = getc(file);
463
+
464
+ if (c == '>' || c == 'M' || c == EOF) {
465
+ ungetc(c, file);
466
+ tooShort = true;
467
+ break;
468
+ }
469
+ }
470
+ switch (c) {
471
+ case 'A':
472
+ case 'N':
473
+ pushNucleotide(&initialKmer, ADENINE);
474
+ break;
475
+ case 'C':
476
+ pushNucleotide(&initialKmer, CYTOSINE);
477
+ break;
478
+ case 'G':
479
+ pushNucleotide(&initialKmer, GUANINE);
480
+ break;
481
+ case 'T':
482
+ pushNucleotide(&initialKmer, THYMINE);
483
+ break;
484
+ default:
485
+ velvetLog
486
+ ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n");
487
+ fflush(stdout);
488
+ abort();
489
+ }
490
+ }
491
+
492
+ if (tooShort) {
493
+ //velvetLog("Skipping short read.. %d\n", sequenceIndex);
494
+ chains[sequenceIndex] = preNodeCounter;
495
+ if (seqReadInfo->m_bIsBinary) {
496
+ free(strString);
497
+ } else {
498
+ if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph))
499
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
500
+ }
501
+ continue;
502
+ }
503
+
504
+ char *currString = NULL;
505
+ if (seqReadInfo->m_bIsBinary) {
506
+ currString = &strString[readIndex];
507
+ seqReadInfo->m_ppCurrString = &currString;
508
+ }
509
+ latestPreNodeID = 0;
510
+
511
+ while (annotIndex < lastAnnotIndex) {
512
+ if (markerIndex == lastMarkerIndex
513
+ || getPosition(annot) <=
514
+ getInsertionMarkerPosition(currentMarker))
515
+ nextStop = getPosition(annot);
516
+ else {
517
+ nextStop =
518
+ getInsertionMarkerPosition
519
+ (currentMarker);
520
+ }
521
+
522
+ if (currentPosition != nextStop) {
523
+ if (seqReadInfo->m_bIsBinary) {
524
+ if (readIndex >= tString->length) {
525
+ velvetLog("readIndex %ld beyond string len %ld\n", (uint64_t) readIndex, (uint64_t) tString->length);
526
+ exit(1);
527
+ }
528
+ }
529
+ //if (sequenceIndex == 481)
530
+ // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop);
531
+ addPreNodeToPreGraph_pg(preGraph,
532
+ currentPosition,
533
+ nextStop,
534
+ seqReadInfo,
535
+ &initialKmer,
536
+ preNodeCounter);
537
+ if (latestPreNodeID == 0) {
538
+ chains[sequenceIndex] =
539
+ preNodeCounter;
540
+ }
541
+ latestPreNodeID = preNodeCounter++;
542
+ currentPosition = nextStop;
543
+ }
544
+
545
+ while (markerIndex < lastMarkerIndex
546
+ && getInsertionMarkerPosition(currentMarker)
547
+ == nextStop) {
548
+ convertMarker(currentMarker,
549
+ latestPreNodeID);
550
+ currentMarker++;
551
+ markerIndex++;
552
+ }
553
+
554
+ while (annotIndex < lastAnnotIndex
555
+ && getPosition(annot) == nextStop) {
556
+ for (readIndex = 0;
557
+ readIndex <
558
+ getAnnotationLength(annot);
559
+ readIndex++) {
560
+ if (seqReadInfo->m_bIsBinary) {
561
+ c = *currString;
562
+ currString += 1; // increment the pointer
563
+ } else {
564
+ c = getc(file);
565
+ while (!isalpha(c))
566
+ c = getc(file);
567
+ }
568
+
569
+ //if (sequenceIndex == 481)
570
+ // velvetLog("(%c)", c);
571
+ switch (c) {
572
+ case 'A':
573
+ case 'N':
574
+ pushNucleotide(&initialKmer, ADENINE);
575
+ break;
576
+ case 'C':
577
+ pushNucleotide(&initialKmer, CYTOSINE);
578
+ break;
579
+ case 'G':
580
+ pushNucleotide(&initialKmer, GUANINE);
581
+ break;
582
+ case 'T':
583
+ pushNucleotide(&initialKmer, THYMINE);
584
+ break;
585
+ default:
586
+ velvetLog
587
+ ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n");
588
+ fflush(stdout);
589
+ #ifdef DEBUG
590
+ abort();
591
+ #endif
592
+ exit(1);
593
+ }
594
+ }
595
+
596
+ annot = getNextAnnotation(annot);
597
+ annotIndex++;
598
+ }
599
+
600
+ }
601
+
602
+ while (markerIndex < lastMarkerIndex) {
603
+ if (currentPosition ==
604
+ getInsertionMarkerPosition(currentMarker)) {
605
+ convertMarker(currentMarker,
606
+ latestPreNodeID);
607
+ currentMarker++;
608
+ markerIndex++;
609
+ } else {
610
+ nextStop =
611
+ getInsertionMarkerPosition
612
+ (currentMarker);
613
+ //if (sequenceIndex == 481)
614
+ // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop);
615
+ addPreNodeToPreGraph_pg(preGraph,
616
+ currentPosition,
617
+ nextStop, seqReadInfo,
618
+ &initialKmer,
619
+ preNodeCounter);
620
+ if (latestPreNodeID == 0)
621
+ chains[sequenceIndex] =
622
+ preNodeCounter;
623
+ latestPreNodeID = preNodeCounter++;
624
+ currentPosition =
625
+ getInsertionMarkerPosition
626
+ (currentMarker);
627
+ }
628
+ }
629
+ if (seqReadInfo->m_bIsBinary) {
630
+ free(strString);
631
+ } else {
632
+ // End of sequence
633
+ if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph))
634
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
635
+ //velvetLog(" \n");
636
+ }
637
+
638
+ if (latestPreNodeID == 0)
639
+ chains[sequenceIndex] = preNodeCounter;
640
+ }
641
+
642
+ free(markerCounters);
643
+ if (!seqReadInfo->m_bIsBinary) {
644
+ fclose(file);
645
+ }
646
+
647
+ }
648
+
649
+ static void connectPreNodeToTheNext(IDnum * currentPreNodeID,
650
+ IDnum nextPreNodeID,
651
+ Coordinate * currentPosition,
652
+ IDnum sequenceIndex,
653
+ boolean isReference,
654
+ PreGraph * preGraph)
655
+ {
656
+ if (nextPreNodeID == 0)
657
+ return;
658
+
659
+ #ifdef _OPENMP
660
+ lockTwoNodes(*currentPreNodeID, nextPreNodeID);
661
+ #endif
662
+
663
+ if (isReference)
664
+ incrementNodeReferenceMarkerCount_pg(preGraph, nextPreNodeID);
665
+
666
+ if (!isReference && *currentPreNodeID != 0)
667
+ createPreArc_pg(*currentPreNodeID, nextPreNodeID,
668
+ preGraph);
669
+
670
+ #ifdef _OPENMP
671
+ unLockTwoNodes(*currentPreNodeID, nextPreNodeID);
672
+ #endif
673
+
674
+ *currentPreNodeID = nextPreNodeID;
675
+
676
+ *currentPosition +=
677
+ getPreNodeLength_pg(*currentPreNodeID, preGraph);
678
+
679
+ }
680
+
681
+ static IDnum chooseNextInternalPreNode(IDnum currentPreNodeID,
682
+ IDnum sequenceIndex,
683
+ PreGraph * preGraph, IDnum * chains)
684
+ {
685
+ if (currentPreNodeID >= preNodeCount_pg(preGraph))
686
+ return 0;
687
+ if (sequenceIndex >= sequenceCount_pg(preGraph))
688
+ return currentPreNodeID + 1;
689
+ if (currentPreNodeID + 1 < chains[sequenceIndex + 1])
690
+ return currentPreNodeID + 1;
691
+ return 0;
692
+ }
693
+
694
+ static void connectAnnotation(IDnum * currentPreNodeID, Annotation * annot,
695
+ Coordinate * currentPosition,
696
+ IDnum sequenceIndex, boolean isReference,
697
+ PreGraph * preGraph)
698
+ {
699
+ IDnum nextPreNodeID = getStartID(annot);
700
+
701
+ connectPreNodeToTheNext(currentPreNodeID, nextPreNodeID,
702
+ currentPosition,
703
+ sequenceIndex, isReference, preGraph);
704
+
705
+ while (*currentPreNodeID != getFinishID(annot)) {
706
+ nextPreNodeID = (*currentPreNodeID) + 1;
707
+
708
+ connectPreNodeToTheNext(currentPreNodeID, nextPreNodeID,
709
+ currentPosition,
710
+ sequenceIndex,
711
+ isReference,
712
+ preGraph);
713
+ }
714
+ }
715
+
716
+ static void reConnectAnnotation(IDnum * currentPreNodeID, Annotation * annot,
717
+ Coordinate * currentPosition,
718
+ IDnum sequenceIndex,
719
+ PreGraph * preGraph,
720
+ PreMarker ** previous)
721
+ {
722
+ IDnum nextPreNodeID = getStartID(annot);
723
+
724
+ #ifdef _OPENMP
725
+ lockNode(nextPreNodeID);
726
+ #endif
727
+ *previous = addPreMarker_pg(preGraph,
728
+ nextPreNodeID,
729
+ sequenceIndex,
730
+ currentPosition,
731
+ *previous);
732
+ #ifdef _OPENMP
733
+ unLockNode(nextPreNodeID);
734
+ #endif
735
+
736
+ while (*currentPreNodeID != getFinishID(annot)) {
737
+ nextPreNodeID = (*currentPreNodeID) + 1;
738
+
739
+ #ifdef _OPENMP
740
+ lockNode(nextPreNodeID);
741
+ #endif
742
+ *previous = addPreMarker_pg(preGraph,
743
+ nextPreNodeID,
744
+ sequenceIndex,
745
+ currentPosition,
746
+ *previous);
747
+ #ifdef _OPENMP
748
+ unLockNode(nextPreNodeID);
749
+ #endif
750
+ *currentPreNodeID = nextPreNodeID;
751
+ }
752
+ }
753
+
754
+ static void createPreMarkers(RoadMapArray * rdmaps, PreGraph * preGraph,
755
+ IDnum * chains)
756
+ {
757
+ IDnum sequenceIndex;
758
+ IDnum referenceCount = rdmaps->referenceCount;
759
+ #ifndef _OPENMP
760
+ Annotation *annot = rdmaps->annotations;
761
+ #endif
762
+
763
+ #ifdef _OPENMP
764
+ int threads = omp_get_max_threads();
765
+ if (threads > 8)
766
+ threads = 8;
767
+
768
+ #pragma omp parallel for num_threads(threads)
769
+ #endif
770
+ for (sequenceIndex = 1;
771
+ sequenceIndex <= referenceCount;
772
+ sequenceIndex++) {
773
+ #ifdef _OPENMP
774
+ Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]);
775
+ #endif
776
+ RoadMap *rdmap;
777
+ Coordinate currentPosition, currentInternalPosition;
778
+ IDnum currentPreNodeID, nextInternalPreNodeID;
779
+ IDnum annotIndex, lastAnnotIndex;
780
+ PreMarker * previous;
781
+
782
+ if (sequenceIndex % 1000000 == 0)
783
+ velvetLog("Connecting %li / %li\n", (long) sequenceIndex,
784
+ (long) sequenceCount_pg(preGraph));
785
+
786
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
787
+ annotIndex = 0;
788
+ lastAnnotIndex = getAnnotationCount(rdmap);
789
+ nextInternalPreNodeID = chooseNextInternalPreNode
790
+ (chains[sequenceIndex] - 1, sequenceIndex,
791
+ preGraph, chains);
792
+
793
+ previous = NULL;
794
+ currentPosition = 0;
795
+ currentInternalPosition = 0;
796
+ currentPreNodeID = 0;
797
+ // Recursion up to last annotation
798
+ while (annotIndex < lastAnnotIndex
799
+ || nextInternalPreNodeID != 0) {
800
+ if (annotIndex == lastAnnotIndex
801
+ || (nextInternalPreNodeID != 0
802
+ && currentInternalPosition <
803
+ getPosition(annot))) {
804
+ #ifdef _OPENMP
805
+ lockNode(nextInternalPreNodeID);
806
+ #endif
807
+ previous = addPreMarker_pg(preGraph,
808
+ nextInternalPreNodeID,
809
+ sequenceIndex,
810
+ &currentPosition,
811
+ previous);
812
+ #ifdef _OPENMP
813
+ unLockNode(nextInternalPreNodeID);
814
+ #endif
815
+ currentPreNodeID = nextInternalPreNodeID;
816
+ nextInternalPreNodeID =
817
+ chooseNextInternalPreNode
818
+ (currentPreNodeID, sequenceIndex,
819
+ preGraph, chains);
820
+ currentInternalPosition +=
821
+ getPreNodeLength_pg(currentPreNodeID,
822
+ preGraph);
823
+
824
+ } else {
825
+ reConnectAnnotation(&currentPreNodeID, annot,
826
+ &currentPosition,
827
+ sequenceIndex,
828
+ preGraph,
829
+ &previous);
830
+ annot = getNextAnnotation(annot);
831
+ annotIndex++;
832
+ }
833
+ }
834
+ }
835
+ }
836
+
837
+ // Threads each sequences and creates preArcs according to road map indications
838
+ static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
839
+ IDnum * chains)
840
+ {
841
+ IDnum sequenceIndex;
842
+ IDnum referenceCount = rdmaps->referenceCount;
843
+ #ifdef _OPENMP
844
+ annotationOffset = mallocOrExit(rdmaps->length + 1, Coordinate);
845
+ annotationOffset[0] = 0;
846
+ for (sequenceIndex = 1; sequenceIndex <= rdmaps->length; sequenceIndex++)
847
+ annotationOffset[sequenceIndex] = annotationOffset[sequenceIndex - 1] +
848
+ getAnnotationCount(getRoadMapInArray(rdmaps, sequenceIndex - 1));
849
+ #else
850
+ Annotation *annot = rdmaps->annotations;
851
+ #endif
852
+
853
+ if (rdmaps->referenceCount > 0)
854
+ allocatePreMarkerCountSpace_pg(preGraph);
855
+
856
+ #ifdef _OPENMP
857
+ int threads = omp_get_max_threads();
858
+ if (threads > 8)
859
+ threads = 8;
860
+
861
+ #pragma omp parallel for num_threads(threads)
862
+ #endif
863
+ for (sequenceIndex = 1;
864
+ sequenceIndex <= sequenceCount_pg(preGraph);
865
+ sequenceIndex++) {
866
+ #ifdef _OPENMP
867
+ Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]);
868
+ #endif
869
+ RoadMap *rdmap;
870
+ Coordinate currentPosition, currentInternalPosition;
871
+ IDnum currentPreNodeID, nextInternalPreNodeID;
872
+ IDnum annotIndex, lastAnnotIndex;
873
+ boolean isReference;
874
+
875
+ if (sequenceIndex % 1000000 == 0)
876
+ velvetLog("Connecting %li / %li\n", (long) sequenceIndex,
877
+ (long) sequenceCount_pg(preGraph));
878
+
879
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
880
+ annotIndex = 0;
881
+ lastAnnotIndex = getAnnotationCount(rdmap);
882
+ nextInternalPreNodeID = chooseNextInternalPreNode
883
+ (chains[sequenceIndex] - 1, sequenceIndex,
884
+ preGraph, chains);
885
+ isReference = (sequenceIndex <= referenceCount);
886
+
887
+ currentPosition = 0;
888
+ currentInternalPosition = 0;
889
+ currentPreNodeID = 0;
890
+ // Recursion up to last annotation
891
+ while (annotIndex < lastAnnotIndex
892
+ || nextInternalPreNodeID != 0) {
893
+ if (annotIndex == lastAnnotIndex
894
+ || (nextInternalPreNodeID != 0
895
+ && currentInternalPosition <
896
+ getPosition(annot))) {
897
+ connectPreNodeToTheNext(&currentPreNodeID,
898
+ nextInternalPreNodeID,
899
+ &currentPosition,
900
+ sequenceIndex,
901
+ isReference,
902
+ preGraph);
903
+ nextInternalPreNodeID =
904
+ chooseNextInternalPreNode
905
+ (currentPreNodeID, sequenceIndex,
906
+ preGraph, chains);
907
+ currentInternalPosition +=
908
+ getPreNodeLength_pg(currentPreNodeID,
909
+ preGraph);
910
+
911
+ } else {
912
+ connectAnnotation(&currentPreNodeID, annot,
913
+ &currentPosition,
914
+ sequenceIndex, isReference,
915
+ preGraph);
916
+ annot = getNextAnnotation(annot);
917
+ annotIndex++;
918
+ }
919
+ }
920
+ }
921
+
922
+ if (rdmaps->referenceCount > 0) {
923
+ allocatePreMarkerSpace_pg(preGraph);
924
+ createPreMarkers(rdmaps, preGraph, chains);
925
+ }
926
+
927
+ #ifdef _OPENMP
928
+ free(annotationOffset);
929
+ annotationOffset = NULL;
930
+ #endif
931
+ }
932
+
933
+ // Post construction memory deallocation routine (of sorts, could certainly be optimized)
934
+ static void
935
+ cleanUpMemory(PreGraph * preGraph, RoadMapArray * rdmaps, IDnum * chains)
936
+ {
937
+ // Killing off roadmaps
938
+ destroyRoadMapArray(rdmaps);
939
+
940
+ // Finishing off the chain markers
941
+ free(chains);
942
+ }
943
+
944
+ // The full monty, wrapped up in one function
945
+ PreGraph *newPreGraph_pg(RoadMapArray * rdmapArray, SequencesReader *seqReadInfo)
946
+ {
947
+ int WORDLENGTH = rdmapArray->WORDLENGTH;
948
+ IDnum sequenceCount = rdmapArray->length;
949
+ IDnum *markerCounters = callocOrExit(sequenceCount + 1, IDnum);
950
+ IDnum *chains = callocOrExit(sequenceCount + 1, IDnum);
951
+ InsertionMarker *insertionMarkers;
952
+ InsertionMarker *veryLastMarker;
953
+
954
+ PreGraph *preGraph =
955
+ emptyPreGraph_pg(sequenceCount, rdmapArray->referenceCount, rdmapArray->WORDLENGTH, rdmapArray->double_strand);
956
+
957
+ velvetLog("Creating insertion markers\n");
958
+ setInsertionMarkers(rdmapArray, markerCounters, &veryLastMarker,
959
+ &insertionMarkers);
960
+
961
+ velvetLog("Counting preNodes\n");
962
+ countPreNodes(rdmapArray, preGraph, markerCounters,
963
+ insertionMarkers, veryLastMarker);
964
+
965
+ velvetLog("%li preNodes counted, creating them now\n",
966
+ (long) preNodeCount_pg(preGraph));
967
+ createPreNodes(rdmapArray, preGraph, markerCounters,
968
+ insertionMarkers, veryLastMarker, chains,
969
+ seqReadInfo, WORDLENGTH);
970
+
971
+ velvetLog("Adjusting marker info...\n");
972
+ convertInsertionMarkers(insertionMarkers, veryLastMarker, chains);
973
+
974
+ #ifdef _OPENMP
975
+ createNodeLocks(preGraph);
976
+ #endif
977
+ velvetLog("Connecting preNodes\n");
978
+ connectPreNodes(rdmapArray, preGraph, chains);
979
+
980
+ velvetLog("Cleaning up memory\n");
981
+ cleanUpMemory(preGraph, rdmapArray, chains);
982
+ #ifdef _OPENMP
983
+ free(nodeLocks);
984
+ nodeLocks = NULL;
985
+ #endif
986
+
987
+ velvetLog("Done creating preGraph\n");
988
+
989
+ return preGraph;
990
+ }