finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554)
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,106 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #ifndef _PREGRAPH_H_
22
+ #define _PREGRAPH_H_
23
+ // NOTE: this header includes nothing and declares no types of its own. IDnum, Coordinate, boolean, PreGraph, PreNode, PreArcI, PreMarker, Descriptor, Kmer and SequencesReader must already be declared by whoever includes it.
24
+ ////////////////////////////////////////////////////////////
25
+ // PreNode functions (each one names the node by its IDnum and takes the owning PreGraph)
26
+ ////////////////////////////////////////////////////////////
27
+
28
+ void destroyPreNode_pg(IDnum preNode, PreGraph * preGraph);
29
+
30
+ // Locator: from a preNode ID to its PreNode record
31
+ PreNode *getPreNodeInPreGraph_pg(PreGraph * preGraph, IDnum preNodeID);
32
+
33
+ // PreArc info
34
+ PreArcI getPreArc_pg(IDnum preNodeID, PreGraph * preGraph);
35
+ boolean hasSinglePreArc_pg(IDnum preNodeID, PreGraph * graph); // NOTE(review): parameter is spelled "graph" here and "preGraph" in every other prototype
36
+ char simplePreArcCount_pg(IDnum preNodeID, PreGraph * preGraph);
37
+
38
+ // Descriptor (the node's sequence data and its length)
39
+ Coordinate getPreNodeLength_pg(IDnum preNodeID, PreGraph * preGraph);
40
+ void setPreNodeDescriptor_pg(Descriptor * descr, Coordinate length, IDnum preNodeID, PreGraph * preGraph);
41
+ void appendDescriptors_pg(Descriptor ** start, int * writeOffset, IDnum preNodeID, PreGraph* preGraph, boolean initial);
42
+
43
+ ////////////////////////////////////////////////////////////
44
+ // PreMarker functions (reference-sequence markers attached to preNodes)
45
+ ////////////////////////////////////////////////////////////
46
+ // NOTE(review): hasPreMarkers is the only function in this header without the _pg suffix.
47
+ boolean referenceMarkersAreActivated_pg(PreGraph * preGraph);
48
+ void allocatePreMarkerCountSpace_pg(PreGraph * preGraph);
49
+ void incrementNodeReferenceMarkerCount_pg(PreGraph * preGraph, IDnum preNodeID);
50
+ void allocatePreMarkerSpace_pg(PreGraph * preGraph);
51
+ PreMarker * addPreMarker_pg(PreGraph * preGraph, IDnum nodeID, IDnum seqID, Coordinate * start, PreMarker * previous); // returns a PreMarker pointer that callers hand back as "previous" on the next call
52
+ void concatenateReferenceMarkers_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset);
53
+ boolean hasPreMarkers(IDnum nodeID, PreGraph * preGraph);
54
+
55
+ ////////////////////////////////////////////////////////////
56
+ // PreArc functions (arcs are handled through PreArcI values)
57
+ ////////////////////////////////////////////////////////////
58
+
59
+ // Creators/destructor
60
+ PreArcI createPreArc_pg(IDnum originID, IDnum destinationID,
61
+ PreGraph * preGraph);
62
+ void createAnalogousPreArc_pg(IDnum originID, IDnum destinationID,
63
+ PreArcI refPreArc, PreGraph * preGraph);
64
+ void destroyPreArc_pg(PreArcI preArc, PreGraph * preGraph);
65
+
66
+ // Multiplicity
67
+ void setMultiplicity_pg(PreArcI preArc, IDnum mult);
68
+ IDnum getMultiplicity_pg(PreArcI preArc);
69
+
70
+ // Extremities
71
+ IDnum getDestination_pg(PreArcI preArc, IDnum nodeID);
72
+ IDnum getOtherEnd_pg(PreArcI preArc, IDnum preNodeID);
73
+
74
+ // Finding preArcs
75
+ PreArcI getPreArcBetweenPreNodes_pg(IDnum originID, IDnum destinationID,
76
+ PreGraph * preGraph);
77
+ PreArcI getNextPreArc_pg(PreArcI preArc, IDnum originPreNodeID);
78
+
79
+ // Misc
80
+ boolean isLoop_pg(PreArcI preArc);
81
+
82
+ ////////////////////////////////////////////////////////////
83
+ // PreGraph functions (whole-graph lifecycle, dimensions and export)
84
+ ////////////////////////////////////////////////////////////
85
+
86
+ // Memory allocation
87
+ PreGraph *emptyPreGraph_pg(IDnum sequenceCount, IDnum referenceCount, int wordLength, boolean double_strand);
88
+ void allocatePreNodeSpace_pg(PreGraph * preGraph, IDnum preNodeCount);
89
+ void addPreNodeToPreGraph_pg(PreGraph * preGraph, Coordinate start,
90
+ Coordinate stop, SequencesReader *seqReadInfo,
91
+ Kmer * initialKmer, IDnum ID);
92
+
93
+ // Deallocation
94
+ void destroyPreGraph_pg(PreGraph * preGraph);
95
+
96
+ // Dimensions
97
+ IDnum preNodeCount_pg(PreGraph * preGraph);
98
+ IDnum sequenceCount_pg(PreGraph * preGraph);
99
+ void renumberPreNodes_pg(PreGraph * preGraph);
100
+
101
+ // File IO
102
+ void exportPreGraph_pg(char *filename, PreGraph * preGraph);
103
+
104
+ int getWordLength_pg(PreGraph * preGraph);
105
+
106
+ #endif
@@ -0,0 +1,990 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <ctype.h>
25
+
26
+ #ifdef _OPENMP
27
+ #include <omp.h>
28
+ #endif
29
+
30
+ #include "globals.h"
31
+ #include "preGraph.h"
32
+ #include "recycleBin.h"
33
+ #include "roadMap.h"
34
+ #include "readSet.h"
35
+ #include "concatenatedPreGraph.h"
36
+ #include "utility.h"
37
+ #include "kmer.h"
38
+ #include "tightString.h"
39
+ #include "binarySequences.h"
40
+ #define ADENINE 0
41
+ #define CYTOSINE 1
42
+ #define GUANINE 2
43
+ #define THYMINE 3
44
+
45
+ #ifdef _OPENMP
46
+
47
+ Coordinate *annotationOffset = NULL; // OpenMP builds only: running totals of per-sequence annotation counts; allocated and freed inside connectPreNodes
48
+
49
+ static omp_lock_t *nodeLocks = NULL; // OpenMP builds only: lock array of preNodeCount + 1 entries, (re)built by createNodeLocks
50
+
51
+ static void createNodeLocks(PreGraph *preGraph)
52
+ {
53
+ IDnum nbNodes;
54
+ IDnum nodeIndex;
55
+
56
+ nbNodes = preNodeCount_pg(preGraph) + 1;
57
+ if (nodeLocks)
58
+ free (nodeLocks);
59
+ nodeLocks = mallocOrExit(nbNodes, omp_lock_t);
60
+
61
+ #pragma omp parallel for
62
+ for (nodeIndex = 0; nodeIndex < nbNodes; nodeIndex++)
63
+ omp_init_lock(nodeLocks + nodeIndex);
64
+ }
65
+
66
+ static void lockNode(IDnum preNodeID)
67
+ {
68
+ omp_set_lock(nodeLocks + preNodeID);
69
+ }
70
+
71
+ static void unLockNode(IDnum preNodeID)
72
+ {
73
+ omp_unset_lock(nodeLocks + preNodeID);
74
+ }
75
+
76
+ static void lockTwoNodes(IDnum preNodeID, IDnum preNode2ID)
77
+ {
78
+ if (preNodeID < 0)
79
+ preNodeID = -preNodeID;
80
+ if (preNode2ID < 0)
81
+ preNode2ID = -preNode2ID;
82
+
83
+ /* Lock lowest ID first to avoid deadlocks */
84
+ if (preNodeID == preNode2ID)
85
+ omp_set_lock (nodeLocks + preNodeID);
86
+ else if (preNodeID < preNode2ID)
87
+ {
88
+ omp_set_lock (nodeLocks + preNodeID);
89
+ omp_set_lock (nodeLocks + preNode2ID);
90
+ }
91
+ else
92
+ {
93
+ omp_set_lock (nodeLocks + preNode2ID);
94
+ omp_set_lock (nodeLocks + preNodeID);
95
+ }
96
+ }
97
+
98
+ static void unLockTwoNodes(IDnum preNodeID, IDnum preNode2ID)
99
+ {
100
+ if (preNodeID < 0)
101
+ preNodeID = -preNodeID;
102
+ if (preNode2ID < 0)
103
+ preNode2ID = -preNode2ID;
104
+
105
+ omp_unset_lock (nodeLocks + preNodeID);
106
+ if (preNodeID != preNode2ID)
107
+ omp_unset_lock (nodeLocks + preNode2ID);
108
+ }
109
+ #endif
110
+
111
+ // Internal structure used to mark one end (start or finish) of an Annotation
112
+ struct insertionMarker_st {
113
+ Annotation *annot; // annotation whose end this marker represents
114
+ boolean isStart; // true: position is getStart(annot); false: position is getFinish(annot)
115
+ } ATTRIBUTE_PACKED;
116
+
117
+ Coordinate getInsertionMarkerPosition(InsertionMarker * marker)
118
+ {
119
+ if (marker->isStart)
120
+ return getStart(marker->annot);
121
+ else
122
+ return getFinish(marker->annot);
123
+ }
124
+
125
+ int compareInsertionMarkers(const void *A, const void *B)
126
+ {
127
+ Coordinate Apos =
128
+ getInsertionMarkerPosition((InsertionMarker *) A);
129
+ Coordinate Bpos =
130
+ getInsertionMarkerPosition((InsertionMarker *) B);
131
+
132
+ if (Apos < Bpos)
133
+ return -1;
134
+ else if (Apos == Bpos)
135
+ return 0;
136
+ else
137
+ return 1;
138
+ }
139
+
140
+ // Applies mergeSort to each insertion marker list (in order of position)
141
+ static void
142
+ orderInsertionMarkers(InsertionMarker ** insMarkers,
143
+ IDnum * markerCounters, RoadMapArray * rdmaps)
144
+ {
145
+ IDnum sequenceIndex;
146
+ IDnum sequenceCounter = rdmaps->length;
147
+
148
+ velvetLog("Ordering insertion markers\n");
149
+ #ifdef _OPENMP
150
+ #pragma omp parallel for
151
+ #endif
152
+ for (sequenceIndex = 1; sequenceIndex <= sequenceCounter;
153
+ sequenceIndex++) {
154
+ qsort(insMarkers[sequenceIndex],
155
+ markerCounters[sequenceIndex],
156
+ sizeof(InsertionMarker), compareInsertionMarkers);
157
+ }
158
+ }
159
+
160
+ // Creates insertion marker lists
161
+ static void
162
+ setInsertionMarkers(RoadMapArray * rdmaps,
163
+ IDnum * markerCounters,
164
+ InsertionMarker ** veryLastMarker,
165
+ InsertionMarker ** insertionMarkers)
166
+ {
167
+ IDnum sequenceCounter = rdmaps->length;
168
+ IDnum sequenceIndex, sequenceIndex2;
169
+ Coordinate totalCount = 0;
170
+ RoadMap *rdmap;
171
+ Annotation *annot = rdmaps->annotations;
172
+ InsertionMarker *nextMarker, *newMarker;
173
+ IDnum annotIndex, lastAnnotIndex;
174
+ InsertionMarker **insMarkers =
175
+ callocOrExit(rdmaps->length + 1, InsertionMarker *);
176
+ // Counting insertion markers
177
+ for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
178
+ sequenceIndex++) {
179
+ //velvetLog("Going through sequence %d\n", sequenceIndex);
180
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
181
+ lastAnnotIndex = getAnnotationCount(rdmap);
182
+
183
+ // Set insertion markers in previous sequences :
184
+
185
+ for (annotIndex = 0; annotIndex < lastAnnotIndex;
186
+ annotIndex++) {
187
+ if (getAnnotSequenceID(annot) > 0) {
188
+ markerCounters[getAnnotSequenceID(annot)]
189
+ += 2;
190
+ } else {
191
+ markerCounters[-getAnnotSequenceID(annot)]
192
+ += 2;
193
+ }
194
+ totalCount += 2;
195
+ annot = getNextAnnotation(annot);
196
+ }
197
+ }
198
+
199
+ // Allocating space
200
+ *insertionMarkers = callocOrExit(totalCount, InsertionMarker);
201
+ *veryLastMarker = *insertionMarkers + totalCount;
202
+
203
+ // Pointing each node to its space
204
+ nextMarker = *insertionMarkers;
205
+ for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
206
+ sequenceIndex++) {
207
+ insMarkers[sequenceIndex] = nextMarker;
208
+ nextMarker = nextMarker + markerCounters[sequenceIndex];
209
+ markerCounters[sequenceIndex] = 0;
210
+ }
211
+
212
+ // Filling up space with data
213
+ annot = rdmaps->annotations;
214
+ for (sequenceIndex = 1; sequenceIndex < sequenceCounter + 1;
215
+ sequenceIndex++) {
216
+ //velvetLog("Going through sequence %d\n", sequenceIndex);
217
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
218
+ lastAnnotIndex = getAnnotationCount(rdmap);
219
+
220
+ // Set insertion markers in previous sequences :
221
+
222
+ for (annotIndex = 0; annotIndex < lastAnnotIndex;
223
+ annotIndex++) {
224
+ sequenceIndex2 = getAnnotSequenceID(annot);
225
+ if (sequenceIndex2 > 0) {
226
+ newMarker =
227
+ insMarkers[sequenceIndex2] +
228
+ (markerCounters[sequenceIndex2])++;
229
+ newMarker->annot = annot;
230
+ newMarker->isStart = true;
231
+
232
+ newMarker =
233
+ insMarkers[sequenceIndex2] +
234
+ (markerCounters[sequenceIndex2])++;
235
+ newMarker->annot = annot;
236
+ newMarker->isStart = false;
237
+ } else {
238
+ incrementAnnotationCoordinates(annot);
239
+
240
+ newMarker =
241
+ insMarkers[-sequenceIndex2] +
242
+ (markerCounters[-sequenceIndex2])++;
243
+ newMarker->annot = annot;
244
+ newMarker->isStart = true;
245
+
246
+ newMarker =
247
+ insMarkers[-sequenceIndex2] +
248
+ (markerCounters[-sequenceIndex2])++;
249
+ newMarker->annot = annot;
250
+ newMarker->isStart = false;
251
+ }
252
+ annot = getNextAnnotation(annot);
253
+ }
254
+ }
255
+
256
+ orderInsertionMarkers(insMarkers, markerCounters, rdmaps);
257
+ free(insMarkers);
258
+ }
259
+
260
+ // Counts how many preNodes are to be created to allocate appropriate memory
261
+ static void
262
+ countPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
263
+ IDnum * markerCounters, InsertionMarker * insertionMarkers,
264
+ InsertionMarker * veryLastMarker)
265
+ {
266
+ Annotation *annot = rdmaps->annotations;
267
+ InsertionMarker *currentMarker = insertionMarkers;
268
+ IDnum markerIndex, lastMarkerIndex;
269
+ IDnum sequenceIndex;
270
+ Coordinate currentPosition, nextStop;
271
+ IDnum preNodeCounter = 0;
272
+ RoadMap *rdmap;
273
+ IDnum annotIndex, lastAnnotIndex;
274
+
275
+ // Now that we have read all of the annotations, we go on to create the preNodes and tie them up
276
+ for (sequenceIndex = 1;
277
+ sequenceIndex <= sequenceCount_pg(preGraph);
278
+ sequenceIndex++) {
279
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
280
+ annotIndex = 0;
281
+ lastAnnotIndex = getAnnotationCount(rdmap);
282
+ markerIndex = 0;
283
+ lastMarkerIndex = markerCounters[sequenceIndex];
284
+ currentPosition = 0;
285
+
286
+
287
+ while (annotIndex < lastAnnotIndex) {
288
+ if (markerIndex == lastMarkerIndex
289
+ || getPosition(annot) <=
290
+ getInsertionMarkerPosition(currentMarker))
291
+ nextStop = getPosition(annot);
292
+ else
293
+ nextStop =
294
+ getInsertionMarkerPosition
295
+ (currentMarker);
296
+
297
+ if (currentPosition != nextStop) {
298
+ preNodeCounter++;
299
+ currentPosition = nextStop;
300
+ }
301
+
302
+ while (markerIndex < lastMarkerIndex
303
+ && getInsertionMarkerPosition(currentMarker)
304
+ == currentPosition) {
305
+ currentMarker++;
306
+ markerIndex++;
307
+ }
308
+
309
+ while (annotIndex < lastAnnotIndex
310
+ && getPosition(annot) == currentPosition) {
311
+ annot = getNextAnnotation(annot);
312
+ annotIndex++;
313
+ }
314
+
315
+ }
316
+
317
+ while (markerIndex < lastMarkerIndex) {
318
+ if (currentPosition ==
319
+ getInsertionMarkerPosition(currentMarker)) {
320
+ currentMarker++;
321
+ markerIndex++;
322
+ } else {
323
+ preNodeCounter++;
324
+ currentPosition =
325
+ getInsertionMarkerPosition
326
+ (currentMarker);
327
+ }
328
+ }
329
+ }
330
+
331
+ allocatePreNodeSpace_pg(preGraph, preNodeCounter);
332
+ }
333
+
334
+ static void convertInsertionMarkers(InsertionMarker * insertionMarkers,
335
+ InsertionMarker * veryLastMarker,
336
+ IDnum * chains)
337
+ {
338
+ InsertionMarker *marker;
339
+ Annotation *annot;
340
+
341
+ for (marker = insertionMarkers; marker != veryLastMarker; marker++) {
342
+ annot = marker->annot;
343
+
344
+ if (getAnnotSequenceID(annot) > 0) {
345
+ if (marker->isStart) {
346
+ if (getStartID(annot) == 0)
347
+ setStartID(annot,
348
+ chains
349
+ [getAnnotSequenceID
350
+ (annot)]);
351
+ else
352
+ setStartID(annot,
353
+ getStartID(annot) + 1);
354
+ }
355
+ } else {
356
+ if (marker->isStart)
357
+ setStartID(annot, -getStartID(annot));
358
+ else {
359
+ if (getFinishID(annot) == 0)
360
+ setFinishID(annot,
361
+ -chains
362
+ [-getAnnotSequenceID
363
+ (annot)]);
364
+ else
365
+ setFinishID(annot,
366
+ -getFinishID(annot) -
367
+ 1);
368
+ }
369
+ }
370
+ }
371
+
372
+ free(insertionMarkers);
373
+ }
374
+
375
+ static void convertMarker(InsertionMarker * marker, IDnum nodeID)
376
+ {
377
+ if (marker->isStart)
378
+ setStartID(marker->annot, nodeID);
379
+ else
380
+ setFinishID(marker->annot, nodeID);
381
+ }
382
+
383
+ // Creates the preNode using insertion marker and annotation lists for each sequence
384
+ static void
385
+ // Creates the preNode using insertion marker and annotation lists for each sequence
386
+ createPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
387
+ IDnum * markerCounters, InsertionMarker * insertionMarkers,
388
+ InsertionMarker * veryLastMarker, IDnum * chains,
389
+ SequencesReader *seqReadInfo, int WORDLENGTH)
390
+ {
391
+ char *sequenceFilename = seqReadInfo->m_seqFilename;
392
+ Annotation *annot = rdmaps->annotations;
393
+ IDnum latestPreNodeID;
394
+ InsertionMarker *currentMarker = insertionMarkers;
395
+ IDnum sequenceIndex;
396
+ Coordinate currentPosition, nextStop;
397
+ IDnum preNodeCounter = 1;
398
+ FILE *file = NULL;
399
+ char line[50000];
400
+ int lineLength = 50000;
401
+ Coordinate readIndex;
402
+ boolean tooShort;
403
+ Kmer initialKmer;
404
+ char c;
405
+ RoadMap *rdmap;
406
+ IDnum annotIndex, lastAnnotIndex;
407
+ IDnum markerIndex, lastMarkerIndex;
408
+
409
+ if (!seqReadInfo->m_bIsBinary) {
410
+ file = fopen(sequenceFilename, "r");
411
+ if (file == NULL)
412
+ exitErrorf(EXIT_FAILURE, true, "Could not read %s", sequenceFilename);
413
+ // Reading sequence descriptor in first line
414
+ if (sequenceCount_pg(preGraph) > 0 && !fgets(line, lineLength, file))
415
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
416
+ seqReadInfo->m_pFile = file;
417
+ }
418
+
419
+ // Now that we have read all of the annotations, we go on to create the preNodes and tie them up
420
+ for (sequenceIndex = 1;
421
+ sequenceIndex <= sequenceCount_pg(preGraph);
422
+ sequenceIndex++) {
423
+ if (sequenceIndex % 1000000 == 0)
424
+ velvetLog("Sequence %li / %li\n", (long) sequenceIndex,
425
+ (long) sequenceCount_pg(preGraph));
426
+
427
+ if (!seqReadInfo->m_bIsBinary) {
428
+ while (line[0] != '>')
429
+ if (!fgets(line, lineLength, file))
430
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
431
+ }
432
+
433
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
434
+ annotIndex = 0;
435
+ lastAnnotIndex = getAnnotationCount(rdmap);
436
+ markerIndex = 0;
437
+ lastMarkerIndex = markerCounters[sequenceIndex];
438
+ currentPosition = 0;
439
+
440
+ // Reading first (k-1) nucleotides
441
+ tooShort = false;
442
+ clearKmer(&initialKmer);
443
+ //velvetLog("Initial kmer: ");
444
+ TightString *tString = NULL;
445
+ char *strString = NULL;
446
+ if (seqReadInfo->m_bIsBinary) {
447
+ tString = getTightStringInArray(seqReadInfo->m_sequences->tSequences, sequenceIndex - 1);
448
+ strString = readTightString(tString);
449
+ }
450
+ for (readIndex = 0; readIndex < WORDLENGTH - 1;
451
+ readIndex++) {
452
+ if (seqReadInfo->m_bIsBinary) {
453
+ if (readIndex >= tString->length) {
454
+ tooShort = true;
455
+ break;
456
+ }
457
+
458
+ c = strString[readIndex];
459
+ } else {
460
+ c = getc(file);
461
+ while (c == '\n' || c == '\r')
462
+ c = getc(file);
463
+
464
+ if (c == '>' || c == 'M' || c == EOF) {
465
+ ungetc(c, file);
466
+ tooShort = true;
467
+ break;
468
+ }
469
+ }
470
+ switch (c) {
471
+ case 'A':
472
+ case 'N':
473
+ pushNucleotide(&initialKmer, ADENINE);
474
+ break;
475
+ case 'C':
476
+ pushNucleotide(&initialKmer, CYTOSINE);
477
+ break;
478
+ case 'G':
479
+ pushNucleotide(&initialKmer, GUANINE);
480
+ break;
481
+ case 'T':
482
+ pushNucleotide(&initialKmer, THYMINE);
483
+ break;
484
+ default:
485
+ velvetLog
486
+ ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n");
487
+ fflush(stdout);
488
+ abort();
489
+ }
490
+ }
491
+
492
+ if (tooShort) {
493
+ //velvetLog("Skipping short read.. %d\n", sequenceIndex);
494
+ chains[sequenceIndex] = preNodeCounter;
495
+ if (seqReadInfo->m_bIsBinary) {
496
+ free(strString);
497
+ } else {
498
+ if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph))
499
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
500
+ }
501
+ continue;
502
+ }
503
+
504
+ char *currString = NULL;
505
+ if (seqReadInfo->m_bIsBinary) {
506
+ currString = &strString[readIndex];
507
+ seqReadInfo->m_ppCurrString = &currString;
508
+ }
509
+ latestPreNodeID = 0;
510
+
511
+ while (annotIndex < lastAnnotIndex) {
512
+ if (markerIndex == lastMarkerIndex
513
+ || getPosition(annot) <=
514
+ getInsertionMarkerPosition(currentMarker))
515
+ nextStop = getPosition(annot);
516
+ else {
517
+ nextStop =
518
+ getInsertionMarkerPosition
519
+ (currentMarker);
520
+ }
521
+
522
+ if (currentPosition != nextStop) {
523
+ if (seqReadInfo->m_bIsBinary) {
524
+ if (readIndex >= tString->length) {
525
+ velvetLog("readIndex %ld beyond string len %ld\n", (uint64_t) readIndex, (uint64_t) tString->length);
526
+ exit(1);
527
+ }
528
+ }
529
+ //if (sequenceIndex == 481)
530
+ // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop);
531
+ addPreNodeToPreGraph_pg(preGraph,
532
+ currentPosition,
533
+ nextStop,
534
+ seqReadInfo,
535
+ &initialKmer,
536
+ preNodeCounter);
537
+ if (latestPreNodeID == 0) {
538
+ chains[sequenceIndex] =
539
+ preNodeCounter;
540
+ }
541
+ latestPreNodeID = preNodeCounter++;
542
+ currentPosition = nextStop;
543
+ }
544
+
545
+ while (markerIndex < lastMarkerIndex
546
+ && getInsertionMarkerPosition(currentMarker)
547
+ == nextStop) {
548
+ convertMarker(currentMarker,
549
+ latestPreNodeID);
550
+ currentMarker++;
551
+ markerIndex++;
552
+ }
553
+
554
+ while (annotIndex < lastAnnotIndex
555
+ && getPosition(annot) == nextStop) {
556
+ for (readIndex = 0;
557
+ readIndex <
558
+ getAnnotationLength(annot);
559
+ readIndex++) {
560
+ if (seqReadInfo->m_bIsBinary) {
561
+ c = *currString;
562
+ currString += 1; // increment the pointer
563
+ } else {
564
+ c = getc(file);
565
+ while (!isalpha(c))
566
+ c = getc(file);
567
+ }
568
+
569
+ //if (sequenceIndex == 481)
570
+ // velvetLog("(%c)", c);
571
+ switch (c) {
572
+ case 'A':
573
+ case 'N':
574
+ pushNucleotide(&initialKmer, ADENINE);
575
+ break;
576
+ case 'C':
577
+ pushNucleotide(&initialKmer, CYTOSINE);
578
+ break;
579
+ case 'G':
580
+ pushNucleotide(&initialKmer, GUANINE);
581
+ break;
582
+ case 'T':
583
+ pushNucleotide(&initialKmer, THYMINE);
584
+ break;
585
+ default:
586
+ velvetLog
587
+ ("Irregular sequence file: are you sure your Sequence and Roadmap file come from the same source?\n");
588
+ fflush(stdout);
589
+ #ifdef DEBUG
590
+ abort();
591
+ #endif
592
+ exit(1);
593
+ }
594
+ }
595
+
596
+ annot = getNextAnnotation(annot);
597
+ annotIndex++;
598
+ }
599
+
600
+ }
601
+
602
+ while (markerIndex < lastMarkerIndex) {
603
+ if (currentPosition ==
604
+ getInsertionMarkerPosition(currentMarker)) {
605
+ convertMarker(currentMarker,
606
+ latestPreNodeID);
607
+ currentMarker++;
608
+ markerIndex++;
609
+ } else {
610
+ nextStop =
611
+ getInsertionMarkerPosition
612
+ (currentMarker);
613
+ //if (sequenceIndex == 481)
614
+ // velvetLog("Adding pre nodes from %lli to %lli\n", (long long) currentPosition, (long long) nextStop);
615
+ addPreNodeToPreGraph_pg(preGraph,
616
+ currentPosition,
617
+ nextStop, seqReadInfo,
618
+ &initialKmer,
619
+ preNodeCounter);
620
+ if (latestPreNodeID == 0)
621
+ chains[sequenceIndex] =
622
+ preNodeCounter;
623
+ latestPreNodeID = preNodeCounter++;
624
+ currentPosition =
625
+ getInsertionMarkerPosition
626
+ (currentMarker);
627
+ }
628
+ }
629
+ if (seqReadInfo->m_bIsBinary) {
630
+ free(strString);
631
+ } else {
632
+ // End of sequence
633
+ if (!fgets(line, lineLength, file) && sequenceIndex < sequenceCount_pg(preGraph))
634
+ exitErrorf(EXIT_FAILURE, true, "%s incomplete.", sequenceFilename);
635
+ //velvetLog(" \n");
636
+ }
637
+
638
+ if (latestPreNodeID == 0)
639
+ chains[sequenceIndex] = preNodeCounter;
640
+ }
641
+
642
+ free(markerCounters);
643
+ if (!seqReadInfo->m_bIsBinary) {
644
+ fclose(file);
645
+ }
646
+
647
+ }
648
+
649
+ static void connectPreNodeToTheNext(IDnum * currentPreNodeID,
650
+ IDnum nextPreNodeID,
651
+ Coordinate * currentPosition,
652
+ IDnum sequenceIndex,
653
+ boolean isReference,
654
+ PreGraph * preGraph)
655
+ {
656
+ if (nextPreNodeID == 0)
657
+ return;
658
+
659
+ #ifdef _OPENMP
660
+ lockTwoNodes(*currentPreNodeID, nextPreNodeID);
661
+ #endif
662
+
663
+ if (isReference)
664
+ incrementNodeReferenceMarkerCount_pg(preGraph, nextPreNodeID);
665
+
666
+ if (!isReference && *currentPreNodeID != 0)
667
+ createPreArc_pg(*currentPreNodeID, nextPreNodeID,
668
+ preGraph);
669
+
670
+ #ifdef _OPENMP
671
+ unLockTwoNodes(*currentPreNodeID, nextPreNodeID);
672
+ #endif
673
+
674
+ *currentPreNodeID = nextPreNodeID;
675
+
676
+ *currentPosition +=
677
+ getPreNodeLength_pg(*currentPreNodeID, preGraph);
678
+
679
+ }
680
+
681
+ static IDnum chooseNextInternalPreNode(IDnum currentPreNodeID,
682
+ IDnum sequenceIndex,
683
+ PreGraph * preGraph, IDnum * chains)
684
+ {
685
+ if (currentPreNodeID >= preNodeCount_pg(preGraph))
686
+ return 0;
687
+ if (sequenceIndex >= sequenceCount_pg(preGraph))
688
+ return currentPreNodeID + 1;
689
+ if (currentPreNodeID + 1 < chains[sequenceIndex + 1])
690
+ return currentPreNodeID + 1;
691
+ return 0;
692
+ }
693
+
694
+ static void connectAnnotation(IDnum * currentPreNodeID, Annotation * annot,
695
+ Coordinate * currentPosition,
696
+ IDnum sequenceIndex, boolean isReference,
697
+ PreGraph * preGraph)
698
+ {
699
+ IDnum nextPreNodeID = getStartID(annot);
700
+
701
+ connectPreNodeToTheNext(currentPreNodeID, nextPreNodeID,
702
+ currentPosition,
703
+ sequenceIndex, isReference, preGraph);
704
+
705
+ while (*currentPreNodeID != getFinishID(annot)) {
706
+ nextPreNodeID = (*currentPreNodeID) + 1;
707
+
708
+ connectPreNodeToTheNext(currentPreNodeID, nextPreNodeID,
709
+ currentPosition,
710
+ sequenceIndex,
711
+ isReference,
712
+ preGraph);
713
+ }
714
+ }
715
+
716
+ static void reConnectAnnotation(IDnum * currentPreNodeID, Annotation * annot,
717
+ Coordinate * currentPosition,
718
+ IDnum sequenceIndex,
719
+ PreGraph * preGraph,
720
+ PreMarker ** previous)
721
+ {
722
+ IDnum nextPreNodeID = getStartID(annot);
723
+
724
+ #ifdef _OPENMP
725
+ lockNode(nextPreNodeID);
726
+ #endif
727
+ *previous = addPreMarker_pg(preGraph,
728
+ nextPreNodeID,
729
+ sequenceIndex,
730
+ currentPosition,
731
+ *previous);
732
+ #ifdef _OPENMP
733
+ unLockNode(nextPreNodeID);
734
+ #endif
735
+
736
+ while (*currentPreNodeID != getFinishID(annot)) {
737
+ nextPreNodeID = (*currentPreNodeID) + 1;
738
+
739
+ #ifdef _OPENMP
740
+ lockNode(nextPreNodeID);
741
+ #endif
742
+ *previous = addPreMarker_pg(preGraph,
743
+ nextPreNodeID,
744
+ sequenceIndex,
745
+ currentPosition,
746
+ *previous);
747
+ #ifdef _OPENMP
748
+ unLockNode(nextPreNodeID);
749
+ #endif
750
+ *currentPreNodeID = nextPreNodeID;
751
+ }
752
+ }
753
+
754
+ static void createPreMarkers(RoadMapArray * rdmaps, PreGraph * preGraph,
755
+ IDnum * chains)
756
+ {
757
+ IDnum sequenceIndex;
758
+ IDnum referenceCount = rdmaps->referenceCount;
759
+ #ifndef _OPENMP
760
+ Annotation *annot = rdmaps->annotations;
761
+ #endif
762
+
763
+ #ifdef _OPENMP
764
+ int threads = omp_get_max_threads();
765
+ if (threads > 8)
766
+ threads = 8;
767
+
768
+ #pragma omp parallel for num_threads(threads)
769
+ #endif
770
+ for (sequenceIndex = 1;
771
+ sequenceIndex <= referenceCount;
772
+ sequenceIndex++) {
773
+ #ifdef _OPENMP
774
+ Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]);
775
+ #endif
776
+ RoadMap *rdmap;
777
+ Coordinate currentPosition, currentInternalPosition;
778
+ IDnum currentPreNodeID, nextInternalPreNodeID;
779
+ IDnum annotIndex, lastAnnotIndex;
780
+ PreMarker * previous;
781
+
782
+ if (sequenceIndex % 1000000 == 0)
783
+ velvetLog("Connecting %li / %li\n", (long) sequenceIndex,
784
+ (long) sequenceCount_pg(preGraph));
785
+
786
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
787
+ annotIndex = 0;
788
+ lastAnnotIndex = getAnnotationCount(rdmap);
789
+ nextInternalPreNodeID = chooseNextInternalPreNode
790
+ (chains[sequenceIndex] - 1, sequenceIndex,
791
+ preGraph, chains);
792
+
793
+ previous = NULL;
794
+ currentPosition = 0;
795
+ currentInternalPosition = 0;
796
+ currentPreNodeID = 0;
797
+ // Recursion up to last annotation
798
+ while (annotIndex < lastAnnotIndex
799
+ || nextInternalPreNodeID != 0) {
800
+ if (annotIndex == lastAnnotIndex
801
+ || (nextInternalPreNodeID != 0
802
+ && currentInternalPosition <
803
+ getPosition(annot))) {
804
+ #ifdef _OPENMP
805
+ lockNode(nextInternalPreNodeID);
806
+ #endif
807
+ previous = addPreMarker_pg(preGraph,
808
+ nextInternalPreNodeID,
809
+ sequenceIndex,
810
+ &currentPosition,
811
+ previous);
812
+ #ifdef _OPENMP
813
+ unLockNode(nextInternalPreNodeID);
814
+ #endif
815
+ currentPreNodeID = nextInternalPreNodeID;
816
+ nextInternalPreNodeID =
817
+ chooseNextInternalPreNode
818
+ (currentPreNodeID, sequenceIndex,
819
+ preGraph, chains);
820
+ currentInternalPosition +=
821
+ getPreNodeLength_pg(currentPreNodeID,
822
+ preGraph);
823
+
824
+ } else {
825
+ reConnectAnnotation(&currentPreNodeID, annot,
826
+ &currentPosition,
827
+ sequenceIndex,
828
+ preGraph,
829
+ &previous);
830
+ annot = getNextAnnotation(annot);
831
+ annotIndex++;
832
+ }
833
+ }
834
+ }
835
+ }
836
+
837
+ // Threads each sequences and creates preArcs according to road map indications
838
+ static void connectPreNodes(RoadMapArray * rdmaps, PreGraph * preGraph,
839
+ IDnum * chains)
840
+ {
841
+ IDnum sequenceIndex;
842
+ IDnum referenceCount = rdmaps->referenceCount;
843
+ #ifdef _OPENMP
844
+ annotationOffset = mallocOrExit(rdmaps->length + 1, Coordinate);
845
+ annotationOffset[0] = 0;
846
+ for (sequenceIndex = 1; sequenceIndex <= rdmaps->length; sequenceIndex++)
847
+ annotationOffset[sequenceIndex] = annotationOffset[sequenceIndex - 1] +
848
+ getAnnotationCount(getRoadMapInArray(rdmaps, sequenceIndex - 1));
849
+ #else
850
+ Annotation *annot = rdmaps->annotations;
851
+ #endif
852
+
853
+ if (rdmaps->referenceCount > 0)
854
+ allocatePreMarkerCountSpace_pg(preGraph);
855
+
856
+ #ifdef _OPENMP
857
+ int threads = omp_get_max_threads();
858
+ if (threads > 8)
859
+ threads = 8;
860
+
861
+ #pragma omp parallel for num_threads(threads)
862
+ #endif
863
+ for (sequenceIndex = 1;
864
+ sequenceIndex <= sequenceCount_pg(preGraph);
865
+ sequenceIndex++) {
866
+ #ifdef _OPENMP
867
+ Annotation *annot = getAnnotationInArray(rdmaps->annotations, annotationOffset[sequenceIndex - 1]);
868
+ #endif
869
+ RoadMap *rdmap;
870
+ Coordinate currentPosition, currentInternalPosition;
871
+ IDnum currentPreNodeID, nextInternalPreNodeID;
872
+ IDnum annotIndex, lastAnnotIndex;
873
+ boolean isReference;
874
+
875
+ if (sequenceIndex % 1000000 == 0)
876
+ velvetLog("Connecting %li / %li\n", (long) sequenceIndex,
877
+ (long) sequenceCount_pg(preGraph));
878
+
879
+ rdmap = getRoadMapInArray(rdmaps, sequenceIndex - 1);
880
+ annotIndex = 0;
881
+ lastAnnotIndex = getAnnotationCount(rdmap);
882
+ nextInternalPreNodeID = chooseNextInternalPreNode
883
+ (chains[sequenceIndex] - 1, sequenceIndex,
884
+ preGraph, chains);
885
+ isReference = (sequenceIndex <= referenceCount);
886
+
887
+ currentPosition = 0;
888
+ currentInternalPosition = 0;
889
+ currentPreNodeID = 0;
890
+ // Recursion up to last annotation
891
+ while (annotIndex < lastAnnotIndex
892
+ || nextInternalPreNodeID != 0) {
893
+ if (annotIndex == lastAnnotIndex
894
+ || (nextInternalPreNodeID != 0
895
+ && currentInternalPosition <
896
+ getPosition(annot))) {
897
+ connectPreNodeToTheNext(&currentPreNodeID,
898
+ nextInternalPreNodeID,
899
+ &currentPosition,
900
+ sequenceIndex,
901
+ isReference,
902
+ preGraph);
903
+ nextInternalPreNodeID =
904
+ chooseNextInternalPreNode
905
+ (currentPreNodeID, sequenceIndex,
906
+ preGraph, chains);
907
+ currentInternalPosition +=
908
+ getPreNodeLength_pg(currentPreNodeID,
909
+ preGraph);
910
+
911
+ } else {
912
+ connectAnnotation(&currentPreNodeID, annot,
913
+ &currentPosition,
914
+ sequenceIndex, isReference,
915
+ preGraph);
916
+ annot = getNextAnnotation(annot);
917
+ annotIndex++;
918
+ }
919
+ }
920
+ }
921
+
922
+ if (rdmaps->referenceCount > 0) {
923
+ allocatePreMarkerSpace_pg(preGraph);
924
+ createPreMarkers(rdmaps, preGraph, chains);
925
+ }
926
+
927
+ #ifdef _OPENMP
928
+ free(annotationOffset);
929
+ annotationOffset = NULL;
930
+ #endif
931
+ }
932
+
933
+ // Post construction memory deallocation routine (of sorts, could certainly be optimized)
934
+ static void
935
+ cleanUpMemory(PreGraph * preGraph, RoadMapArray * rdmaps, IDnum * chains)
936
+ {
937
+ // Killing off roadmaps
938
+ destroyRoadMapArray(rdmaps);
939
+
940
+ // Finishing off the chain markers
941
+ free(chains);
942
+ }
943
+
944
+ // The full monty, wrapped up in one function
945
+ PreGraph *newPreGraph_pg(RoadMapArray * rdmapArray, SequencesReader *seqReadInfo)
946
+ {
947
+ int WORDLENGTH = rdmapArray->WORDLENGTH;
948
+ IDnum sequenceCount = rdmapArray->length;
949
+ IDnum *markerCounters = callocOrExit(sequenceCount + 1, IDnum);
950
+ IDnum *chains = callocOrExit(sequenceCount + 1, IDnum);
951
+ InsertionMarker *insertionMarkers;
952
+ InsertionMarker *veryLastMarker;
953
+
954
+ PreGraph *preGraph =
955
+ emptyPreGraph_pg(sequenceCount, rdmapArray->referenceCount, rdmapArray->WORDLENGTH, rdmapArray->double_strand);
956
+
957
+ velvetLog("Creating insertion markers\n");
958
+ setInsertionMarkers(rdmapArray, markerCounters, &veryLastMarker,
959
+ &insertionMarkers);
960
+
961
+ velvetLog("Counting preNodes\n");
962
+ countPreNodes(rdmapArray, preGraph, markerCounters,
963
+ insertionMarkers, veryLastMarker);
964
+
965
+ velvetLog("%li preNodes counted, creating them now\n",
966
+ (long) preNodeCount_pg(preGraph));
967
+ createPreNodes(rdmapArray, preGraph, markerCounters,
968
+ insertionMarkers, veryLastMarker, chains,
969
+ seqReadInfo, WORDLENGTH);
970
+
971
+ velvetLog("Adjusting marker info...\n");
972
+ convertInsertionMarkers(insertionMarkers, veryLastMarker, chains);
973
+
974
+ #ifdef _OPENMP
975
+ createNodeLocks(preGraph);
976
+ #endif
977
+ velvetLog("Connecting preNodes\n");
978
+ connectPreNodes(rdmapArray, preGraph, chains);
979
+
980
+ velvetLog("Cleaning up memory\n");
981
+ cleanUpMemory(preGraph, rdmapArray, chains);
982
+ #ifdef _OPENMP
983
+ free(nodeLocks);
984
+ nodeLocks = NULL;
985
+ #endif
986
+
987
+ velvetLog("Done creating preGraph\n");
988
+
989
+ return preGraph;
990
+ }