finishm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,137 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #ifndef _PASSAGEMARKER_H_
22
+ #define _PASSAGEMARKER_H_
23
+
24
+ struct passageList_st { // One cell of a singly linked list of passage marker handles
25
+ PassageMarkerI marker; // Marker handle carried by this cell
26
+ PassageMarkerList *next; // Following cell in the list
27
+ } ATTRIBUTE_PACKED;
28
+
29
+ ///////////////////////////////////////////////////////////////////
30
+ // PassageMarker lists
31
+ ///////////////////////////////////////////////////////////////////
32
+ // You can always malloc a PassageMarkerList but these routines manage the
33
+ // memory for you, thus avoiding fragmentation
34
+ PassageMarkerList *newPassageMarkerList(PassageMarkerI marker,
35
+ PassageMarkerList * next);
36
+
37
+ void deallocatePassageMarkerList(PassageMarkerList * list);
38
+
39
+ ///////////////////////////////////////////////////////////////////
40
+ // Creators/Destructors
41
+ ///////////////////////////////////////////////////////////////////
42
+ PassageMarkerI addPassageMarker(IDnum sequenceID, Coordinate start,
43
+ Node * node);
44
+
45
+ PassageMarkerI addUncertainPassageMarker(IDnum sequenceID, Node * node);
46
+
47
+ PassageMarkerI newPassageMarker(IDnum seqID, Coordinate start,
48
+ Coordinate finish, Coordinate startOffset,
49
+ Coordinate finishOffset);
50
+
51
+ // Deallocates but also removes all pointers towards that structure
52
+ void destroyPassageMarker(PassageMarkerI marker);
53
+ void destroyAllPassageMarkers();
54
+
55
+ ///////////////////////////////////////////////////////////////////
56
+ // Node
57
+ ///////////////////////////////////////////////////////////////////
58
+
59
+ // Current node
60
+ Node *getNode(PassageMarkerI marker);
61
+
62
+ // Yank out of current node
63
+ void extractPassageMarker(PassageMarkerI marker);
64
+
65
+ // Insert into a node
66
+ void transposePassageMarker(PassageMarkerI marker, Node * destination);
67
+
68
+ ///////////////////////////////////////////////////////////////////
69
+ // General Info
70
+ ///////////////////////////////////////////////////////////////////
71
+ // Export into file
72
+ void exportMarker(FILE * outfile, PassageMarkerI marker,
73
+ TightString * sequences, int wordLength);
74
+
75
+ // General info for debugging
76
+ char *readPassageMarker(PassageMarkerI marker);
77
+
78
+ // Sequence ID associated to the passage marker
79
+ IDnum getPassageMarkerSequenceID(PassageMarkerI marker);
80
+ IDnum getAbsolutePassMarkerSeqID(PassageMarkerI marker);
81
+ int passageMarkerDirection(PassageMarkerI marker);
82
+
83
+ // Coordinates
84
+ Coordinate getPassageMarkerStart(PassageMarkerI marker);
85
+ void setPassageMarkerStart(PassageMarkerI marker, Coordinate start);
86
+ Coordinate getPassageMarkerFinish(PassageMarkerI marker);
87
+ void setPassageMarkerFinish(PassageMarkerI marker, Coordinate finish);
88
+ Coordinate getPassageMarkerLength(PassageMarkerI marker);
89
+
90
+ // Offsets
91
+ Coordinate getStartOffset(PassageMarkerI marker);
92
+ void setStartOffset(PassageMarkerI marker, Coordinate offset);
93
+ void incrementStartOffset(PassageMarkerI marker, Coordinate offset);
94
+ Coordinate getFinishOffset(PassageMarkerI marker);
95
+ void setFinishOffset(PassageMarkerI marker, Coordinate offset);
96
+ void incrementFinishOffset(PassageMarkerI marker, Coordinate offset);
97
+
98
+ // Status
99
+ void setPassageMarkerStatus(PassageMarkerI marker, boolean status);
100
+ boolean getPassageMarkerStatus(PassageMarkerI marker);
101
+
102
+ ///////////////////////////////////////////////////////////////////
103
+ // Marker Sequences
104
+ ///////////////////////////////////////////////////////////////////
105
+
106
+ // Corresponding marker of reverse complement sequence
107
+ PassageMarkerI getTwinMarker(PassageMarkerI marker);
108
+
109
+ // Within a node
110
+ PassageMarkerI getNextInNode(PassageMarkerI marker);
111
+ void setNextInNode(PassageMarkerI marker, PassageMarkerI next);
112
+ void setTopOfTheNode(PassageMarkerI marker);
113
+
114
+ // Within a sequence
115
+ PassageMarkerI getNextInSequence(PassageMarkerI marker);
116
+ void setNextInSequence(PassageMarkerI previous, PassageMarkerI next);
117
+ PassageMarkerI getPreviousInSequence(PassageMarkerI marker);
118
+ void setPreviousInSequence(PassageMarkerI previous, PassageMarkerI next);
119
+ void connectPassageMarkers(PassageMarkerI previous, PassageMarkerI next,
120
+ Graph * graph);
121
+
122
+ // End of read chains
123
+ boolean isTerminal(PassageMarkerI marker);
124
+ boolean isInitial(PassageMarkerI marker);
125
+
126
+ // Checks whether the node of the next marker is the one given in parameter
127
+ boolean isDestinationToMarker(PassageMarkerI marker, Node * node);
128
+
129
+ // Bypasses the middle marker
130
+ void disconnectNextPassageMarker(PassageMarkerI marker, Graph * graph);
131
+ void deleteNextPassageMarker(PassageMarkerI marker, Graph * graph);
132
+
133
+ // Merge two markers (cf concatenateGraph())
134
+ void concatenatePassageMarkers(PassageMarkerI marker,
135
+ PassageMarkerI nextMarker);
136
+
137
+ #endif
@@ -0,0 +1,1717 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <ctype.h>
25
+
26
+ #ifdef _OPENMP
27
+ #include <omp.h>
28
+ #endif
29
+
30
+ #include "globals.h"
31
+ #include "allocArray.h"
32
+ #include "preGraph.h"
33
+ #include "recycleBin.h"
34
+ #include "tightString.h"
35
+ #include "run.h"
36
+ #include "utility.h"
37
+
38
+ #define ADENINE 0
39
+ #define CYTOSINE 1
40
+ #define GUANINE 2
41
+ #define THYMINE 3
42
+
43
+ struct preMarker_st { // Reference marker attached to a preNode (stored in PreGraph.nodeReferenceMarkers)
44
+ PreMarker * previous; // Neighbouring marker; set to NULL when that neighbour's preNode is destroyed
45
+ PreMarker * next; // Neighbouring marker; set to NULL when that neighbour's preNode is destroyed
46
+ IDnum referenceStart; // presumably the start position on the reference sequence - TODO confirm
47
+ IDnum preNodeStart; // presumably the start position inside the preNode - TODO confirm
48
+ IDnum length; // presumably the length covered by the marker - TODO confirm
49
+ IDnum referenceID; // Reset to 0 when the owning preNode is destroyed
50
+ IDnum preNodeID; /* SF TODO only the sign seems to matter. Could replace with char or bit field */
51
+ } ATTRIBUTE_PACKED;
52
+
53
+ typedef struct preArc_st PreArc;
54
+
55
+ struct preArc_st { // Arc shared by the arc lists of its two end preNodes
56
+ PreArcI nextLeft; /* Next PreArc in the list walked from preNodeIDLeft */
57
+ PreArcI nextRight; /* Next PreArc in the list walked from any other end (see getNextPreArc_pg) */
58
+ IDnum multiplicity; // Count maintained by createPreArc_pg / setMultiplicity_pg
59
+ IDnum preNodeIDLeft; // Signed ID of the origin preNode
60
+ IDnum preNodeIDRight; // Negated signed ID of the destination preNode
61
+ } ATTRIBUTE_PACKED;
62
+
63
+ struct preNode_st { // One preNode; addressed by |ID| in PreGraph.preNodes
64
+ PreArcI preArcLeft; // Head of the arc list used when the node is addressed with a non-positive ID
65
+ PreArcI preArcRight; // Head of the arc list used when the node is addressed with a positive ID
66
+ Descriptor *descriptor; // Packed nucleotides, four per Descriptor cell; NULL flags a destroyed node
67
+ IDnum length; // Value reported by getPreNodeLength_pg
68
+ } ATTRIBUTE_PACKED;
69
+
70
+ struct preGraph_st { // Whole preGraph: node table plus optional per-node reference markers
71
+ PreNode *preNodes; // Node table; live entries sit at indices 1..preNodeCount
72
+ IDnum * nodeReferenceMarkerCounts; // Per-node number of reference markers (NULL when unused)
73
+ PreMarker ** nodeReferenceMarkers; // Per-node marker arrays, indexed like preNodes (NULL when unused)
74
+ IDnum sequenceCount; // Number of sequences used to build the preGraph
75
+ IDnum referenceCount; // Stored as given to emptyPreGraph_pg
76
+ IDnum preNodeCount; // Number of entries currently in use in preNodes
77
+ int wordLength; // Stored as given to emptyPreGraph_pg
78
+ boolean double_strand; // Stored as given to emptyPreGraph_pg
79
+ };
80
+
81
+ static AllocArray *preArcMemory = NULL;
82
+
83
+ DECLARE_FAST_ACCESSORS(PREARC, PreArc, preArcMemory)
84
+
85
+ PreArcI allocatePreArc_pg()
86
+ {
87
+ #ifdef _OPENMP
88
+ return allocArrayArrayAllocate (preArcMemory);
89
+ #else
90
+ if (preArcMemory == NULL)
91
+ preArcMemory = newAllocArray(sizeof(PreArc), "PreArc");
92
+ return allocArrayAllocate (preArcMemory);
93
+ #endif
94
+
95
+ }
96
+
97
+ void deallocatePreArc_pg(PreArcI preArc)
98
+ {
99
+ #ifdef _OPENMP
100
+ allocArrayArrayFree (preArcMemory, preArc);
101
+ #else
102
+ allocArrayFree (preArcMemory, preArc);
103
+ #endif
104
+ }
105
+
106
+ // Returns the length of the preNode's descriptor list
107
+ Coordinate getPreNodeLength_pg(IDnum preNodeID, PreGraph * preGraph)
108
+ {
109
+ IDnum ID = preNodeID;
110
+
111
+ if (ID < 0)
112
+ ID = -ID;
113
+
114
+ return (preGraph->preNodes[ID]).length;
115
+ }
116
+
117
+ // Returns the number of preNodes in the preGraph
118
+ IDnum preNodeCount_pg(PreGraph * preGraph)
119
+ {
120
+ return preGraph->preNodeCount;
121
+ }
122
+
123
+ // returns the number of sequences used to buid the preGraph
124
+ IDnum sequenceCount_pg(PreGraph * preGraph)
125
+ {
126
+ return preGraph->sequenceCount;
127
+ }
128
+
129
+ PreArcI getPreArcBetweenPreNodes_pg(IDnum originPreNodeID,
130
+ IDnum destinationPreNodeID,
131
+ PreGraph * preGraph)
132
+ {
133
+ PreArcI preArc;
134
+
135
+ if (originPreNodeID == 0 || destinationPreNodeID == 0) {
136
+ return NULL_IDX;
137
+ }
138
+
139
+ for (preArc = getPreArc_pg(originPreNodeID, preGraph);
140
+ preArc != NULL_IDX;
141
+ preArc = getNextPreArc_pg(preArc, originPreNodeID)) {
142
+ if (getDestination_pg(preArc, originPreNodeID) ==
143
+ destinationPreNodeID) {
144
+ return preArc;
145
+ }
146
+ }
147
+
148
+ return NULL_IDX;
149
+ }
150
+
151
+ static void addPreArcToPreNode_pg(PreArcI preArc, IDnum preNodeID,
152
+ PreGraph * preGraph)
153
+ {
154
+ IDnum ID = preNodeID;
155
+ PreNode *preNode;
156
+ PreArcI *preArcPtr;
157
+ PreArc *preArcVal;
158
+
159
+ if (ID < 0)
160
+ ID = -ID;
161
+
162
+ preNode = &(preGraph->preNodes[ID]);
163
+
164
+ if (preNodeID > 0)
165
+ preArcPtr = &(preNode->preArcRight);
166
+ else
167
+ preArcPtr = &(preNode->preArcLeft);
168
+
169
+ preArcVal = PREARC_I2P (preArc);
170
+ preArcVal = PREARC_I2P (preArc);
171
+
172
+ if (preNodeID == preArcVal->preNodeIDLeft) {
173
+ preArcVal->nextLeft = *preArcPtr;
174
+ *preArcPtr = preArc;
175
+ }
176
+
177
+ if (preNodeID == preArcVal->preNodeIDRight) {
178
+ preArcVal->nextRight = *preArcPtr;
179
+ *preArcPtr = preArc;
180
+ }
181
+ }
182
+
183
+ // Creates an preArc from preNode origin to preNode destination.
184
+ // If this preArc already exists, increments its multiplicity by 1.
185
+ PreArcI createPreArc_pg(IDnum originPreNodeID, IDnum destinationPreNodeID,
186
+ PreGraph * preGraph)
187
+ {
188
+ PreArcI preArc;
189
+ PreArc *preArcVal;
190
+
191
+
192
+ if (originPreNodeID == 0 || destinationPreNodeID == 0)
193
+ return NULL_IDX;
194
+
195
+ preArc =
196
+ getPreArcBetweenPreNodes_pg(originPreNodeID,
197
+ destinationPreNodeID, preGraph);
198
+
199
+ if (preArc != NULL_IDX) {
200
+ PREARC_FI2P (preArc)->multiplicity++;
201
+ if (destinationPreNodeID == -originPreNodeID)
202
+ PREARC_FI2P (preArc)->multiplicity++;
203
+ return preArc;
204
+ }
205
+ // If not found
206
+ preArc = allocatePreArc_pg();
207
+ preArcVal = PREARC_FI2P (preArc);
208
+ preArcVal->preNodeIDLeft = originPreNodeID;
209
+ preArcVal->preNodeIDRight = -destinationPreNodeID;
210
+ preArcVal->multiplicity = 1;
211
+
212
+ addPreArcToPreNode_pg(preArc, originPreNodeID, preGraph);
213
+
214
+ // Hairpin case
215
+ if (destinationPreNodeID == -originPreNodeID) {
216
+ preArcVal->multiplicity++;
217
+ return preArc;
218
+ }
219
+
220
+ addPreArcToPreNode_pg(preArc, -destinationPreNodeID, preGraph);
221
+
222
+ return preArc;
223
+ }
224
+
225
+ void createAnalogousPreArc_pg(IDnum originPreNodeID,
226
+ IDnum destinationPreNodeID,
227
+ PreArcI refPreArc, PreGraph * preGraph)
228
+ {
229
+ PreArcI preArc;
230
+ PreArc *preArcVal;
231
+
232
+ if (originPreNodeID == 0 || destinationPreNodeID == 0)
233
+ return;
234
+
235
+ preArc =
236
+ getPreArcBetweenPreNodes_pg(originPreNodeID,
237
+ destinationPreNodeID, preGraph);
238
+
239
+ if (preArc != NULL_IDX) {
240
+ PREARC_FI2P (preArc)->multiplicity += PREARC_FI2P (refPreArc)->multiplicity;
241
+ return;
242
+ }
243
+ // If not found
244
+ preArc = allocatePreArc_pg();
245
+ preArcVal = PREARC_FI2P (preArc);
246
+ preArcVal->preNodeIDLeft = originPreNodeID;
247
+ preArcVal->preNodeIDRight = -destinationPreNodeID;
248
+ preArcVal->multiplicity = PREARC_FI2P (refPreArc)->multiplicity;
249
+
250
+ addPreArcToPreNode_pg(preArc, originPreNodeID, preGraph);
251
+
252
+ // Hairpin case
253
+ if (destinationPreNodeID == -originPreNodeID)
254
+ return;
255
+
256
+ addPreArcToPreNode_pg(preArc, -destinationPreNodeID, preGraph);
257
+ }
258
+
259
+ static void setNextPreArc_pg(PreArcI preArc, IDnum preNodeID,
260
+ PreArcI nextPreArc)
261
+ {
262
+ PreArc *preArcVal;
263
+
264
+ preArcVal = PREARC_FI2P (preArc);
265
+ if (preNodeID == preArcVal->preNodeIDLeft)
266
+ preArcVal->nextLeft = nextPreArc;
267
+ if (preNodeID == preArcVal->preNodeIDRight)
268
+ preArcVal->nextRight = nextPreArc;
269
+ }
270
+
271
+ void removePreArcFromList_pg(PreArcI preArc, IDnum preNodeID,
272
+ PreGraph * preGraph)
273
+ {
274
+ IDnum ID = preNodeID;
275
+ PreNode *preNode;
276
+ PreArcI *preArcPtr;
277
+ PreArcI tempPreArc;
278
+
279
+ if (ID < 0)
280
+ ID = -ID;
281
+
282
+ preNode = &(preGraph->preNodes[ID]);
283
+
284
+ if (preNodeID > 0)
285
+ preArcPtr = &(preNode->preArcRight);
286
+ else
287
+ preArcPtr = &(preNode->preArcLeft);
288
+
289
+ if (*preArcPtr == preArc) {
290
+ *preArcPtr = getNextPreArc_pg(preArc, preNodeID);
291
+ return;
292
+ }
293
+
294
+ for (tempPreArc = *preArcPtr; tempPreArc != NULL_IDX;
295
+ tempPreArc = getNextPreArc_pg(tempPreArc, preNodeID))
296
+ if (getNextPreArc_pg(tempPreArc, preNodeID) == preArc)
297
+ setNextPreArc_pg(tempPreArc, preNodeID,
298
+ getNextPreArc_pg(preArc,
299
+ preNodeID));
300
+ }
301
+
302
+ void destroyPreArc_pg(PreArcI preArc, PreGraph * preGraph)
303
+ {
304
+ IDnum leftID, rightID;
305
+ PreArc *preArcVal;
306
+
307
+ if (preArc == NULL_IDX)
308
+ return;
309
+
310
+ preArcVal = PREARC_FI2P (preArc);
311
+ leftID = preArcVal->preNodeIDLeft;
312
+ rightID = preArcVal->preNodeIDRight;
313
+
314
+ // Removing preArc from list
315
+ removePreArcFromList_pg(preArc, leftID, preGraph);
316
+
317
+ // Removing preArc's twin from list
318
+ if (rightID != leftID)
319
+ removePreArcFromList_pg(preArc, rightID, preGraph);
320
+
321
+ deallocatePreArc_pg(preArc);
322
+ }
323
+
324
+ void destroyPreNode_pg(IDnum preNodeID, PreGraph * preGraph)
325
+ {
326
+ PreNode *preNode;
327
+ IDnum ID = preNodeID;
328
+ IDnum index;
329
+ PreMarker * preMarker;
330
+
331
+ //velvetLog("Destroying %ld\n", (long) preNodeID);
332
+
333
+ if (ID < 0)
334
+ ID = -ID;
335
+
336
+ preNode = &(preGraph->preNodes[ID]);
337
+
338
+ // PreNode preArcs:
339
+ while (preNode->preArcLeft != NULL_IDX)
340
+ destroyPreArc_pg(preNode->preArcLeft, preGraph);
341
+ while (preNode->preArcRight != NULL_IDX)
342
+ destroyPreArc_pg(preNode->preArcRight, preGraph);
343
+
344
+ // PreMarkers
345
+ if (preGraph->nodeReferenceMarkers) {
346
+ for (index = 0; index < preGraph->nodeReferenceMarkerCounts[ID]; index++) {
347
+ preMarker = &(preGraph->nodeReferenceMarkers[ID][index]);
348
+ if (preMarker->previous != NULL)
349
+ preMarker->previous->next = NULL;
350
+ if (preMarker->next != NULL)
351
+ preMarker->next->previous = NULL;
352
+ preMarker->preNodeID = 0;
353
+ preMarker->referenceID = 0;
354
+ }
355
+ if (preGraph->nodeReferenceMarkers[ID])
356
+ free(preGraph->nodeReferenceMarkers[ID]);
357
+ preGraph->nodeReferenceMarkers[ID] = NULL;
358
+ preGraph->nodeReferenceMarkerCounts[ID] = 0;
359
+ }
360
+
361
+ // Descriptors
362
+ free(preNode->descriptor);
363
+
364
+ // Flag as destroyed
365
+ preNode->descriptor = NULL;
366
+ }
367
+
368
+ void destroyPreGraph_pg(PreGraph * preGraph)
369
+ {
370
+ IDnum index;
371
+ PreNode *preNode = &(preGraph->preNodes[1]);
372
+
373
+ // Descriptors
374
+ for (index = 1; index <= preGraph->preNodeCount; index++) {
375
+ free(preNode->descriptor);
376
+ preNode++;
377
+ }
378
+
379
+ // Arcs
380
+ #ifdef _OPENMP
381
+ destroyAllocArrayArray(preArcMemory);
382
+ #else
383
+ destroyAllocArray(preArcMemory);
384
+ #endif
385
+
386
+ // Nodes
387
+ free(preGraph->preNodes);
388
+
389
+ // PreMarkers
390
+ if (preGraph->nodeReferenceMarkerCounts) {
391
+ free(preGraph->nodeReferenceMarkerCounts);
392
+ free(preGraph->nodeReferenceMarkers);
393
+ }
394
+
395
+ // Graph
396
+ free(preGraph);
397
+
398
+ }
399
+
400
+ static Nucleotide getNucleotideInDescriptor_pg(Descriptor * descriptor,
401
+ Coordinate i)
402
+ {
403
+ Descriptor *fourMer = descriptor + i / 4;
404
+
405
+ switch (i % 4) {
406
+ case 0:
407
+ return (*fourMer & 3);
408
+ case 1:
409
+ return (*fourMer & 12) >> 2;
410
+ case 2:
411
+ return (*fourMer & 48) >> 4;
412
+ case 3:
413
+ return (*fourMer & 192) >> 6;
414
+ }
415
+ return 0;
416
+ }
417
+
418
+ PreNode *getPreNodeInPreGraph_pg(PreGraph * preGraph, IDnum preNodeID)
419
+ {
420
+ PreNode *preNode;
421
+ if (preNodeID <= 0)
422
+ abort();
423
+ else {
424
+ preNode = &(preGraph->preNodes[preNodeID]);
425
+ if (preNode->descriptor != NULL)
426
+ return preNode;
427
+ else
428
+ return NULL;
429
+ }
430
+ return NULL;
431
+ }
432
+
433
+ PreArcI getPreArc_pg(IDnum preNodeID, PreGraph * preGraph)
434
+ {
435
+ IDnum ID = preNodeID;
436
+ PreNode *preNode;
437
+
438
+ if (ID < 0)
439
+ ID = -ID;
440
+
441
+ preNode = &(preGraph->preNodes[ID]);
442
+
443
+ if (preNodeID > 0)
444
+ return preNode->preArcRight;
445
+ else
446
+ return preNode->preArcLeft;
447
+ }
448
+
449
+ PreArcI getNextPreArc_pg(PreArcI preArc, IDnum preNodeID)
450
+ {
451
+ PreArc *preArcVal;
452
+
453
+ preArcVal = PREARC_FI2P (preArc);
454
+
455
+ if (preNodeID == preArcVal->preNodeIDLeft) {
456
+ return preArcVal->nextLeft;
457
+ } else {
458
+ return preArcVal->nextRight;
459
+ }
460
+ }
461
+
462
+ IDnum getMultiplicity_pg(PreArcI preArc)
463
+ {
464
+ if (preArc == NULL_IDX)
465
+ return 0;
466
+
467
+ return PREARC_FI2P (preArc)->multiplicity;
468
+ }
469
+
470
+ IDnum getOtherEnd_pg(PreArcI preArc, IDnum preNodeID)
471
+ {
472
+ PreArc *preArcVal;
473
+
474
+ preArcVal = PREARC_FI2P (preArc);
475
+ if (preNodeID == preArcVal->preNodeIDLeft)
476
+ return preArcVal->preNodeIDRight;
477
+ else
478
+ return preArcVal->preNodeIDLeft;
479
+ }
480
+
481
+ IDnum getDestination_pg(PreArcI preArc, IDnum preNodeID)
482
+ {
483
+ PreArc *preArcVal;
484
+
485
+ if (preArc == NULL_IDX)
486
+ return 0;
487
+
488
+ preArcVal = PREARC_FI2P (preArc);
489
+
490
+ if (preNodeID == preArcVal->preNodeIDLeft)
491
+ return -preArcVal->preNodeIDRight;
492
+ else
493
+ return -preArcVal->preNodeIDLeft;
494
+ }
495
+
496
+ static void writeNucleotideInDescriptor_pg(Nucleotide nucleotide,
497
+ Descriptor * descriptor,
498
+ Coordinate i)
499
+ {
500
+ Descriptor *fourMer = descriptor + i / 4;
501
+ switch (i % 4) {
502
+ case 3:
503
+ *fourMer &= 63;
504
+ *fourMer += nucleotide << 6;
505
+ return;
506
+ case 2:
507
+ *fourMer &= 207;
508
+ *fourMer += nucleotide << 4;
509
+ return;
510
+ case 1:
511
+ *fourMer &= 243;
512
+ *fourMer += nucleotide << 2;
513
+ return;
514
+ case 0:
515
+ *fourMer &= 252;
516
+ *fourMer += nucleotide;
517
+ }
518
+ }
519
+
520
+ static inline Descriptor *mergeDescriptors_pg(Descriptor * descr,
521
+ Coordinate destinationLength,
522
+ Descriptor * copy,
523
+ Coordinate sourceLength,
524
+ int wordLength)
525
+ {
526
+ Descriptor *readPtr, *writePtr;
527
+ Descriptor readCopy = 0;
528
+ int readOffset, writeOffset;
529
+ size_t arrayLength;
530
+ Coordinate newLength =
531
+ destinationLength + sourceLength + wordLength - 1;
532
+ Descriptor *new;
533
+ Coordinate index;
534
+
535
+ // Specify new array
536
+ arrayLength = newLength / 4;
537
+ if (newLength % 4)
538
+ arrayLength++;
539
+ new = callocOrExit(arrayLength, Descriptor);
540
+ for (index = 0; index < arrayLength; index++)
541
+ new[index] = 0;
542
+
543
+ // Copying first descriptor
544
+ readPtr = descr;
545
+ writePtr = new;
546
+ writeOffset = 0;
547
+ for (index = 0; index < destinationLength + wordLength - 1;
548
+ index++) {
549
+ (*writePtr) >>= 2;
550
+ if (writeOffset == 0)
551
+ readCopy = *readPtr;
552
+ (*writePtr) += (readCopy & 3) << 6;
553
+
554
+ /*switch ((readCopy & 3)) {
555
+ case ADENINE:
556
+ velvetLog("A%ld", index);
557
+ break;
558
+ case CYTOSINE:
559
+ velvetLog("C%ld", index);
560
+ break;
561
+ case GUANINE:
562
+ velvetLog("G%ld", index);
563
+ break;
564
+ case THYMINE:
565
+ velvetLog("T%ld", index);
566
+ break;
567
+ } */
568
+ readCopy >>= 2;
569
+
570
+ writeOffset++;
571
+ if (writeOffset == 4) {
572
+ writePtr++;
573
+ readPtr++;
574
+ writeOffset = 0;
575
+ }
576
+ }
577
+
578
+ //velvetLog("\n");
579
+
580
+ // Skipping initial k-1 letters in second descriptor
581
+ readPtr = &(copy[(wordLength - 1) / 4]);
582
+ readCopy = *readPtr;
583
+ readOffset = (wordLength - 1) % 4;
584
+ readCopy >>= (readOffset * 2);
585
+
586
+ // Going on copying second descriptor
587
+ for (index = 0; index < sourceLength; index++) {
588
+ (*writePtr) >>= 2;
589
+ if (readOffset == 0)
590
+ readCopy = *readPtr;
591
+ (*writePtr) += (readCopy & 3) << 6;
592
+ /*switch ((readCopy & 3)) {
593
+ case ADENINE:
594
+ velvetLog("A%ld", index);
595
+ break;
596
+ case CYTOSINE:
597
+ velvetLog("C%ld", index);
598
+ break;
599
+ case GUANINE:
600
+ velvetLog("G%ld", index);
601
+ break;
602
+ case THYMINE:
603
+ velvetLog("T%ld", index);
604
+ break;
605
+ default:
606
+ velvetLog("?%ld;", index);
607
+ } */
608
+ readCopy >>= 2;
609
+
610
+ writeOffset++;
611
+ if (writeOffset == 4) {
612
+ writePtr++;
613
+ writeOffset = 0;
614
+ }
615
+
616
+ readOffset++;
617
+ if (readOffset == 4) {
618
+ readPtr++;
619
+ readOffset = 0;
620
+ }
621
+ }
622
+
623
+ //velvetLog("\n");
624
+
625
+ if (writeOffset != 0) {
626
+ while (writeOffset != 4) {
627
+ (*writePtr) >>= 2;
628
+ writeOffset++;
629
+ }
630
+ }
631
+
632
+ return new;
633
+ }
634
+
635
+ static inline Descriptor *mergeDescriptorsH2H_pg(Descriptor * descr,
636
+ Coordinate
637
+ destinationLength,
638
+ Descriptor * copy,
639
+ Coordinate sourceLength,
640
+ int wordLength)
641
+ {
642
+ Descriptor *readPtr, *writePtr;
643
+ Descriptor readCopy;
644
+ int readOffset, writeOffset;
645
+ size_t arrayLength;
646
+ Coordinate newLength =
647
+ destinationLength + sourceLength + wordLength - 1;
648
+ Descriptor *new;
649
+ Coordinate index;
650
+
651
+ // Specify new array
652
+ arrayLength = newLength / 4;
653
+ if (newLength % 4)
654
+ arrayLength++;
655
+ new = callocOrExit(arrayLength, Descriptor);
656
+ for (index = 0; index < arrayLength; index++)
657
+ new[index] = 0;
658
+
659
+ // Copying first descriptor (including final (k-1)-mer)
660
+ readPtr = descr;
661
+ readCopy = *readPtr;
662
+ writePtr = new;
663
+ writeOffset = 0;
664
+ readOffset = 0;
665
+ for (index = 0; index < destinationLength + wordLength - 1;
666
+ index++) {
667
+ (*writePtr) >>= 2;
668
+ if (writeOffset == 0)
669
+ readCopy = *readPtr;
670
+ (*writePtr) += (readCopy & 3) << 6;
671
+ /*switch ((readCopy & 3)) {
672
+ case ADENINE:
673
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
674
+ break;
675
+ case CYTOSINE:
676
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
677
+ break;
678
+ case GUANINE:
679
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
680
+ break;
681
+ case THYMINE:
682
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
683
+ break;
684
+ default:
685
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
686
+ } */
687
+ readCopy >>= 2;
688
+
689
+ writeOffset++;
690
+ if (writeOffset == 4) {
691
+ writePtr++;
692
+ readPtr++;
693
+ writeOffset = 0;
694
+ }
695
+ }
696
+
697
+ //velvetLog("\n");
698
+
699
+ // Going to end of second descriptor
700
+ readPtr = &(copy[(sourceLength - 1) / 4]);
701
+ readCopy = *readPtr;
702
+ readOffset = (sourceLength - 1) % 4;
703
+ readCopy <<= ((3 - readOffset) * 2);
704
+
705
+ //velvetLog("Read copy %x\n", readCopy);
706
+
707
+ // Going on copying reverse complement of second descriptor
708
+ for (index = 0; index < sourceLength; index++) {
709
+ (*writePtr) >>= 2;
710
+ if (readOffset == 3)
711
+ readCopy = *readPtr;
712
+ #ifndef COLOR
713
+ (*writePtr) += 192 - (readCopy & 192);
714
+ #else
715
+ (*writePtr) += (readCopy & 192);
716
+ #endif
717
+ /*switch (3 - ((readCopy & 192) >> 6)) {
718
+ case ADENINE:
719
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
720
+ break;
721
+ case CYTOSINE:
722
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
723
+ break;
724
+ case GUANINE:
725
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
726
+ break;
727
+ case THYMINE:
728
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
729
+ break;
730
+ default:
731
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
732
+ } */
733
+ readCopy <<= 2;
734
+
735
+ writeOffset++;
736
+ if (writeOffset == 4) {
737
+ writePtr++;
738
+ writeOffset = 0;
739
+ }
740
+
741
+ readOffset--;
742
+ if (readOffset == -1) {
743
+ readPtr--;
744
+ readOffset = 3;
745
+ }
746
+ }
747
+
748
+ //velvetLog("\n");
749
+
750
+ if (writeOffset != 0) {
751
+ while (writeOffset != 4) {
752
+ (*writePtr) >>= 2;
753
+ writeOffset++;
754
+ }
755
+ }
756
+
757
+ return new;
758
+ }
759
+
760
+ static inline Descriptor *mergeDescriptorsF2F_pg(Descriptor * descr,
761
+ Coordinate
762
+ destinationLength,
763
+ Descriptor * copy,
764
+ Coordinate sourceLength,
765
+ int wordLength)
766
+ {
767
+ Descriptor *readPtr, *writePtr;
768
+ Descriptor readCopy;
769
+ int readOffset, writeOffset;
770
+ size_t arrayLength;
771
+ Coordinate newLength =
772
+ destinationLength + sourceLength + wordLength - 1;
773
+ Descriptor *new;
774
+ Coordinate index;
775
+
776
+ // Specify new array
777
+ arrayLength = newLength / 4;
778
+ if (newLength % 4)
779
+ arrayLength++;
780
+ new = callocOrExit(arrayLength, Descriptor);
781
+ for (index = 0; index < arrayLength; index++)
782
+ new[index] = 0;
783
+
784
+ writePtr = new;
785
+ writeOffset = 0;
786
+
787
+ // Going to end of first descriptor
788
+ readPtr = &(copy[(sourceLength + wordLength - 2) / 4]);
789
+ readCopy = *readPtr;
790
+ readOffset = (sourceLength + wordLength - 2) % 4;
791
+ readCopy <<= ((3 - readOffset) * 2);
792
+
793
+ // Copying reverse complement of first descriptor (minus final (k-1)-mer)
794
+ for (index = 0; index < sourceLength; index++) {
795
+ (*writePtr) >>= 2;
796
+ if (readOffset == 3)
797
+ readCopy = *readPtr;
798
+ #ifndef COLOR
799
+ (*writePtr) += 192 - (readCopy & 192);
800
+ #else
801
+ (*writePtr) += (readCopy & 192);
802
+ #endif
803
+ /*switch (3 - ((readCopy & 192) >> 6)) {
804
+ case ADENINE:
805
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
806
+ break;
807
+ case CYTOSINE:
808
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
809
+ break;
810
+ case GUANINE:
811
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
812
+ break;
813
+ case THYMINE:
814
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
815
+ break;
816
+ default:
817
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
818
+ } */
819
+ readCopy <<= 2;
820
+
821
+ writeOffset++;
822
+ if (writeOffset == 4) {
823
+ writePtr++;
824
+ writeOffset = 0;
825
+ }
826
+
827
+ readOffset--;
828
+ if (readOffset == -1) {
829
+ readPtr--;
830
+ readOffset = 3;
831
+ }
832
+ }
833
+
834
+ //velvetLog("\n");
835
+
836
+ // Going on copying second descriptor
837
+ readPtr = descr;
838
+ readCopy = *readPtr;
839
+ readOffset = 0;
840
+
841
+ for (index = 0; index < destinationLength + wordLength - 1;
842
+ index++) {
843
+ (*writePtr) >>= 2;
844
+ if (readOffset == 0)
845
+ readCopy = *readPtr;
846
+ (*writePtr) += (readCopy & 3) << 6;
847
+ /*switch ((readCopy & 3)) {
848
+ case ADENINE:
849
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
850
+ break;
851
+ case CYTOSINE:
852
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
853
+ break;
854
+ case GUANINE:
855
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
856
+ break;
857
+ case THYMINE:
858
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
859
+ break;
860
+ default:
861
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
862
+ } */
863
+ readCopy >>= 2;
864
+
865
+ writeOffset++;
866
+ if (writeOffset == 4) {
867
+ writePtr++;
868
+ writeOffset = 0;
869
+ }
870
+
871
+ readOffset++;
872
+ if (readOffset == 4) {
873
+ readPtr++;
874
+ readOffset = 0;
875
+ }
876
+ }
877
+
878
+ //velvetLog("\n");
879
+
880
+ if (writeOffset != 0) {
881
+ while (writeOffset != 4) {
882
+ (*writePtr) >>= 2;
883
+ writeOffset++;
884
+ }
885
+ }
886
+
887
+ return new;
888
+ }
889
+
890
+ void setMultiplicity_pg(PreArcI preArc, IDnum mult)
891
+ {
892
+ PREARC_FI2P (preArc)->multiplicity = mult;
893
+ }
894
+
895
+ static void updatePreArcData_pg(PreArcI preArc, IDnum oldPreNodeID,
896
+ IDnum newPreNodeID)
897
+ {
898
+ PreArc *preArcVal;
899
+
900
+ preArcVal = PREARC_FI2P (preArc);
901
+ if (preArcVal->preNodeIDLeft == oldPreNodeID)
902
+ preArcVal->preNodeIDLeft = newPreNodeID;
903
+ if (preArcVal->preNodeIDRight == oldPreNodeID)
904
+ preArcVal->preNodeIDRight = newPreNodeID;
905
+ }
906
+
907
+ // Reshuffles the preGraph->preNodes array to remove NULL pointers
908
+ // Beware that preNode IDs are accordingly reshuffled (all pointers remain valid though)
909
+ void renumberPreNodes_pg(PreGraph * preGraph)
910
+ {
911
+ IDnum preNodeIndex;
912
+ PreNode *currentPreNode, *destinationPreNode;
913
+ IDnum counter = 0;
914
+ IDnum preNodes = preGraph->preNodeCount;
915
+ IDnum newIndex;
916
+ IDnum preMarkerIndex;
917
+ PreMarker * preMarker;
918
+ PreArcI preArc;
919
+
920
+ velvetLog("Renumbering preNodes\n");
921
+ velvetLog("Initial preNode count %li\n", (long) preGraph->preNodeCount);
922
+
923
+ for (preNodeIndex = 1; preNodeIndex <= preNodes; preNodeIndex++) {
924
+ currentPreNode = &(preGraph->preNodes[preNodeIndex]);
925
+
926
+ if (currentPreNode->descriptor == NULL)
927
+ counter++;
928
+ else if (counter != 0) {
929
+ newIndex = preNodeIndex - counter;
930
+ destinationPreNode =
931
+ &(preGraph->preNodes[newIndex]);
932
+
933
+ destinationPreNode->preArcLeft =
934
+ currentPreNode->preArcLeft;
935
+ destinationPreNode->preArcRight =
936
+ currentPreNode->preArcRight;
937
+ destinationPreNode->descriptor =
938
+ currentPreNode->descriptor;
939
+ destinationPreNode->length =
940
+ currentPreNode->length;
941
+
942
+ for (preArc = getPreArc_pg(newIndex, preGraph);
943
+ preArc != NULL_IDX;
944
+ preArc = getNextPreArc_pg(preArc, newIndex))
945
+ updatePreArcData_pg(preArc, preNodeIndex,
946
+ newIndex);
947
+ for (preArc = getPreArc_pg(-newIndex, preGraph);
948
+ preArc != NULL_IDX;
949
+ preArc = getNextPreArc_pg(preArc, -newIndex))
950
+ updatePreArcData_pg(preArc, -preNodeIndex,
951
+ -newIndex);
952
+
953
+ if (preGraph->nodeReferenceMarkers) {
954
+ preGraph->nodeReferenceMarkerCounts[newIndex] = preGraph->nodeReferenceMarkerCounts[preNodeIndex];
955
+ preGraph->nodeReferenceMarkers[newIndex] = preGraph->nodeReferenceMarkers[preNodeIndex];
956
+
957
+ for (preMarkerIndex = 0; preMarkerIndex < preGraph->nodeReferenceMarkerCounts[newIndex]; preMarkerIndex++) {
958
+ preMarker = &(preGraph->nodeReferenceMarkers[newIndex][preMarkerIndex]);
959
+ if (preMarker->preNodeID == preNodeIndex)
960
+ preMarker->preNodeID = newIndex;
961
+ else if (preMarker->preNodeID == -preNodeIndex)
962
+ preMarker->preNodeID = -newIndex;
963
+ else
964
+ abort();
965
+ }
966
+ }
967
+ }
968
+ }
969
+
970
+ preGraph->preNodeCount -= counter;
971
+ preGraph->preNodes = reallocOrExit(preGraph->preNodes,
972
+ preGraph->preNodeCount +
973
+ 1, PreNode);
974
+
975
+ velvetLog("Destroyed %li preNodes\n", (long) counter);
976
+ }
977
+
978
+ // Allocate memory for an empty preGraph created with sequenceCount different sequences
979
+ PreGraph *emptyPreGraph_pg(IDnum sequenceCount, IDnum referenceCount, int wordLength, boolean double_strand)
980
+ {
981
+ PreGraph *newPreGraph = mallocOrExit(1, PreGraph);
982
+ newPreGraph->sequenceCount = sequenceCount;
983
+ newPreGraph->wordLength = wordLength;
984
+ newPreGraph->preNodeCount = 0;
985
+ newPreGraph->double_strand = double_strand;
986
+ newPreGraph->referenceCount = referenceCount;
987
+ newPreGraph->preNodes = NULL;
988
+ newPreGraph->nodeReferenceMarkerCounts = NULL;
989
+ newPreGraph->nodeReferenceMarkers = NULL;
990
+
991
+ #ifdef _OPENMP
992
+ preArcMemory = newAllocArrayArray(omp_get_max_threads(), sizeof(PreArc), "PreArc");
993
+ #endif
994
+
995
+ return newPreGraph;
996
+ }
997
+
998
+ static Descriptor *newDescriptor_pg(Coordinate length, SequencesReader *seqReadInfo,
999
+ Kmer * initialKmer, int wordLength)
1000
+ {
1001
+ char letter;
1002
+ Nucleotide nucleotide;
1003
+ Coordinate totalLength = length + wordLength - 1;
1004
+ size_t arrayLength = totalLength / 4;
1005
+ Descriptor *res;
1006
+ Coordinate index;
1007
+ Kmer kmerCopy;
1008
+
1009
+ if (totalLength % 4 > 0)
1010
+ arrayLength++;
1011
+
1012
+ res = callocOrExit(arrayLength, Descriptor);
1013
+
1014
+ copyKmers(&kmerCopy, initialKmer);
1015
+ for (index = wordLength - 2; index >= 0; index--)
1016
+ writeNucleotideInDescriptor_pg(popNucleotide(&kmerCopy), res,
1017
+ index);
1018
+
1019
+ for (index = wordLength - 1; index < totalLength; index++) {
1020
+ if (seqReadInfo->m_bIsBinary) {
1021
+ letter = **seqReadInfo->m_ppCurrString;
1022
+ *seqReadInfo->m_ppCurrString += 1; // increment the pointer
1023
+ } else {
1024
+ letter = getc(seqReadInfo->m_pFile);
1025
+ while (!isalpha(letter))
1026
+ letter = getc(seqReadInfo->m_pFile);
1027
+ }
1028
+ //velvetLog("%c", letter);
1029
+ switch (letter) {
1030
+ case 'N':
1031
+ case 'A':
1032
+ nucleotide = ADENINE;
1033
+ break;
1034
+ case 'C':
1035
+ nucleotide = CYTOSINE;
1036
+ break;
1037
+ case 'G':
1038
+ nucleotide = GUANINE;
1039
+ break;
1040
+ case 'T':
1041
+ nucleotide = THYMINE;
1042
+ break;
1043
+ default:
1044
+ fflush(stdout);
1045
+ abort();
1046
+ }
1047
+
1048
+ writeNucleotideInDescriptor_pg(nucleotide, res, index);
1049
+ pushNucleotide(initialKmer, nucleotide);
1050
+ }
1051
+
1052
+ //velvetLog(" ");
1053
+
1054
+ return res;
1055
+ }
1056
+
1057
+ void allocatePreNodeSpace_pg(PreGraph * preGraph, IDnum preNodeCount)
1058
+ {
1059
+ preGraph->preNodes = callocOrExit(preNodeCount + 1, PreNode);
1060
+ preGraph->preNodeCount = preNodeCount;
1061
+ }
1062
+
1063
+ void allocatePreMarkerCountSpace_pg(PreGraph * preGraph)
1064
+ {
1065
+ preGraph->nodeReferenceMarkerCounts = callocOrExit(preGraph->preNodeCount + 1, IDnum);
1066
+ preGraph->nodeReferenceMarkers = callocOrExit(preGraph->preNodeCount + 1, PreMarker *);
1067
+ }
1068
+
1069
+ void incrementNodeReferenceMarkerCount_pg(PreGraph * preGraph, IDnum preNodeID) {
1070
+ if (preNodeID < 0)
1071
+ preNodeID = -preNodeID;
1072
+
1073
+ preGraph->nodeReferenceMarkerCounts[preNodeID]++;
1074
+ }
1075
+
1076
+ void allocatePreMarkerSpace_pg(PreGraph * preGraph) {
1077
+ IDnum index;
1078
+
1079
+ if (!preGraph->nodeReferenceMarkers)
1080
+ return;
1081
+
1082
+ for (index = 1; index <= preGraph->preNodeCount; index++) {
1083
+ if (preGraph->nodeReferenceMarkerCounts[index])
1084
+ preGraph->nodeReferenceMarkers[index] = callocOrExit(preGraph->nodeReferenceMarkerCounts[index], PreMarker);
1085
+ else
1086
+ preGraph->nodeReferenceMarkers[index] = NULL;
1087
+ preGraph->nodeReferenceMarkerCounts[index] = 0;
1088
+ }
1089
+ }
1090
+
1091
+ PreMarker * addPreMarker_pg(PreGraph * preGraph, IDnum nodeID, IDnum seqID, Coordinate * start, PreMarker * previous) {
1092
+ PreMarker * preMarker;
1093
+ IDnum positive_nodeID;
1094
+
1095
+ if (nodeID < 0)
1096
+ abort();
1097
+ else
1098
+ positive_nodeID = nodeID;
1099
+
1100
+ //printf("Adding preMarker %li\n", (long) *start);
1101
+
1102
+ preMarker = &(preGraph->nodeReferenceMarkers[positive_nodeID][(preGraph->nodeReferenceMarkerCounts[positive_nodeID])++]);
1103
+ preMarker->previous = previous;
1104
+ if (previous)
1105
+ previous->next = preMarker;
1106
+ preMarker->next = NULL;
1107
+ preMarker->referenceStart = *start;
1108
+ preMarker->length = preGraph->preNodes[positive_nodeID].length;
1109
+ preMarker->preNodeStart = 0;
1110
+ preMarker->preNodeID = nodeID;
1111
+ preMarker->referenceID = seqID;
1112
+
1113
+ *start += preMarker->length;
1114
+
1115
+ return preMarker;
1116
+ }
1117
+ void addPreNodeToPreGraph_pg(PreGraph * preGraph, Coordinate start,
1118
+ Coordinate finish, SequencesReader *seqReadInfo,
1119
+ Kmer * initialKmer, IDnum ID)
1120
+ {
1121
+ PreNode *newnd = &(preGraph->preNodes[ID]);
1122
+
1123
+ newnd->preArcLeft = NULL_IDX;
1124
+ newnd->preArcRight = NULL_IDX;
1125
+
1126
+ newnd->length = finish - start;
1127
+
1128
+ newnd->descriptor =
1129
+ newDescriptor_pg(newnd->length, seqReadInfo, initialKmer,
1130
+ preGraph->wordLength);
1131
+ }
1132
+
1133
+ static void exportPreNode_pg(FILE * outfile, PreNode * preNode, IDnum ID,
1134
+ int wordLength)
1135
+ {
1136
+ Coordinate index;
1137
+ Nucleotide nucleotide;
1138
+
1139
+ if (preNode == NULL)
1140
+ return;
1141
+
1142
+ velvetFprintf(outfile, "NODE\t%ld\t%lld\n", (long) ID, (long long) preNode->length);
1143
+
1144
+ if (preNode->length == 0) {
1145
+ velvetFprintf(outfile, "\n");
1146
+ return;
1147
+ }
1148
+
1149
+ for (index = 0; index < preNode->length + wordLength - 1; index++) {
1150
+ nucleotide =
1151
+ getNucleotideInDescriptor_pg(preNode->descriptor,
1152
+ index);
1153
+ switch (nucleotide) {
1154
+ case ADENINE:
1155
+ velvetFprintf(outfile, "A");
1156
+ break;
1157
+ case CYTOSINE:
1158
+ velvetFprintf(outfile, "C");
1159
+ break;
1160
+ case GUANINE:
1161
+ velvetFprintf(outfile, "G");
1162
+ break;
1163
+ case THYMINE:
1164
+ velvetFprintf(outfile, "T");
1165
+ break;
1166
+ }
1167
+ }
1168
+
1169
+ velvetFprintf(outfile, "\n");
1170
+ }
1171
+
1172
+ static void exportPreMarker(FILE * outfile, PreMarker* preMarker) {
1173
+ velvetFprintf(outfile, "%li\t%lli\t%lli\t%lli\n", (long) preMarker->preNodeID, (long long) preMarker->preNodeStart, (long long) preMarker->referenceStart, (long long) preMarker->length);
1174
+ }
1175
+
1176
+ static void exportPreReference_pg(FILE * outfile, IDnum refIndex, PreGraph * preGraph) {
1177
+ PreMarker * preMarker;
1178
+ IDnum nodeID, index;
1179
+
1180
+ velvetFprintf(outfile, "SEQ\t%li\n", (long) refIndex);
1181
+
1182
+ for (nodeID = 1; nodeID <= preGraph->preNodeCount; nodeID++) {
1183
+ for (index = 0; index < preGraph->nodeReferenceMarkerCounts[nodeID]; index++) {
1184
+ preMarker = &(preGraph->nodeReferenceMarkers[nodeID][index]);
1185
+ if (preMarker->referenceID == refIndex && !preMarker->previous) {
1186
+ for (;preMarker;preMarker = preMarker->next) {
1187
+ exportPreMarker(outfile, preMarker);
1188
+ }
1189
+ }
1190
+ }
1191
+ }
1192
+ }
1193
+
1194
+ void exportPreGraph_pg(char *filename, PreGraph * preGraph)
1195
+ {
1196
+ IDnum index;
1197
+ FILE *outfile;
1198
+ PreNode *preNode;
1199
+ int wordLength = getWordLength_pg(preGraph);
1200
+
1201
+ if (preGraph == NULL) {
1202
+ return;
1203
+ }
1204
+
1205
+ outfile = fopen(filename, "w");
1206
+ if (outfile == NULL) {
1207
+ velvetLog("Couldn't open file, sorry\n");
1208
+ return;
1209
+ } else
1210
+ velvetLog("Writing into pregraph file %s...\n", filename);
1211
+
1212
+ // General data
1213
+ velvetFprintf(outfile, "%ld\t%ld\t%i\t%hi\n", (long) preGraph->preNodeCount,
1214
+ (long) preGraph->sequenceCount, preGraph->wordLength, (short) preGraph->double_strand);
1215
+
1216
+ // PreNode info
1217
+ for (index = 1; index <= preGraph->preNodeCount; index++) {
1218
+ preNode = getPreNodeInPreGraph_pg(preGraph, index);
1219
+ exportPreNode_pg(outfile, preNode, index, wordLength);
1220
+ }
1221
+
1222
+ // Reference sequence info
1223
+ for (index = 1; index <= preGraph->referenceCount; index++)
1224
+ exportPreReference_pg(outfile, index, preGraph);
1225
+
1226
+
1227
+ fclose(outfile);
1228
+ }
1229
+
1230
+ int getWordLength_pg(PreGraph * preGraph)
1231
+ {
1232
+ return preGraph->wordLength;
1233
+ }
1234
+
1235
+ boolean hasSinglePreArc_pg(IDnum preNodeID, PreGraph * preGraph)
1236
+ {
1237
+ IDnum ID = preNodeID;
1238
+ PreNode *preNode;
1239
+ PreArcI preArc;
1240
+
1241
+ if (ID < 0)
1242
+ ID = -ID;
1243
+
1244
+ preNode = &(preGraph->preNodes[ID]);
1245
+
1246
+ if (preNodeID > 0)
1247
+ preArc = preNode->preArcRight;
1248
+ else
1249
+ preArc = preNode->preArcLeft;
1250
+
1251
+ return (preArc != NULL_IDX
1252
+ && getNextPreArc_pg(preArc, preNodeID) == NULL_IDX);
1253
+ }
1254
+
1255
+ char simplePreArcCount_pg(IDnum preNodeID, PreGraph * preGraph)
1256
+ {
1257
+ PreNode *preNode;
1258
+ PreArcI preArc;
1259
+ char count = 0;
1260
+ IDnum ID = preNodeID;
1261
+
1262
+ if (ID < 0)
1263
+ ID = -ID;
1264
+
1265
+ preNode = &(preGraph->preNodes[ID]);
1266
+
1267
+ if (preNodeID > 0)
1268
+ preArc = preNode->preArcRight;
1269
+ else
1270
+ preArc = preNode->preArcLeft;
1271
+
1272
+ for (; preArc != NULL_IDX;
1273
+ preArc = getNextPreArc_pg(preArc, preNodeID))
1274
+ count++;
1275
+
1276
+ return count;
1277
+ }
1278
+
1279
+ boolean isLoop_pg(PreArcI preArc)
1280
+ {
1281
+ PreArc *preArcVal = PREARC_FI2P (preArc);
1282
+
1283
+ return (preArcVal->preNodeIDLeft == preArcVal->preNodeIDRight
1284
+ || preArcVal->preNodeIDLeft == -preArcVal->preNodeIDRight);
1285
+ }
1286
+
1287
+ void setPreNodeDescriptor_pg(Descriptor * descr, Coordinate length, IDnum preNodeID, PreGraph * preGraph) {
1288
+ PreNode * preNode;
1289
+
1290
+ if (preNodeID < 0)
1291
+ preNodeID = -preNodeID;
1292
+
1293
+ preNode = getPreNodeInPreGraph_pg(preGraph, preNodeID);
1294
+ free(preNode->descriptor);
1295
+ preNode->descriptor = descr;
1296
+ preNode->length = length;
1297
+ }
1298
+
1299
+ static void appendPositiveDescriptor_pg(Descriptor ** writePtr, int * writeOffset, IDnum preNodeID, PreGraph * preGraph, boolean initial) {
1300
+ PreNode * preNode = getPreNodeInPreGraph_pg(preGraph, preNodeID);
1301
+ Descriptor * readPtr = preNode->descriptor;
1302
+ Descriptor readCopy;
1303
+ int wordLength = getWordLength_pg(preGraph);
1304
+ Coordinate length = preNode->length;
1305
+ Coordinate index;
1306
+ int readOffset = 0;
1307
+
1308
+ if (initial) {
1309
+ index = 0;
1310
+ readPtr = preNode->descriptor;
1311
+ readCopy = *readPtr;
1312
+ readOffset = 0;
1313
+ } else {
1314
+ index = wordLength - 1;
1315
+ readPtr = &(preNode->descriptor[(wordLength - 1) / 4]);
1316
+ readCopy = *readPtr;
1317
+ readOffset = (wordLength - 1) % 4;
1318
+ readCopy >>= (readOffset * 2);
1319
+ }
1320
+
1321
+ for (; index < length + wordLength - 1; index++) {
1322
+ (**writePtr) >>= 2;
1323
+ if (readOffset == 0)
1324
+ readCopy = *readPtr;
1325
+ (**writePtr) += (readCopy & 3) << 6;
1326
+ readCopy >>= 2;
1327
+
1328
+ if (++(*writeOffset) == 4) {
1329
+ (*writePtr)++;
1330
+ *writeOffset = 0;
1331
+ }
1332
+
1333
+ if (++readOffset == 4) {
1334
+ readPtr++;
1335
+ readOffset = 0;
1336
+ }
1337
+ }
1338
+ }
1339
+
1340
+ static void appendNegativeDescriptor_pg(Descriptor ** writePtr, int * writeOffset, IDnum preNodeID, PreGraph * preGraph, boolean initial) {
1341
+ PreNode * preNode = getPreNodeInPreGraph_pg(preGraph, preNodeID);
1342
+ Descriptor * readPtr = preNode->descriptor;
1343
+ Descriptor readCopy;
1344
+ int wordLength = getWordLength_pg(preGraph);
1345
+ Coordinate length = preNode->length;
1346
+ Coordinate index;
1347
+ int readOffset;
1348
+
1349
+ if (initial)
1350
+ length += wordLength - 1;
1351
+
1352
+ readPtr = &(preNode->descriptor[(length - 1) / 4]);
1353
+ readCopy = *readPtr;
1354
+ readOffset = (length - 1) % 4;
1355
+ readCopy <<= ((3 - readOffset) * 2);
1356
+
1357
+ for (index = 0; index < length; index++) {
1358
+ (**writePtr) >>= 2;
1359
+ if (readOffset == 3)
1360
+ readCopy = *readPtr;
1361
+ #ifndef COLOR
1362
+ (**writePtr) += 192 - (readCopy & 192);
1363
+ #else
1364
+ (**writePtr) += (readCopy & 192);
1365
+ #endif
1366
+ readCopy <<= 2;
1367
+
1368
+ (*writeOffset)++;
1369
+ if (*writeOffset == 4) {
1370
+ (*writePtr)++;
1371
+ *writeOffset = 0;
1372
+ }
1373
+
1374
+ readOffset--;
1375
+ if (readOffset == -1) {
1376
+ readPtr--;
1377
+ readOffset = 3;
1378
+ }
1379
+ }
1380
+ }
1381
+
1382
+ void appendDescriptors_pg(Descriptor ** start, int * writeOffset, IDnum preNodeID, PreGraph* preGraph, boolean initial) {
1383
+ if (preNodeID > 0)
1384
+ appendPositiveDescriptor_pg(start, writeOffset, preNodeID, preGraph, initial);
1385
+ else
1386
+ appendNegativeDescriptor_pg(start, writeOffset, -preNodeID, preGraph, initial);
1387
+ }
1388
+
1389
+ boolean referenceMarkersAreActivated_pg(PreGraph * preGraph) {
1390
+ return preGraph->nodeReferenceMarkers != NULL;
1391
+ }
1392
+
1393
+ static void copyPreMarker(PreMarker * dest, PreMarker * source, IDnum preNodeAID, PreGraph * preGraph) {
1394
+ dest->previous = source->previous;
1395
+ dest->next = source->next;
1396
+
1397
+ dest->preNodeStart = source->preNodeStart;
1398
+ dest->length = source->length;
1399
+ dest->referenceID = source->referenceID;
1400
+ dest->referenceStart = source->referenceStart;
1401
+
1402
+ if (source->preNodeID > 0)
1403
+ dest->preNodeID = preNodeAID;
1404
+ else
1405
+ dest->preNodeID = -preNodeAID;
1406
+
1407
+ if (source->previous)
1408
+ source->previous->next = dest;
1409
+ if (source->next)
1410
+ source->next->previous = dest;
1411
+
1412
+ source->referenceID = 0;
1413
+ source->preNodeID = 0;
1414
+ source->previous = NULL;
1415
+ source->next = NULL;
1416
+ }
1417
+
1418
+ static PreMarker * reallocOrExitReferenceMarkers(PreGraph * preGraph, IDnum preNodeID, IDnum length) {
1419
+ PreMarker * array = callocOrExit(length, PreMarker);
1420
+ PreMarker * writer = array;
1421
+ PreMarker * reader = preGraph->nodeReferenceMarkers[preNodeID];
1422
+ IDnum index;
1423
+
1424
+ for (index = 0; index < preGraph->nodeReferenceMarkerCounts[preNodeID]; index++) {
1425
+ copyPreMarker(writer, reader, preNodeID, preGraph);
1426
+ writer++;
1427
+ reader++;
1428
+ }
1429
+
1430
+ free(preGraph->nodeReferenceMarkers[preNodeID]);
1431
+
1432
+ return array;
1433
+ }
1434
+
1435
+ static void concatenateReferenceMarkers_H2T_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1436
+ IDnum index;
1437
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1438
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1439
+ Coordinate lengthA = preGraph->preNodes[preNodeAID].length + totalOffset;
1440
+ PreMarker * markerA, *next, *markerB;
1441
+ IDnum counter = 0;
1442
+
1443
+ for (index = 0 ; index < countA; index++) {
1444
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1445
+
1446
+ if (markerA->preNodeID > 0)
1447
+ next = markerA->next;
1448
+ else
1449
+ next = markerA->previous;
1450
+
1451
+ if (!next)
1452
+ continue;
1453
+
1454
+ if (markerA->preNodeID == preNodeAID && next->preNodeID != preNodeBID)
1455
+ continue;
1456
+ if (markerA->preNodeID == -preNodeAID && next->preNodeID != -preNodeBID)
1457
+ continue;
1458
+
1459
+ next->referenceID = 0;
1460
+ next->preNodeID = 0;
1461
+
1462
+ markerA->length += next->length;
1463
+ if (markerA->preNodeID > 0) {
1464
+ markerA->next = next->next;
1465
+ if (next->next)
1466
+ next->next->previous = markerA;
1467
+ } else {
1468
+ markerA->previous = next->previous;
1469
+ if (next->previous)
1470
+ next->previous->next = markerA;
1471
+ markerA->referenceStart = next->referenceStart;
1472
+ }
1473
+ next->next = NULL;
1474
+ next->previous = NULL;
1475
+ }
1476
+
1477
+ for (index = 0; index < countB; index++)
1478
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1479
+ counter++;
1480
+
1481
+ if (counter == 0)
1482
+ return;
1483
+
1484
+ if (countA)
1485
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1486
+ else
1487
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1488
+
1489
+ for (index = 0; index < countB; index++) {
1490
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1491
+ if (markerB->referenceID) {
1492
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1493
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1494
+ markerA->preNodeStart += lengthA;
1495
+ }
1496
+ }
1497
+
1498
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1499
+ }
1500
+
1501
+ static void concatenateReferenceMarkers_H2H_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1502
+ IDnum index;
1503
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1504
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1505
+ Coordinate lengthA = preGraph->preNodes[preNodeAID].length + totalOffset;
1506
+ Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
1507
+ PreMarker * markerA, *next, *markerB;
1508
+ IDnum counter = 0;
1509
+
1510
+ for (index = 0 ; index < countA; index++) {
1511
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1512
+
1513
+ if (markerA->preNodeID > 0)
1514
+ next = markerA->next;
1515
+ else
1516
+ next = markerA->previous;
1517
+
1518
+
1519
+ if ((!next)
1520
+ || (markerA->preNodeID == preNodeAID && next->preNodeID != -preNodeBID)
1521
+ || (markerA->preNodeID == -preNodeAID && next->preNodeID != preNodeBID))
1522
+ continue;
1523
+
1524
+ next->referenceID = 0;
1525
+ next->preNodeID = 0;
1526
+
1527
+ markerA->length += next->length;
1528
+ if (markerA->preNodeID > 0) {
1529
+ markerA->next = next->next;
1530
+ if (next->next)
1531
+ next->next->previous = markerA;
1532
+ } else {
1533
+ markerA->previous = next->previous;
1534
+ if (next->previous)
1535
+ next->previous->next = markerA;
1536
+ markerA->referenceStart = next->referenceStart;
1537
+ }
1538
+ next->next = NULL;
1539
+ next->previous = NULL;
1540
+ }
1541
+
1542
+ for (index = 0; index < countB; index++)
1543
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1544
+ counter++;
1545
+
1546
+ if (counter == 0)
1547
+ return;
1548
+
1549
+ if (countA)
1550
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1551
+ else
1552
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1553
+
1554
+ for (index = 0; index < countB; index++) {
1555
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1556
+ if (markerB->referenceID) {
1557
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1558
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1559
+ markerA->preNodeID *= -1;
1560
+ markerA->preNodeStart = lengthA + lengthB - markerA->preNodeStart - markerA->length;
1561
+ }
1562
+ }
1563
+
1564
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1565
+ }
1566
+
1567
+ static void concatenateReferenceMarkers_T2T_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1568
+ IDnum index;
1569
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1570
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1571
+ Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
1572
+ PreMarker * markerA, *next, *markerB;
1573
+ IDnum counter = 0;
1574
+
1575
+ for (index = 0 ; index < countA; index++) {
1576
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1577
+
1578
+ if (markerA->preNodeID < 0)
1579
+ next = markerA->next;
1580
+ else
1581
+ next = markerA->previous;
1582
+
1583
+ if (!next
1584
+ || (markerA->preNodeID == preNodeAID && next->preNodeID != -preNodeBID)
1585
+ || (markerA->preNodeID == -preNodeAID && next->preNodeID != preNodeBID)) {
1586
+ markerA->preNodeStart += lengthB;
1587
+ continue;
1588
+ }
1589
+
1590
+ next->referenceID = 0;
1591
+ next->preNodeID = 0;
1592
+
1593
+ markerA->length += next->length;
1594
+ markerA->preNodeStart = lengthB - next->preNodeStart - next->length;
1595
+ if (markerA->preNodeID < 0) {
1596
+ markerA->next = next->next;
1597
+ if (next->next)
1598
+ next->next->previous = markerA;
1599
+ } else {
1600
+ markerA->previous = next->previous;
1601
+ if (next->previous)
1602
+ next->previous->next = markerA;
1603
+ markerA->referenceStart = next->referenceStart;
1604
+ }
1605
+ next->next = NULL;
1606
+ next->previous = NULL;
1607
+ }
1608
+
1609
+ for (index = 0; index < countB; index++)
1610
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1611
+ counter++;
1612
+
1613
+ if (counter == 0)
1614
+ return;
1615
+
1616
+ if (countA)
1617
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1618
+ else
1619
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1620
+
1621
+ for (index = 0; index < countB; index++) {
1622
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1623
+ if (markerB->referenceID) {
1624
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1625
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1626
+ markerA->preNodeID *= -1;
1627
+ markerA->preNodeStart = lengthB - markerA->preNodeStart - markerA->length;
1628
+ }
1629
+ }
1630
+
1631
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1632
+ }
1633
+
1634
+ static void concatenateReferenceMarkers_T2H_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1635
+ IDnum index;
1636
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1637
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1638
+ PreMarker * markerA, *next, *markerB;
1639
+ Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
1640
+ IDnum counter = 0;
1641
+
1642
+ for (index = 0 ; index < countA; index++) {
1643
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1644
+
1645
+ if (markerA->preNodeID < 0)
1646
+ next = markerA->next;
1647
+ else
1648
+ next = markerA->previous;
1649
+
1650
+ if (!next
1651
+ || (markerA->preNodeID == preNodeAID && next->preNodeID != preNodeBID)
1652
+ || (markerA->preNodeID == -preNodeAID && next->preNodeID != -preNodeBID)) {
1653
+ markerA->preNodeStart += lengthB;
1654
+ continue;
1655
+ }
1656
+
1657
+ next->referenceID = 0;
1658
+ next->preNodeID = 0;
1659
+
1660
+ markerA->length += next->length;
1661
+ markerA->preNodeStart = next->preNodeStart;
1662
+ if (markerA->preNodeID < 0) {
1663
+ markerA->next = next->next;
1664
+ if (next->next)
1665
+ next->next->previous = markerA;
1666
+ } else {
1667
+ markerA->previous = next->previous;
1668
+ if (next->previous)
1669
+ next->previous->next = markerA;
1670
+ markerA->referenceStart = next->referenceStart;
1671
+ }
1672
+ next->next = NULL;
1673
+ next->previous = NULL;
1674
+ }
1675
+
1676
+ for (index = 0; index < countB; index++)
1677
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1678
+ counter++;
1679
+
1680
+ if (counter == 0)
1681
+ return;
1682
+
1683
+ if (countA)
1684
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1685
+ else
1686
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1687
+
1688
+ for (index = 0; index < countB; index++) {
1689
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1690
+ if (markerB->referenceID) {
1691
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1692
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1693
+ }
1694
+ }
1695
+
1696
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1697
+ }
1698
+
1699
+ void concatenateReferenceMarkers_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1700
+ if (!referenceMarkersAreActivated_pg(preGraph))
1701
+ return;
1702
+
1703
+ if (preNodeAID > 0 && preNodeBID > 0)
1704
+ concatenateReferenceMarkers_H2T_pg(preNodeAID, preNodeBID, preGraph, totalOffset);
1705
+ else if (preNodeAID > 0)
1706
+ concatenateReferenceMarkers_H2H_pg(preNodeAID, -preNodeBID, preGraph, totalOffset);
1707
+ else if (preNodeBID > 0)
1708
+ concatenateReferenceMarkers_T2T_pg(-preNodeAID, preNodeBID, preGraph, totalOffset);
1709
+ else
1710
+ concatenateReferenceMarkers_T2H_pg(-preNodeAID, -preNodeBID, preGraph, totalOffset);
1711
+ }
1712
+
1713
+ boolean hasPreMarkers(IDnum nodeID, PreGraph * preGraph) {
1714
+ if (nodeID < 0)
1715
+ nodeID = -nodeID;
1716
+ return preGraph->nodeReferenceMarkerCounts[nodeID] > 0;
1717
+ }