finishm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (554) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.gitmodules +3 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +31 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +59 -0
  8. data/Rakefile +51 -0
  9. data/VERSION +1 -0
  10. data/bin/assembly_visualiser +106 -0
  11. data/bin/check_primer_combinations.rb +73 -0
  12. data/bin/contig_joiner.rb +244 -0
  13. data/bin/contigs_against_assembly.rb +153 -0
  14. data/bin/finishm +143 -0
  15. data/bin/finishm_assembler +55 -0
  16. data/bin/finishm_gap_closer.rb +241 -0
  17. data/bin/kmer_abundance_file_tool.rb +49 -0
  18. data/bin/kmer_pattern_to_assembly.rb +377 -0
  19. data/bin/kmer_profile_finder.rb +92 -0
  20. data/bin/kmers_count_parse.d +52 -0
  21. data/bin/kmers_count_tabulate.d +123 -0
  22. data/bin/kmers_count_tabulate.rb +84 -0
  23. data/bin/pcr_result_parser.rb +108 -0
  24. data/bin/primer_finder.rb +119 -0
  25. data/bin/read_selection_by_kmer.d +174 -0
  26. data/bin/scaffold_by_pattern.rb +119 -0
  27. data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
  28. data/bin/scaffold_end_coverages.rb +69 -0
  29. data/bin/trail_validator.rb +84 -0
  30. data/ext/mkrf_conf.rb +56 -0
  31. data/ext/src/Makefile +140 -0
  32. data/ext/src/src/allocArray.c +305 -0
  33. data/ext/src/src/allocArray.h +86 -0
  34. data/ext/src/src/autoOpen.c +107 -0
  35. data/ext/src/src/autoOpen.h +18 -0
  36. data/ext/src/src/binarySequences.c +813 -0
  37. data/ext/src/src/binarySequences.h +125 -0
  38. data/ext/src/src/concatenatedGraph.c +233 -0
  39. data/ext/src/src/concatenatedGraph.h +30 -0
  40. data/ext/src/src/concatenatedPreGraph.c +262 -0
  41. data/ext/src/src/concatenatedPreGraph.h +29 -0
  42. data/ext/src/src/correctedGraph.c +2643 -0
  43. data/ext/src/src/correctedGraph.h +32 -0
  44. data/ext/src/src/dfib.c +509 -0
  45. data/ext/src/src/dfib.h +69 -0
  46. data/ext/src/src/dfibHeap.c +89 -0
  47. data/ext/src/src/dfibHeap.h +39 -0
  48. data/ext/src/src/dfibpriv.h +105 -0
  49. data/ext/src/src/fib.c +628 -0
  50. data/ext/src/src/fib.h +78 -0
  51. data/ext/src/src/fibHeap.c +79 -0
  52. data/ext/src/src/fibHeap.h +41 -0
  53. data/ext/src/src/fibpriv.h +110 -0
  54. data/ext/src/src/globals.h +154 -0
  55. data/ext/src/src/graph.c +3932 -0
  56. data/ext/src/src/graph.h +233 -0
  57. data/ext/src/src/graphReConstruction.c +1472 -0
  58. data/ext/src/src/graphReConstruction.h +30 -0
  59. data/ext/src/src/graphStats.c +2167 -0
  60. data/ext/src/src/graphStats.h +72 -0
  61. data/ext/src/src/graphStructures.h +52 -0
  62. data/ext/src/src/kmer.c +652 -0
  63. data/ext/src/src/kmer.h +73 -0
  64. data/ext/src/src/kmerOccurenceTable.c +236 -0
  65. data/ext/src/src/kmerOccurenceTable.h +44 -0
  66. data/ext/src/src/kseq.h +223 -0
  67. data/ext/src/src/locallyCorrectedGraph.c +557 -0
  68. data/ext/src/src/locallyCorrectedGraph.h +40 -0
  69. data/ext/src/src/passageMarker.c +677 -0
  70. data/ext/src/src/passageMarker.h +137 -0
  71. data/ext/src/src/preGraph.c +1717 -0
  72. data/ext/src/src/preGraph.h +106 -0
  73. data/ext/src/src/preGraphConstruction.c +990 -0
  74. data/ext/src/src/preGraphConstruction.h +26 -0
  75. data/ext/src/src/probe_node_finder.c +84 -0
  76. data/ext/src/src/probe_node_finder.h +6 -0
  77. data/ext/src/src/readCoherentGraph.c +557 -0
  78. data/ext/src/src/readCoherentGraph.h +30 -0
  79. data/ext/src/src/readSet.c +1734 -0
  80. data/ext/src/src/readSet.h +67 -0
  81. data/ext/src/src/readToNode.c +218 -0
  82. data/ext/src/src/readToNode.h +35 -0
  83. data/ext/src/src/recycleBin.c +199 -0
  84. data/ext/src/src/recycleBin.h +58 -0
  85. data/ext/src/src/roadMap.c +342 -0
  86. data/ext/src/src/roadMap.h +65 -0
  87. data/ext/src/src/run.c +318 -0
  88. data/ext/src/src/run.h +52 -0
  89. data/ext/src/src/run2.c +744 -0
  90. data/ext/src/src/runReadToNode.c +29 -0
  91. data/ext/src/src/scaffold.c +1876 -0
  92. data/ext/src/src/scaffold.h +64 -0
  93. data/ext/src/src/shortReadPairs.c +1243 -0
  94. data/ext/src/src/shortReadPairs.h +32 -0
  95. data/ext/src/src/splay.c +259 -0
  96. data/ext/src/src/splay.h +43 -0
  97. data/ext/src/src/splayTable.c +1315 -0
  98. data/ext/src/src/splayTable.h +31 -0
  99. data/ext/src/src/tightString.c +362 -0
  100. data/ext/src/src/tightString.h +82 -0
  101. data/ext/src/src/utility.c +199 -0
  102. data/ext/src/src/utility.h +98 -0
  103. data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
  104. data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
  105. data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
  106. data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
  107. data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
  108. data/ext/src/third-party/zlib-1.2.3/README +125 -0
  109. data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
  110. data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
  111. data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
  112. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
  113. data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
  114. data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
  115. data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
  116. data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
  117. data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
  118. data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
  119. data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
  120. data/ext/src/third-party/zlib-1.2.3/configure +459 -0
  121. data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
  122. data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
  123. data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
  124. data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
  125. data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
  126. data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
  127. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
  128. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
  129. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
  130. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
  131. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
  132. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
  133. data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
  134. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
  135. data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
  136. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
  137. data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
  138. data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
  139. data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
  140. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
  141. data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
  142. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
  143. data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
  144. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
  145. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
  146. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
  147. data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
  148. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
  149. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
  150. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
  151. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
  152. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
  153. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
  154. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
  155. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
  156. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
  157. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
  158. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
  159. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
  160. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
  161. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
  162. data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
  163. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
  164. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
  165. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
  166. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
  167. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
  168. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
  169. data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
  170. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
  171. data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
  172. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
  173. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
  174. data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
  175. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
  176. data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
  177. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
  178. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
  179. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
  180. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
  181. data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
  182. data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
  183. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
  184. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
  185. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
  186. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
  187. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
  188. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
  189. data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
  190. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
  191. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
  192. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
  193. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
  194. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
  195. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
  196. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
  197. data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
  198. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
  199. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
  200. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
  201. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
  202. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
  203. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
  204. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
  205. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
  206. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
  207. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
  208. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
  209. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
  210. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
  211. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
  212. data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
  213. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
  214. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
  215. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
  216. data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
  217. data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
  218. data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
  219. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
  220. data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
  221. data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
  222. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
  223. data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
  224. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
  225. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
  226. data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
  227. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
  228. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
  229. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
  230. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
  231. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
  232. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
  233. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
  234. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
  235. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
  236. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
  237. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
  238. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
  239. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
  240. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
  241. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
  242. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
  243. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
  244. data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
  245. data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
  246. data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
  247. data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
  248. data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
  249. data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
  250. data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
  251. data/ext/src/third-party/zlib-1.2.3/example +0 -0
  252. data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
  253. data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
  254. data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
  255. data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
  256. data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
  257. data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
  258. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
  259. data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
  260. data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
  261. data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
  262. data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
  263. data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
  264. data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
  265. data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
  266. data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
  267. data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
  268. data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
  269. data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
  270. data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
  271. data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
  272. data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
  273. data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
  274. data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
  275. data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
  276. data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
  277. data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
  278. data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
  279. data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
  280. data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
  281. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
  282. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
  283. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
  284. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
  285. data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
  286. data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
  287. data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
  288. data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
  289. data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
  290. data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
  291. data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
  292. data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
  293. data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
  294. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
  295. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
  296. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
  297. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
  298. data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
  299. data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
  300. data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
  301. data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
  302. data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
  303. data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
  304. data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
  305. data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
  306. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
  307. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
  308. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
  309. data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
  310. data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
  311. data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
  312. data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
  313. data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
  314. data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
  315. data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
  316. data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
  317. data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
  318. data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
  319. data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
  320. data/lib/assembly/a_b_visualiser.rb +169 -0
  321. data/lib/assembly/acyclic_connection_finder.rb +81 -0
  322. data/lib/assembly/all_orfs.rb +615 -0
  323. data/lib/assembly/bad_format_writer.rb +46 -0
  324. data/lib/assembly/bam_probe_read_selector.rb +48 -0
  325. data/lib/assembly/bubbly_assembler.rb +842 -0
  326. data/lib/assembly/c_probe_node_finder.rb +38 -0
  327. data/lib/assembly/connection_interpreter.rb +350 -0
  328. data/lib/assembly/contig_printer.rb +400 -0
  329. data/lib/assembly/coverage_based_graph_filter.rb +68 -0
  330. data/lib/assembly/depth_first_search.rb +63 -0
  331. data/lib/assembly/dijkstra.rb +216 -0
  332. data/lib/assembly/fluffer.rb +253 -0
  333. data/lib/assembly/graph_explorer.rb +85 -0
  334. data/lib/assembly/graph_generator.rb +315 -0
  335. data/lib/assembly/height_finder.rb +355 -0
  336. data/lib/assembly/hybrid_velvet_graph.rb +70 -0
  337. data/lib/assembly/input_genome.rb +182 -0
  338. data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
  339. data/lib/assembly/node_finder.rb +171 -0
  340. data/lib/assembly/oriented_node_trail.rb +507 -0
  341. data/lib/assembly/paired_end_assembler.rb +53 -0
  342. data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
  343. data/lib/assembly/probed_graph.rb +105 -0
  344. data/lib/assembly/read_input.rb +79 -0
  345. data/lib/assembly/read_to_node.rb +37 -0
  346. data/lib/assembly/scaffold_breaker.rb +126 -0
  347. data/lib/assembly/sequence_hasher.rb +71 -0
  348. data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
  349. data/lib/assembly/single_coherent_wanderer.rb +261 -0
  350. data/lib/assembly/single_ended_assembler.rb +441 -0
  351. data/lib/assembly/velvet_c_binding.rb +54 -0
  352. data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
  353. data/lib/external/VERSION +1 -0
  354. data/lib/finishm/assemble.rb +224 -0
  355. data/lib/finishm/explore.rb +217 -0
  356. data/lib/finishm/finisher.rb +303 -0
  357. data/lib/finishm/fluff.rb +122 -0
  358. data/lib/finishm/gapfiller.rb +325 -0
  359. data/lib/finishm/orfs_finder.rb +88 -0
  360. data/lib/finishm/path_counter.rb +90 -0
  361. data/lib/finishm/primers.rb +425 -0
  362. data/lib/finishm/primers_check.rb +176 -0
  363. data/lib/finishm/roundup.rb +344 -0
  364. data/lib/finishm/sequence.rb +142 -0
  365. data/lib/finishm/visualise.rb +430 -0
  366. data/lib/finishm/wander.rb +270 -0
  367. data/lib/kmer_abundance_pattern.rb +79 -0
  368. data/lib/kmer_multi_abundance_file.rb +48 -0
  369. data/lib/oligo_designer.rb +88 -0
  370. data/lib/priner.rb +66 -0
  371. data/spec/acyclic_connection_finder_spec.rb +551 -0
  372. data/spec/all_orfs_spec.rb +443 -0
  373. data/spec/assemble_spec.rb +186 -0
  374. data/spec/bubbly_assembler_spec.rb +707 -0
  375. data/spec/c_node_finder_spec.rb +58 -0
  376. data/spec/connection_interpreter_spec.rb +284 -0
  377. data/spec/contig_printer_spec.rb +291 -0
  378. data/spec/coverage_based_graph_filter_spec.rb +102 -0
  379. data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
  380. data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
  381. data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
  382. data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
  383. data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
  384. data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
  385. data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
  386. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
  387. data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
  388. data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
  389. data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
  390. data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
  391. data/spec/data/assembly_visualiser/Graph +46 -0
  392. data/spec/data/assembly_visualiser/start_kmers1 +2 -0
  393. data/spec/data/bands.csv +1 -0
  394. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
  395. data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
  396. data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
  397. data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
  398. data/spec/data/c_probe_node_finder/1/Log +756 -0
  399. data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
  400. data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
  401. data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
  402. data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
  403. data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
  404. data/spec/data/contig_printer/1/contigs.fa +4 -0
  405. data/spec/data/contig_printer/1/seq.fa +2408 -0
  406. data/spec/data/contig_printer/1/seq.fa.svg +153 -0
  407. data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
  408. data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
  409. data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
  410. data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
  411. data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
  412. data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
  413. data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
  414. data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
  415. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
  416. data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
  417. data/spec/data/contig_printer/1/seq.node12.fa +4 -0
  418. data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
  419. data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
  420. data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
  421. data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
  422. data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
  423. data/spec/data/explore/1/a.fa +2 -0
  424. data/spec/data/explore/1/seq1_and_a.fa +3 -0
  425. data/spec/data/explore/1/seq2.fa +2 -0
  426. data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
  427. data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
  428. data/spec/data/fluff/1/seq1.fa +2 -0
  429. data/spec/data/fluff/1/seq2.fa +2 -0
  430. data/spec/data/gapfilling/1/reads.fa +171 -0
  431. data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
  432. data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
  433. data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
  434. data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
  435. data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
  436. data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
  437. data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
  438. data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
  439. data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
  440. data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
  441. data/spec/data/gapfilling/2/reference.fa +2 -0
  442. data/spec/data/gapfilling/2/reference_part1.fa +4 -0
  443. data/spec/data/gapfilling/2/reference_part2.fa +4 -0
  444. data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
  445. data/spec/data/gapfilling/2/with_gaps.fa +4 -0
  446. data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
  447. data/spec/data/gapfilling/3/reads.fa.gz +0 -0
  448. data/spec/data/gapfilling/3/reference_part1.fa +4 -0
  449. data/spec/data/gapfilling/3/reference_part2.fa +4 -0
  450. data/spec/data/gapfilling/3/with_gaps.fa +4 -0
  451. data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
  452. data/spec/data/gapfilling/4/reads.fa.gz +0 -0
  453. data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
  454. data/spec/data/gapfilling/5/answer.fna +2 -0
  455. data/spec/data/gapfilling/5/gappy.fna +2 -0
  456. data/spec/data/gapfilling/5/reads.fa +17961 -0
  457. data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
  458. data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
  459. data/spec/data/gapfilling/6/random1.fa +28 -0
  460. data/spec/data/gapfilling/6/random2.fa +28 -0
  461. data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
  462. data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
  463. data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
  464. data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
  465. data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
  466. data/spec/data/kmers_count1.csv +2 -0
  467. data/spec/data/kmers_count2.csv +3 -0
  468. data/spec/data/out +3 -0
  469. data/spec/data/positive_latching_pair.fa +2 -0
  470. data/spec/data/primers.csv +4 -0
  471. data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
  472. data/spec/data/read_selection_by_kmer/input.fasta +6 -0
  473. data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
  474. data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
  475. data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
  476. data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
  477. data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
  478. data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
  479. data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
  480. data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
  481. data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
  482. data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
  483. data/spec/data/t/details.txt +5 -0
  484. data/spec/data/t/details.txt.srt +5 -0
  485. data/spec/data/t/location.txt +3 -0
  486. data/spec/data/t/location.txt.srt +3 -0
  487. data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
  488. data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
  489. data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
  490. data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
  491. data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
  492. data/spec/data/tweak/3_variant/answer.fa +2 -0
  493. data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
  494. data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
  495. data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
  496. data/spec/data/velvet_test_trails/Assem/Graph +17 -0
  497. data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
  498. data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
  499. data/spec/data/velvet_test_trails/Assem/Log +35 -0
  500. data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
  501. data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
  502. data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
  503. data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
  504. data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
  505. data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
  506. data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
  507. data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
  508. data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
  509. data/spec/data/velvet_test_trails/read1.fa +2 -0
  510. data/spec/data/velvet_test_trails/reads.fa +50 -0
  511. data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
  512. data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
  513. data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
  514. data/spec/data/visualise/1/LastGraph +6695 -0
  515. data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
  516. data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
  517. data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
  518. data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
  519. data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
  520. data/spec/data/wander/1/random1.fa +2 -0
  521. data/spec/data/wander/1/random1.sammy.fa +804 -0
  522. data/spec/depth_first_search_spec.rb +190 -0
  523. data/spec/dijkstra_spec.rb +143 -0
  524. data/spec/explore_spec.rb +29 -0
  525. data/spec/fluffer_spec.rb +155 -0
  526. data/spec/gapfiller_spec.rb +107 -0
  527. data/spec/graph_explorer_spec.rb +475 -0
  528. data/spec/graph_generator_spec.rb +99 -0
  529. data/spec/height_finder_spec.rb +306 -0
  530. data/spec/kmer_abundance_pattern_spec.rb +56 -0
  531. data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
  532. data/spec/kmer_profile_finder_spec.rb +38 -0
  533. data/spec/kmers_count_tabulate_spec.rb +120 -0
  534. data/spec/oriented_node_trail_spec.rb +221 -0
  535. data/spec/paired_end_neighbours_spec.rb +126 -0
  536. data/spec/paths_between_nodes_spec.rb +349 -0
  537. data/spec/priner_spec.rb +7 -0
  538. data/spec/read_input_spec.rb +23 -0
  539. data/spec/read_selection_by_kmer_spec.rb +166 -0
  540. data/spec/read_to_node_spec.rb +35 -0
  541. data/spec/roundup_spec.rb +366 -0
  542. data/spec/scaffold_breaker_spec.rb +144 -0
  543. data/spec/sequence_spec.rb +43 -0
  544. data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
  545. data/spec/single_coherent_wanderer_spec.rb +120 -0
  546. data/spec/single_ended_assembler_spec.rb +398 -0
  547. data/spec/spec_helper.rb +310 -0
  548. data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
  549. data/spec/visualise_spec.rb +105 -0
  550. data/spec/wander_spec.rb +119 -0
  551. data/spec/watch_for_changes.sh +16 -0
  552. data/validation/fasta_compare.rb +72 -0
  553. data/validation/gapfill_simulate_perfect.rb +108 -0
  554. metadata +899 -0
@@ -0,0 +1,137 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #ifndef _PASSAGEMARKER_H_
22
+ #define _PASSAGEMARKER_H_
23
+
24
+ struct passageList_st { /* One cell of a PassageMarkerList: a marker handle plus a link to another cell */
25
+ PassageMarkerI marker; /* Passage marker handle held by this cell */
26
+ PassageMarkerList *next; /* Link to another list cell; presumably the following one, NULL at the end -- TODO confirm */
27
+ } ATTRIBUTE_PACKED;
28
+
29
+ ///////////////////////////////////////////////////////////////////
30
+ // PassageMarker lists
31
+ ///////////////////////////////////////////////////////////////////
32
+ // You can always malloc a PassageMarkerList but these routines manage the
33
+ // memory for you, thus avoiding fragmentation
34
+ PassageMarkerList *newPassageMarkerList(PassageMarkerI marker,
35
+ PassageMarkerList * next);
36
+
37
+ void deallocatePassageMarkerList(PassageMarkerList * list);
38
+
39
+ ///////////////////////////////////////////////////////////////////
40
+ // Creators/Destructors
41
+ ///////////////////////////////////////////////////////////////////
42
+ PassageMarkerI addPassageMarker(IDnum sequenceID, Coordinate start,
43
+ Node * node);
44
+
45
+ PassageMarkerI addUncertainPassageMarker(IDnum sequenceID, Node * node);
46
+
47
+ PassageMarkerI newPassageMarker(IDnum seqID, Coordinate start,
48
+ Coordinate finish, Coordinate startOffset,
49
+ Coordinate finishOffset);
50
+
51
+ // Deallocates but also removes all pointers towards that structure
52
+ void destroyPassageMarker(PassageMarkerI marker);
53
+ void destroyAllPassageMarkers();
54
+
55
+ ///////////////////////////////////////////////////////////////////
56
+ // Node
57
+ ///////////////////////////////////////////////////////////////////
58
+
59
+ // Current node
60
+ Node *getNode(PassageMarkerI marker);
61
+
62
+ // Yank out of current node
63
+ void extractPassageMarker(PassageMarkerI marker);
64
+
65
+ // Insert into a node
66
+ void transposePassageMarker(PassageMarkerI marker, Node * destination);
67
+
68
+ ///////////////////////////////////////////////////////////////////
69
+ // General Info
70
+ ///////////////////////////////////////////////////////////////////
71
+ // Export into file
72
+ void exportMarker(FILE * outfile, PassageMarkerI marker,
73
+ TightString * sequences, int wordLength);
74
+
75
+ // General info for debugging
76
+ char *readPassageMarker(PassageMarkerI marker);
77
+
78
+ // Sequence ID associated to the passage marker
79
+ IDnum getPassageMarkerSequenceID(PassageMarkerI marker);
80
+ IDnum getAbsolutePassMarkerSeqID(PassageMarkerI marker);
81
+ int passageMarkerDirection(PassageMarkerI marker);
82
+
83
+ // Coordinates
84
+ Coordinate getPassageMarkerStart(PassageMarkerI marker);
85
+ void setPassageMarkerStart(PassageMarkerI marker, Coordinate start);
86
+ Coordinate getPassageMarkerFinish(PassageMarkerI marker);
87
+ void setPassageMarkerFinish(PassageMarkerI marker, Coordinate finish);
88
+ Coordinate getPassageMarkerLength(PassageMarkerI marker);
89
+
90
+ // Offsets
91
+ Coordinate getStartOffset(PassageMarkerI marker);
92
+ void setStartOffset(PassageMarkerI marker, Coordinate offset);
93
+ void incrementStartOffset(PassageMarkerI marker, Coordinate offset);
94
+ Coordinate getFinishOffset(PassageMarkerI marker);
95
+ void setFinishOffset(PassageMarkerI marker, Coordinate offset);
96
+ void incrementFinishOffset(PassageMarkerI marker, Coordinate offset);
97
+
98
+ // Status
99
+ void setPassageMarkerStatus(PassageMarkerI marker, boolean status);
100
+ boolean getPassageMarkerStatus(PassageMarkerI marker);
101
+
102
+ ///////////////////////////////////////////////////////////////////
103
+ // Marker Sequences
104
+ ///////////////////////////////////////////////////////////////////
105
+
106
+ // Corresponding marker of reverse complement sequence
107
+ PassageMarkerI getTwinMarker(PassageMarkerI marker);
108
+
109
+ // Within a node
110
+ PassageMarkerI getNextInNode(PassageMarkerI marker);
111
+ void setNextInNode(PassageMarkerI marker, PassageMarkerI next);
112
+ void setTopOfTheNode(PassageMarkerI marker);
113
+
114
+ // Within a sequence
115
+ PassageMarkerI getNextInSequence(PassageMarkerI marker);
116
+ void setNextInSequence(PassageMarkerI previous, PassageMarkerI next);
117
+ PassageMarkerI getPreviousInSequence(PassageMarkerI marker);
118
+ void setPreviousInSequence(PassageMarkerI previous, PassageMarkerI next);
119
+ void connectPassageMarkers(PassageMarkerI previous, PassageMarkerI next,
120
+ Graph * graph);
121
+
122
+ // End of read chains
123
+ boolean isTerminal(PassageMarkerI marker);
124
+ boolean isInitial(PassageMarkerI marker);
125
+
126
+ // Checks whether the node of the next marker is the one given in parameter
127
+ boolean isDestinationToMarker(PassageMarkerI marker, Node * node);
128
+
129
+ // Bypasses the middle marker
130
+ void disconnectNextPassageMarker(PassageMarkerI marker, Graph * graph);
131
+ void deleteNextPassageMarker(PassageMarkerI marker, Graph * graph);
132
+
133
+ // Merge two markers (cf concatenateGraph())
134
+ void concatenatePassageMarkers(PassageMarkerI marker,
135
+ PassageMarkerI nextMarker);
136
+
137
+ #endif
@@ -0,0 +1,1717 @@
1
+ /*
2
+ Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3
+
4
+ This file is part of Velvet.
5
+
6
+ Velvet is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation; either version 2 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Velvet is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with Velvet; if not, write to the Free Software
18
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+
20
+ */
21
+ #include <stdlib.h>
22
+ #include <stdio.h>
23
+ #include <string.h>
24
+ #include <ctype.h>
25
+
26
+ #ifdef _OPENMP
27
+ #include <omp.h>
28
+ #endif
29
+
30
+ #include "globals.h"
31
+ #include "allocArray.h"
32
+ #include "preGraph.h"
33
+ #include "recycleBin.h"
34
+ #include "tightString.h"
35
+ #include "run.h"
36
+ #include "utility.h"
37
+
38
+ #define ADENINE 0
39
+ #define CYTOSINE 1
40
+ #define GUANINE 2
41
+ #define THYMINE 3
42
+
43
+ struct preMarker_st { /* Marker tying a stretch of a reference sequence to a preNode */
44
+ PreMarker * previous; /* Neighbouring marker; its ->next is cleared when this marker's preNode is destroyed */
45
+ PreMarker * next; /* Neighbouring marker; its ->previous is cleared when this marker's preNode is destroyed */
46
+ IDnum referenceStart; /* presumably the start position on the reference -- TODO confirm */
47
+ IDnum preNodeStart; /* presumably the start position within the preNode -- TODO confirm */
48
+ IDnum length; /* presumably the length covered by the marker -- TODO confirm */
49
+ IDnum referenceID; /* Reset to 0 when the owning preNode is destroyed */
50
+ IDnum preNodeID; /* Signed ID of the owning preNode, 0 once destroyed. SF TODO only the sign seems to matter. Could replace with char or bit field */
51
+ } ATTRIBUTE_PACKED;
52
+
53
+ typedef struct preArc_st PreArc;
54
+
55
+ struct preArc_st { /* Arc shared by the arc lists of its two end preNodes */
56
+ PreArcI nextLeft; /* Index of the following PreArc in the arc list of preNodeIDLeft */
57
+ PreArcI nextRight; /* Index of the following PreArc in the arc list of preNodeIDRight */
58
+ IDnum multiplicity; /* Counter incremented each time the same arc is created again */
59
+ IDnum preNodeIDLeft; /* Signed ID of the origin preNode as given at creation */
60
+ IDnum preNodeIDRight; /* Negated signed ID of the destination preNode given at creation */
61
+ } ATTRIBUTE_PACKED;
62
+
63
+ struct preNode_st { /* One node of the preGraph */
64
+ PreArcI preArcLeft; /* Head of the arc list used when the node is addressed with a negative (or zero) ID */
65
+ PreArcI preArcRight; /* Head of the arc list used when the node is addressed with a positive ID */
66
+ Descriptor *descriptor; /* Heap-allocated packed sequence; NULL flags the node as destroyed */
67
+ IDnum length; /* Value returned by getPreNodeLength_pg() */
68
+ } ATTRIBUTE_PACKED;
69
+
70
+ struct preGraph_st { /* Top-level container of the preGraph */
71
+ PreNode *preNodes; /* Node array indexed by absolute preNode ID; valid IDs run from 1 to preNodeCount */
72
+ IDnum * nodeReferenceMarkerCounts; /* Per-node number of entries in nodeReferenceMarkers[ID]; may be NULL */
73
+ PreMarker ** nodeReferenceMarkers; /* Per-node arrays of PreMarkers, indexed by absolute preNode ID; may be NULL */
74
+ IDnum sequenceCount; /* Number of sequences used to build the preGraph */
75
+ IDnum referenceCount; /* Value passed to emptyPreGraph_pg(); presumably the number of reference sequences -- TODO confirm */
76
+ IDnum preNodeCount; /* Number of preNodes currently held in preNodes */
77
+ int wordLength; /* Word (k-mer) length; descriptors hold length + wordLength - 1 nucleotides */
78
+ boolean double_strand; /* Flag passed to emptyPreGraph_pg(); meaning not visible here -- TODO confirm */
79
+ };
80
+
81
+ static AllocArray *preArcMemory = NULL;
82
+
83
+ DECLARE_FAST_ACCESSORS(PREARC, PreArc, preArcMemory)
84
+
85
+ PreArcI allocatePreArc_pg()
86
+ {
87
+ #ifdef _OPENMP
88
+ return allocArrayArrayAllocate (preArcMemory);
89
+ #else
90
+ if (preArcMemory == NULL)
91
+ preArcMemory = newAllocArray(sizeof(PreArc), "PreArc");
92
+ return allocArrayAllocate (preArcMemory);
93
+ #endif
94
+
95
+ }
96
+
97
+ void deallocatePreArc_pg(PreArcI preArc)
98
+ {
99
+ #ifdef _OPENMP
100
+ allocArrayArrayFree (preArcMemory, preArc);
101
+ #else
102
+ allocArrayFree (preArcMemory, preArc);
103
+ #endif
104
+ }
105
+
106
+ // Returns the length of the preNode's descriptor list
107
+ Coordinate getPreNodeLength_pg(IDnum preNodeID, PreGraph * preGraph)
108
+ {
109
+ IDnum ID = preNodeID;
110
+
111
+ if (ID < 0)
112
+ ID = -ID;
113
+
114
+ return (preGraph->preNodes[ID]).length;
115
+ }
116
+
117
+ // Returns the number of preNodes in the preGraph
118
+ IDnum preNodeCount_pg(PreGraph * preGraph)
119
+ {
120
+ return preGraph->preNodeCount;
121
+ }
122
+
123
+ // returns the number of sequences used to buid the preGraph
124
+ IDnum sequenceCount_pg(PreGraph * preGraph)
125
+ {
126
+ return preGraph->sequenceCount;
127
+ }
128
+
129
+ PreArcI getPreArcBetweenPreNodes_pg(IDnum originPreNodeID,
130
+ IDnum destinationPreNodeID,
131
+ PreGraph * preGraph)
132
+ {
133
+ PreArcI preArc;
134
+
135
+ if (originPreNodeID == 0 || destinationPreNodeID == 0) {
136
+ return NULL_IDX;
137
+ }
138
+
139
+ for (preArc = getPreArc_pg(originPreNodeID, preGraph);
140
+ preArc != NULL_IDX;
141
+ preArc = getNextPreArc_pg(preArc, originPreNodeID)) {
142
+ if (getDestination_pg(preArc, originPreNodeID) ==
143
+ destinationPreNodeID) {
144
+ return preArc;
145
+ }
146
+ }
147
+
148
+ return NULL_IDX;
149
+ }
150
+
151
+ static void addPreArcToPreNode_pg(PreArcI preArc, IDnum preNodeID,
152
+ PreGraph * preGraph)
153
+ {
154
+ IDnum ID = preNodeID;
155
+ PreNode *preNode;
156
+ PreArcI *preArcPtr;
157
+ PreArc *preArcVal;
158
+
159
+ if (ID < 0)
160
+ ID = -ID;
161
+
162
+ preNode = &(preGraph->preNodes[ID]);
163
+
164
+ if (preNodeID > 0)
165
+ preArcPtr = &(preNode->preArcRight);
166
+ else
167
+ preArcPtr = &(preNode->preArcLeft);
168
+
169
+ preArcVal = PREARC_I2P (preArc);
170
+ preArcVal = PREARC_I2P (preArc);
171
+
172
+ if (preNodeID == preArcVal->preNodeIDLeft) {
173
+ preArcVal->nextLeft = *preArcPtr;
174
+ *preArcPtr = preArc;
175
+ }
176
+
177
+ if (preNodeID == preArcVal->preNodeIDRight) {
178
+ preArcVal->nextRight = *preArcPtr;
179
+ *preArcPtr = preArc;
180
+ }
181
+ }
182
+
183
+ // Creates an preArc from preNode origin to preNode destination.
184
+ // If this preArc already exists, increments its multiplicity by 1.
185
+ PreArcI createPreArc_pg(IDnum originPreNodeID, IDnum destinationPreNodeID,
186
+ PreGraph * preGraph)
187
+ {
188
+ PreArcI preArc;
189
+ PreArc *preArcVal;
190
+
191
+
192
+ if (originPreNodeID == 0 || destinationPreNodeID == 0)
193
+ return NULL_IDX;
194
+
195
+ preArc =
196
+ getPreArcBetweenPreNodes_pg(originPreNodeID,
197
+ destinationPreNodeID, preGraph);
198
+
199
+ if (preArc != NULL_IDX) {
200
+ PREARC_FI2P (preArc)->multiplicity++;
201
+ if (destinationPreNodeID == -originPreNodeID)
202
+ PREARC_FI2P (preArc)->multiplicity++;
203
+ return preArc;
204
+ }
205
+ // If not found
206
+ preArc = allocatePreArc_pg();
207
+ preArcVal = PREARC_FI2P (preArc);
208
+ preArcVal->preNodeIDLeft = originPreNodeID;
209
+ preArcVal->preNodeIDRight = -destinationPreNodeID;
210
+ preArcVal->multiplicity = 1;
211
+
212
+ addPreArcToPreNode_pg(preArc, originPreNodeID, preGraph);
213
+
214
+ // Hairpin case
215
+ if (destinationPreNodeID == -originPreNodeID) {
216
+ preArcVal->multiplicity++;
217
+ return preArc;
218
+ }
219
+
220
+ addPreArcToPreNode_pg(preArc, -destinationPreNodeID, preGraph);
221
+
222
+ return preArc;
223
+ }
224
+
225
+ void createAnalogousPreArc_pg(IDnum originPreNodeID,
226
+ IDnum destinationPreNodeID,
227
+ PreArcI refPreArc, PreGraph * preGraph)
228
+ {
229
+ PreArcI preArc;
230
+ PreArc *preArcVal;
231
+
232
+ if (originPreNodeID == 0 || destinationPreNodeID == 0)
233
+ return;
234
+
235
+ preArc =
236
+ getPreArcBetweenPreNodes_pg(originPreNodeID,
237
+ destinationPreNodeID, preGraph);
238
+
239
+ if (preArc != NULL_IDX) {
240
+ PREARC_FI2P (preArc)->multiplicity += PREARC_FI2P (refPreArc)->multiplicity;
241
+ return;
242
+ }
243
+ // If not found
244
+ preArc = allocatePreArc_pg();
245
+ preArcVal = PREARC_FI2P (preArc);
246
+ preArcVal->preNodeIDLeft = originPreNodeID;
247
+ preArcVal->preNodeIDRight = -destinationPreNodeID;
248
+ preArcVal->multiplicity = PREARC_FI2P (refPreArc)->multiplicity;
249
+
250
+ addPreArcToPreNode_pg(preArc, originPreNodeID, preGraph);
251
+
252
+ // Hairpin case
253
+ if (destinationPreNodeID == -originPreNodeID)
254
+ return;
255
+
256
+ addPreArcToPreNode_pg(preArc, -destinationPreNodeID, preGraph);
257
+ }
258
+
259
+ static void setNextPreArc_pg(PreArcI preArc, IDnum preNodeID,
260
+ PreArcI nextPreArc)
261
+ {
262
+ PreArc *preArcVal;
263
+
264
+ preArcVal = PREARC_FI2P (preArc);
265
+ if (preNodeID == preArcVal->preNodeIDLeft)
266
+ preArcVal->nextLeft = nextPreArc;
267
+ if (preNodeID == preArcVal->preNodeIDRight)
268
+ preArcVal->nextRight = nextPreArc;
269
+ }
270
+
271
+ void removePreArcFromList_pg(PreArcI preArc, IDnum preNodeID,
272
+ PreGraph * preGraph)
273
+ {
274
+ IDnum ID = preNodeID;
275
+ PreNode *preNode;
276
+ PreArcI *preArcPtr;
277
+ PreArcI tempPreArc;
278
+
279
+ if (ID < 0)
280
+ ID = -ID;
281
+
282
+ preNode = &(preGraph->preNodes[ID]);
283
+
284
+ if (preNodeID > 0)
285
+ preArcPtr = &(preNode->preArcRight);
286
+ else
287
+ preArcPtr = &(preNode->preArcLeft);
288
+
289
+ if (*preArcPtr == preArc) {
290
+ *preArcPtr = getNextPreArc_pg(preArc, preNodeID);
291
+ return;
292
+ }
293
+
294
+ for (tempPreArc = *preArcPtr; tempPreArc != NULL_IDX;
295
+ tempPreArc = getNextPreArc_pg(tempPreArc, preNodeID))
296
+ if (getNextPreArc_pg(tempPreArc, preNodeID) == preArc)
297
+ setNextPreArc_pg(tempPreArc, preNodeID,
298
+ getNextPreArc_pg(preArc,
299
+ preNodeID));
300
+ }
301
+
302
+ void destroyPreArc_pg(PreArcI preArc, PreGraph * preGraph)
303
+ {
304
+ IDnum leftID, rightID;
305
+ PreArc *preArcVal;
306
+
307
+ if (preArc == NULL_IDX)
308
+ return;
309
+
310
+ preArcVal = PREARC_FI2P (preArc);
311
+ leftID = preArcVal->preNodeIDLeft;
312
+ rightID = preArcVal->preNodeIDRight;
313
+
314
+ // Removing preArc from list
315
+ removePreArcFromList_pg(preArc, leftID, preGraph);
316
+
317
+ // Removing preArc's twin from list
318
+ if (rightID != leftID)
319
+ removePreArcFromList_pg(preArc, rightID, preGraph);
320
+
321
+ deallocatePreArc_pg(preArc);
322
+ }
323
+
324
+ void destroyPreNode_pg(IDnum preNodeID, PreGraph * preGraph)
325
+ {
326
+ PreNode *preNode;
327
+ IDnum ID = preNodeID;
328
+ IDnum index;
329
+ PreMarker * preMarker;
330
+
331
+ //velvetLog("Destroying %ld\n", (long) preNodeID);
332
+
333
+ if (ID < 0)
334
+ ID = -ID;
335
+
336
+ preNode = &(preGraph->preNodes[ID]);
337
+
338
+ // PreNode preArcs:
339
+ while (preNode->preArcLeft != NULL_IDX)
340
+ destroyPreArc_pg(preNode->preArcLeft, preGraph);
341
+ while (preNode->preArcRight != NULL_IDX)
342
+ destroyPreArc_pg(preNode->preArcRight, preGraph);
343
+
344
+ // PreMarkers
345
+ if (preGraph->nodeReferenceMarkers) {
346
+ for (index = 0; index < preGraph->nodeReferenceMarkerCounts[ID]; index++) {
347
+ preMarker = &(preGraph->nodeReferenceMarkers[ID][index]);
348
+ if (preMarker->previous != NULL)
349
+ preMarker->previous->next = NULL;
350
+ if (preMarker->next != NULL)
351
+ preMarker->next->previous = NULL;
352
+ preMarker->preNodeID = 0;
353
+ preMarker->referenceID = 0;
354
+ }
355
+ if (preGraph->nodeReferenceMarkers[ID])
356
+ free(preGraph->nodeReferenceMarkers[ID]);
357
+ preGraph->nodeReferenceMarkers[ID] = NULL;
358
+ preGraph->nodeReferenceMarkerCounts[ID] = 0;
359
+ }
360
+
361
+ // Descriptors
362
+ free(preNode->descriptor);
363
+
364
+ // Flag as destroyed
365
+ preNode->descriptor = NULL;
366
+ }
367
+
368
+ void destroyPreGraph_pg(PreGraph * preGraph)
369
+ {
370
+ IDnum index;
371
+ PreNode *preNode = &(preGraph->preNodes[1]);
372
+
373
+ // Descriptors
374
+ for (index = 1; index <= preGraph->preNodeCount; index++) {
375
+ free(preNode->descriptor);
376
+ preNode++;
377
+ }
378
+
379
+ // Arcs
380
+ #ifdef _OPENMP
381
+ destroyAllocArrayArray(preArcMemory);
382
+ #else
383
+ destroyAllocArray(preArcMemory);
384
+ #endif
385
+
386
+ // Nodes
387
+ free(preGraph->preNodes);
388
+
389
+ // PreMarkers
390
+ if (preGraph->nodeReferenceMarkerCounts) {
391
+ free(preGraph->nodeReferenceMarkerCounts);
392
+ free(preGraph->nodeReferenceMarkers);
393
+ }
394
+
395
+ // Graph
396
+ free(preGraph);
397
+
398
+ }
399
+
400
+ static Nucleotide getNucleotideInDescriptor_pg(Descriptor * descriptor,
401
+ Coordinate i)
402
+ {
403
+ Descriptor *fourMer = descriptor + i / 4;
404
+
405
+ switch (i % 4) {
406
+ case 0:
407
+ return (*fourMer & 3);
408
+ case 1:
409
+ return (*fourMer & 12) >> 2;
410
+ case 2:
411
+ return (*fourMer & 48) >> 4;
412
+ case 3:
413
+ return (*fourMer & 192) >> 6;
414
+ }
415
+ return 0;
416
+ }
417
+
418
+ PreNode *getPreNodeInPreGraph_pg(PreGraph * preGraph, IDnum preNodeID)
419
+ {
420
+ PreNode *preNode;
421
+ if (preNodeID <= 0)
422
+ abort();
423
+ else {
424
+ preNode = &(preGraph->preNodes[preNodeID]);
425
+ if (preNode->descriptor != NULL)
426
+ return preNode;
427
+ else
428
+ return NULL;
429
+ }
430
+ return NULL;
431
+ }
432
+
433
+ PreArcI getPreArc_pg(IDnum preNodeID, PreGraph * preGraph)
434
+ {
435
+ IDnum ID = preNodeID;
436
+ PreNode *preNode;
437
+
438
+ if (ID < 0)
439
+ ID = -ID;
440
+
441
+ preNode = &(preGraph->preNodes[ID]);
442
+
443
+ if (preNodeID > 0)
444
+ return preNode->preArcRight;
445
+ else
446
+ return preNode->preArcLeft;
447
+ }
448
+
449
+ PreArcI getNextPreArc_pg(PreArcI preArc, IDnum preNodeID)
450
+ {
451
+ PreArc *preArcVal;
452
+
453
+ preArcVal = PREARC_FI2P (preArc);
454
+
455
+ if (preNodeID == preArcVal->preNodeIDLeft) {
456
+ return preArcVal->nextLeft;
457
+ } else {
458
+ return preArcVal->nextRight;
459
+ }
460
+ }
461
+
462
+ IDnum getMultiplicity_pg(PreArcI preArc)
463
+ {
464
+ if (preArc == NULL_IDX)
465
+ return 0;
466
+
467
+ return PREARC_FI2P (preArc)->multiplicity;
468
+ }
469
+
470
+ IDnum getOtherEnd_pg(PreArcI preArc, IDnum preNodeID)
471
+ {
472
+ PreArc *preArcVal;
473
+
474
+ preArcVal = PREARC_FI2P (preArc);
475
+ if (preNodeID == preArcVal->preNodeIDLeft)
476
+ return preArcVal->preNodeIDRight;
477
+ else
478
+ return preArcVal->preNodeIDLeft;
479
+ }
480
+
481
+ IDnum getDestination_pg(PreArcI preArc, IDnum preNodeID)
482
+ {
483
+ PreArc *preArcVal;
484
+
485
+ if (preArc == NULL_IDX)
486
+ return 0;
487
+
488
+ preArcVal = PREARC_FI2P (preArc);
489
+
490
+ if (preNodeID == preArcVal->preNodeIDLeft)
491
+ return -preArcVal->preNodeIDRight;
492
+ else
493
+ return -preArcVal->preNodeIDLeft;
494
+ }
495
+
496
+ static void writeNucleotideInDescriptor_pg(Nucleotide nucleotide,
497
+ Descriptor * descriptor,
498
+ Coordinate i)
499
+ {
500
+ Descriptor *fourMer = descriptor + i / 4;
501
+ switch (i % 4) {
502
+ case 3:
503
+ *fourMer &= 63;
504
+ *fourMer += nucleotide << 6;
505
+ return;
506
+ case 2:
507
+ *fourMer &= 207;
508
+ *fourMer += nucleotide << 4;
509
+ return;
510
+ case 1:
511
+ *fourMer &= 243;
512
+ *fourMer += nucleotide << 2;
513
+ return;
514
+ case 0:
515
+ *fourMer &= 252;
516
+ *fourMer += nucleotide;
517
+ }
518
+ }
519
+
520
+ static inline Descriptor *mergeDescriptors_pg(Descriptor * descr,
521
+ Coordinate destinationLength,
522
+ Descriptor * copy,
523
+ Coordinate sourceLength,
524
+ int wordLength)
525
+ {
526
+ Descriptor *readPtr, *writePtr;
527
+ Descriptor readCopy = 0;
528
+ int readOffset, writeOffset;
529
+ size_t arrayLength;
530
+ Coordinate newLength =
531
+ destinationLength + sourceLength + wordLength - 1;
532
+ Descriptor *new;
533
+ Coordinate index;
534
+
535
+ // Specify new array
536
+ arrayLength = newLength / 4;
537
+ if (newLength % 4)
538
+ arrayLength++;
539
+ new = callocOrExit(arrayLength, Descriptor);
540
+ for (index = 0; index < arrayLength; index++)
541
+ new[index] = 0;
542
+
543
+ // Copying first descriptor
544
+ readPtr = descr;
545
+ writePtr = new;
546
+ writeOffset = 0;
547
+ for (index = 0; index < destinationLength + wordLength - 1;
548
+ index++) {
549
+ (*writePtr) >>= 2;
550
+ if (writeOffset == 0)
551
+ readCopy = *readPtr;
552
+ (*writePtr) += (readCopy & 3) << 6;
553
+
554
+ /*switch ((readCopy & 3)) {
555
+ case ADENINE:
556
+ velvetLog("A%ld", index);
557
+ break;
558
+ case CYTOSINE:
559
+ velvetLog("C%ld", index);
560
+ break;
561
+ case GUANINE:
562
+ velvetLog("G%ld", index);
563
+ break;
564
+ case THYMINE:
565
+ velvetLog("T%ld", index);
566
+ break;
567
+ } */
568
+ readCopy >>= 2;
569
+
570
+ writeOffset++;
571
+ if (writeOffset == 4) {
572
+ writePtr++;
573
+ readPtr++;
574
+ writeOffset = 0;
575
+ }
576
+ }
577
+
578
+ //velvetLog("\n");
579
+
580
+ // Skipping initial k-1 letters in second descriptor
581
+ readPtr = &(copy[(wordLength - 1) / 4]);
582
+ readCopy = *readPtr;
583
+ readOffset = (wordLength - 1) % 4;
584
+ readCopy >>= (readOffset * 2);
585
+
586
+ // Going on copying second descriptor
587
+ for (index = 0; index < sourceLength; index++) {
588
+ (*writePtr) >>= 2;
589
+ if (readOffset == 0)
590
+ readCopy = *readPtr;
591
+ (*writePtr) += (readCopy & 3) << 6;
592
+ /*switch ((readCopy & 3)) {
593
+ case ADENINE:
594
+ velvetLog("A%ld", index);
595
+ break;
596
+ case CYTOSINE:
597
+ velvetLog("C%ld", index);
598
+ break;
599
+ case GUANINE:
600
+ velvetLog("G%ld", index);
601
+ break;
602
+ case THYMINE:
603
+ velvetLog("T%ld", index);
604
+ break;
605
+ default:
606
+ velvetLog("?%ld;", index);
607
+ } */
608
+ readCopy >>= 2;
609
+
610
+ writeOffset++;
611
+ if (writeOffset == 4) {
612
+ writePtr++;
613
+ writeOffset = 0;
614
+ }
615
+
616
+ readOffset++;
617
+ if (readOffset == 4) {
618
+ readPtr++;
619
+ readOffset = 0;
620
+ }
621
+ }
622
+
623
+ //velvetLog("\n");
624
+
625
+ if (writeOffset != 0) {
626
+ while (writeOffset != 4) {
627
+ (*writePtr) >>= 2;
628
+ writeOffset++;
629
+ }
630
+ }
631
+
632
+ return new;
633
+ }
634
+
635
+ static inline Descriptor *mergeDescriptorsH2H_pg(Descriptor * descr,
636
+ Coordinate
637
+ destinationLength,
638
+ Descriptor * copy,
639
+ Coordinate sourceLength,
640
+ int wordLength)
641
+ {
642
+ Descriptor *readPtr, *writePtr;
643
+ Descriptor readCopy;
644
+ int readOffset, writeOffset;
645
+ size_t arrayLength;
646
+ Coordinate newLength =
647
+ destinationLength + sourceLength + wordLength - 1;
648
+ Descriptor *new;
649
+ Coordinate index;
650
+
651
+ // Specify new array
652
+ arrayLength = newLength / 4;
653
+ if (newLength % 4)
654
+ arrayLength++;
655
+ new = callocOrExit(arrayLength, Descriptor);
656
+ for (index = 0; index < arrayLength; index++)
657
+ new[index] = 0;
658
+
659
+ // Copying first descriptor (including final (k-1)-mer)
660
+ readPtr = descr;
661
+ readCopy = *readPtr;
662
+ writePtr = new;
663
+ writeOffset = 0;
664
+ readOffset = 0;
665
+ for (index = 0; index < destinationLength + wordLength - 1;
666
+ index++) {
667
+ (*writePtr) >>= 2;
668
+ if (writeOffset == 0)
669
+ readCopy = *readPtr;
670
+ (*writePtr) += (readCopy & 3) << 6;
671
+ /*switch ((readCopy & 3)) {
672
+ case ADENINE:
673
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
674
+ break;
675
+ case CYTOSINE:
676
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
677
+ break;
678
+ case GUANINE:
679
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
680
+ break;
681
+ case THYMINE:
682
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
683
+ break;
684
+ default:
685
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
686
+ } */
687
+ readCopy >>= 2;
688
+
689
+ writeOffset++;
690
+ if (writeOffset == 4) {
691
+ writePtr++;
692
+ readPtr++;
693
+ writeOffset = 0;
694
+ }
695
+ }
696
+
697
+ //velvetLog("\n");
698
+
699
+ // Going to end of second descriptor
700
+ readPtr = &(copy[(sourceLength - 1) / 4]);
701
+ readCopy = *readPtr;
702
+ readOffset = (sourceLength - 1) % 4;
703
+ readCopy <<= ((3 - readOffset) * 2);
704
+
705
+ //velvetLog("Read copy %x\n", readCopy);
706
+
707
+ // Going on copying reverse complement of second descriptor
708
+ for (index = 0; index < sourceLength; index++) {
709
+ (*writePtr) >>= 2;
710
+ if (readOffset == 3)
711
+ readCopy = *readPtr;
712
+ #ifndef COLOR
713
+ (*writePtr) += 192 - (readCopy & 192);
714
+ #else
715
+ (*writePtr) += (readCopy & 192);
716
+ #endif
717
+ /*switch (3 - ((readCopy & 192) >> 6)) {
718
+ case ADENINE:
719
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
720
+ break;
721
+ case CYTOSINE:
722
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
723
+ break;
724
+ case GUANINE:
725
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
726
+ break;
727
+ case THYMINE:
728
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
729
+ break;
730
+ default:
731
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
732
+ } */
733
+ readCopy <<= 2;
734
+
735
+ writeOffset++;
736
+ if (writeOffset == 4) {
737
+ writePtr++;
738
+ writeOffset = 0;
739
+ }
740
+
741
+ readOffset--;
742
+ if (readOffset == -1) {
743
+ readPtr--;
744
+ readOffset = 3;
745
+ }
746
+ }
747
+
748
+ //velvetLog("\n");
749
+
750
+ if (writeOffset != 0) {
751
+ while (writeOffset != 4) {
752
+ (*writePtr) >>= 2;
753
+ writeOffset++;
754
+ }
755
+ }
756
+
757
+ return new;
758
+ }
759
+
760
+ static inline Descriptor *mergeDescriptorsF2F_pg(Descriptor * descr,
761
+ Coordinate
762
+ destinationLength,
763
+ Descriptor * copy,
764
+ Coordinate sourceLength,
765
+ int wordLength)
766
+ {
767
+ Descriptor *readPtr, *writePtr;
768
+ Descriptor readCopy;
769
+ int readOffset, writeOffset;
770
+ size_t arrayLength;
771
+ Coordinate newLength =
772
+ destinationLength + sourceLength + wordLength - 1;
773
+ Descriptor *new;
774
+ Coordinate index;
775
+
776
+ // Specify new array
777
+ arrayLength = newLength / 4;
778
+ if (newLength % 4)
779
+ arrayLength++;
780
+ new = callocOrExit(arrayLength, Descriptor);
781
+ for (index = 0; index < arrayLength; index++)
782
+ new[index] = 0;
783
+
784
+ writePtr = new;
785
+ writeOffset = 0;
786
+
787
+ // Going to end of first descriptor
788
+ readPtr = &(copy[(sourceLength + wordLength - 2) / 4]);
789
+ readCopy = *readPtr;
790
+ readOffset = (sourceLength + wordLength - 2) % 4;
791
+ readCopy <<= ((3 - readOffset) * 2);
792
+
793
+ // Copying reverse complement of first descriptor (minus final (k-1)-mer)
794
+ for (index = 0; index < sourceLength; index++) {
795
+ (*writePtr) >>= 2;
796
+ if (readOffset == 3)
797
+ readCopy = *readPtr;
798
+ #ifndef COLOR
799
+ (*writePtr) += 192 - (readCopy & 192);
800
+ #else
801
+ (*writePtr) += (readCopy & 192);
802
+ #endif
803
+ /*switch (3 - ((readCopy & 192) >> 6)) {
804
+ case ADENINE:
805
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
806
+ break;
807
+ case CYTOSINE:
808
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
809
+ break;
810
+ case GUANINE:
811
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
812
+ break;
813
+ case THYMINE:
814
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
815
+ break;
816
+ default:
817
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
818
+ } */
819
+ readCopy <<= 2;
820
+
821
+ writeOffset++;
822
+ if (writeOffset == 4) {
823
+ writePtr++;
824
+ writeOffset = 0;
825
+ }
826
+
827
+ readOffset--;
828
+ if (readOffset == -1) {
829
+ readPtr--;
830
+ readOffset = 3;
831
+ }
832
+ }
833
+
834
+ //velvetLog("\n");
835
+
836
+ // Going on copying second descriptor
837
+ readPtr = descr;
838
+ readCopy = *readPtr;
839
+ readOffset = 0;
840
+
841
+ for (index = 0; index < destinationLength + wordLength - 1;
842
+ index++) {
843
+ (*writePtr) >>= 2;
844
+ if (readOffset == 0)
845
+ readCopy = *readPtr;
846
+ (*writePtr) += (readCopy & 3) << 6;
847
+ /*switch ((readCopy & 3)) {
848
+ case ADENINE:
849
+ velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
850
+ break;
851
+ case CYTOSINE:
852
+ velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
853
+ break;
854
+ case GUANINE:
855
+ velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
856
+ break;
857
+ case THYMINE:
858
+ velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
859
+ break;
860
+ default:
861
+ velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
862
+ } */
863
+ readCopy >>= 2;
864
+
865
+ writeOffset++;
866
+ if (writeOffset == 4) {
867
+ writePtr++;
868
+ writeOffset = 0;
869
+ }
870
+
871
+ readOffset++;
872
+ if (readOffset == 4) {
873
+ readPtr++;
874
+ readOffset = 0;
875
+ }
876
+ }
877
+
878
+ //velvetLog("\n");
879
+
880
+ if (writeOffset != 0) {
881
+ while (writeOffset != 4) {
882
+ (*writePtr) >>= 2;
883
+ writeOffset++;
884
+ }
885
+ }
886
+
887
+ return new;
888
+ }
889
+
890
+ void setMultiplicity_pg(PreArcI preArc, IDnum mult)
891
+ {
892
+ PREARC_FI2P (preArc)->multiplicity = mult;
893
+ }
894
+
895
+ static void updatePreArcData_pg(PreArcI preArc, IDnum oldPreNodeID,
896
+ IDnum newPreNodeID)
897
+ {
898
+ PreArc *preArcVal;
899
+
900
+ preArcVal = PREARC_FI2P (preArc);
901
+ if (preArcVal->preNodeIDLeft == oldPreNodeID)
902
+ preArcVal->preNodeIDLeft = newPreNodeID;
903
+ if (preArcVal->preNodeIDRight == oldPreNodeID)
904
+ preArcVal->preNodeIDRight = newPreNodeID;
905
+ }
906
+
907
+ // Reshuffles the preGraph->preNodes array to remove NULL pointers
908
+ // Beware that preNode IDs are accordingly reshuffled (all pointers remain valid though)
909
+ void renumberPreNodes_pg(PreGraph * preGraph)
910
+ {
911
+ IDnum preNodeIndex;
912
+ PreNode *currentPreNode, *destinationPreNode;
913
+ IDnum counter = 0;
914
+ IDnum preNodes = preGraph->preNodeCount;
915
+ IDnum newIndex;
916
+ IDnum preMarkerIndex;
917
+ PreMarker * preMarker;
918
+ PreArcI preArc;
919
+
920
+ velvetLog("Renumbering preNodes\n");
921
+ velvetLog("Initial preNode count %li\n", (long) preGraph->preNodeCount);
922
+
923
+ for (preNodeIndex = 1; preNodeIndex <= preNodes; preNodeIndex++) {
924
+ currentPreNode = &(preGraph->preNodes[preNodeIndex]);
925
+
926
+ if (currentPreNode->descriptor == NULL)
927
+ counter++;
928
+ else if (counter != 0) {
929
+ newIndex = preNodeIndex - counter;
930
+ destinationPreNode =
931
+ &(preGraph->preNodes[newIndex]);
932
+
933
+ destinationPreNode->preArcLeft =
934
+ currentPreNode->preArcLeft;
935
+ destinationPreNode->preArcRight =
936
+ currentPreNode->preArcRight;
937
+ destinationPreNode->descriptor =
938
+ currentPreNode->descriptor;
939
+ destinationPreNode->length =
940
+ currentPreNode->length;
941
+
942
+ for (preArc = getPreArc_pg(newIndex, preGraph);
943
+ preArc != NULL_IDX;
944
+ preArc = getNextPreArc_pg(preArc, newIndex))
945
+ updatePreArcData_pg(preArc, preNodeIndex,
946
+ newIndex);
947
+ for (preArc = getPreArc_pg(-newIndex, preGraph);
948
+ preArc != NULL_IDX;
949
+ preArc = getNextPreArc_pg(preArc, -newIndex))
950
+ updatePreArcData_pg(preArc, -preNodeIndex,
951
+ -newIndex);
952
+
953
+ if (preGraph->nodeReferenceMarkers) {
954
+ preGraph->nodeReferenceMarkerCounts[newIndex] = preGraph->nodeReferenceMarkerCounts[preNodeIndex];
955
+ preGraph->nodeReferenceMarkers[newIndex] = preGraph->nodeReferenceMarkers[preNodeIndex];
956
+
957
+ for (preMarkerIndex = 0; preMarkerIndex < preGraph->nodeReferenceMarkerCounts[newIndex]; preMarkerIndex++) {
958
+ preMarker = &(preGraph->nodeReferenceMarkers[newIndex][preMarkerIndex]);
959
+ if (preMarker->preNodeID == preNodeIndex)
960
+ preMarker->preNodeID = newIndex;
961
+ else if (preMarker->preNodeID == -preNodeIndex)
962
+ preMarker->preNodeID = -newIndex;
963
+ else
964
+ abort();
965
+ }
966
+ }
967
+ }
968
+ }
969
+
970
+ preGraph->preNodeCount -= counter;
971
+ preGraph->preNodes = reallocOrExit(preGraph->preNodes,
972
+ preGraph->preNodeCount +
973
+ 1, PreNode);
974
+
975
+ velvetLog("Destroyed %li preNodes\n", (long) counter);
976
+ }
977
+
978
+ // Allocate memory for an empty preGraph created with sequenceCount different sequences
979
+ PreGraph *emptyPreGraph_pg(IDnum sequenceCount, IDnum referenceCount, int wordLength, boolean double_strand)
980
+ {
981
+ PreGraph *newPreGraph = mallocOrExit(1, PreGraph);
982
+ newPreGraph->sequenceCount = sequenceCount;
983
+ newPreGraph->wordLength = wordLength;
984
+ newPreGraph->preNodeCount = 0;
985
+ newPreGraph->double_strand = double_strand;
986
+ newPreGraph->referenceCount = referenceCount;
987
+ newPreGraph->preNodes = NULL;
988
+ newPreGraph->nodeReferenceMarkerCounts = NULL;
989
+ newPreGraph->nodeReferenceMarkers = NULL;
990
+
991
+ #ifdef _OPENMP
992
+ preArcMemory = newAllocArrayArray(omp_get_max_threads(), sizeof(PreArc), "PreArc");
993
+ #endif
994
+
995
+ return newPreGraph;
996
+ }
997
+
998
+ static Descriptor *newDescriptor_pg(Coordinate length, SequencesReader *seqReadInfo,
999
+ Kmer * initialKmer, int wordLength)
1000
+ {
1001
+ char letter;
1002
+ Nucleotide nucleotide;
1003
+ Coordinate totalLength = length + wordLength - 1;
1004
+ size_t arrayLength = totalLength / 4;
1005
+ Descriptor *res;
1006
+ Coordinate index;
1007
+ Kmer kmerCopy;
1008
+
1009
+ if (totalLength % 4 > 0)
1010
+ arrayLength++;
1011
+
1012
+ res = callocOrExit(arrayLength, Descriptor);
1013
+
1014
+ copyKmers(&kmerCopy, initialKmer);
1015
+ for (index = wordLength - 2; index >= 0; index--)
1016
+ writeNucleotideInDescriptor_pg(popNucleotide(&kmerCopy), res,
1017
+ index);
1018
+
1019
+ for (index = wordLength - 1; index < totalLength; index++) {
1020
+ if (seqReadInfo->m_bIsBinary) {
1021
+ letter = **seqReadInfo->m_ppCurrString;
1022
+ *seqReadInfo->m_ppCurrString += 1; // increment the pointer
1023
+ } else {
1024
+ letter = getc(seqReadInfo->m_pFile);
1025
+ while (!isalpha(letter))
1026
+ letter = getc(seqReadInfo->m_pFile);
1027
+ }
1028
+ //velvetLog("%c", letter);
1029
+ switch (letter) {
1030
+ case 'N':
1031
+ case 'A':
1032
+ nucleotide = ADENINE;
1033
+ break;
1034
+ case 'C':
1035
+ nucleotide = CYTOSINE;
1036
+ break;
1037
+ case 'G':
1038
+ nucleotide = GUANINE;
1039
+ break;
1040
+ case 'T':
1041
+ nucleotide = THYMINE;
1042
+ break;
1043
+ default:
1044
+ fflush(stdout);
1045
+ abort();
1046
+ }
1047
+
1048
+ writeNucleotideInDescriptor_pg(nucleotide, res, index);
1049
+ pushNucleotide(initialKmer, nucleotide);
1050
+ }
1051
+
1052
+ //velvetLog(" ");
1053
+
1054
+ return res;
1055
+ }
1056
+
1057
+ void allocatePreNodeSpace_pg(PreGraph * preGraph, IDnum preNodeCount)
1058
+ {
1059
+ preGraph->preNodes = callocOrExit(preNodeCount + 1, PreNode);
1060
+ preGraph->preNodeCount = preNodeCount;
1061
+ }
1062
+
1063
+ void allocatePreMarkerCountSpace_pg(PreGraph * preGraph)
1064
+ {
1065
+ preGraph->nodeReferenceMarkerCounts = callocOrExit(preGraph->preNodeCount + 1, IDnum);
1066
+ preGraph->nodeReferenceMarkers = callocOrExit(preGraph->preNodeCount + 1, PreMarker *);
1067
+ }
1068
+
1069
+ void incrementNodeReferenceMarkerCount_pg(PreGraph * preGraph, IDnum preNodeID) {
1070
+ if (preNodeID < 0)
1071
+ preNodeID = -preNodeID;
1072
+
1073
+ preGraph->nodeReferenceMarkerCounts[preNodeID]++;
1074
+ }
1075
+
1076
+ void allocatePreMarkerSpace_pg(PreGraph * preGraph) {
1077
+ IDnum index;
1078
+
1079
+ if (!preGraph->nodeReferenceMarkers)
1080
+ return;
1081
+
1082
+ for (index = 1; index <= preGraph->preNodeCount; index++) {
1083
+ if (preGraph->nodeReferenceMarkerCounts[index])
1084
+ preGraph->nodeReferenceMarkers[index] = callocOrExit(preGraph->nodeReferenceMarkerCounts[index], PreMarker);
1085
+ else
1086
+ preGraph->nodeReferenceMarkers[index] = NULL;
1087
+ preGraph->nodeReferenceMarkerCounts[index] = 0;
1088
+ }
1089
+ }
1090
+
1091
+ PreMarker * addPreMarker_pg(PreGraph * preGraph, IDnum nodeID, IDnum seqID, Coordinate * start, PreMarker * previous) {
1092
+ PreMarker * preMarker;
1093
+ IDnum positive_nodeID;
1094
+
1095
+ if (nodeID < 0)
1096
+ abort();
1097
+ else
1098
+ positive_nodeID = nodeID;
1099
+
1100
+ //printf("Adding preMarker %li\n", (long) *start);
1101
+
1102
+ preMarker = &(preGraph->nodeReferenceMarkers[positive_nodeID][(preGraph->nodeReferenceMarkerCounts[positive_nodeID])++]);
1103
+ preMarker->previous = previous;
1104
+ if (previous)
1105
+ previous->next = preMarker;
1106
+ preMarker->next = NULL;
1107
+ preMarker->referenceStart = *start;
1108
+ preMarker->length = preGraph->preNodes[positive_nodeID].length;
1109
+ preMarker->preNodeStart = 0;
1110
+ preMarker->preNodeID = nodeID;
1111
+ preMarker->referenceID = seqID;
1112
+
1113
+ *start += preMarker->length;
1114
+
1115
+ return preMarker;
1116
+ }
1117
+ void addPreNodeToPreGraph_pg(PreGraph * preGraph, Coordinate start,
1118
+ Coordinate finish, SequencesReader *seqReadInfo,
1119
+ Kmer * initialKmer, IDnum ID)
1120
+ {
1121
+ PreNode *newnd = &(preGraph->preNodes[ID]);
1122
+
1123
+ newnd->preArcLeft = NULL_IDX;
1124
+ newnd->preArcRight = NULL_IDX;
1125
+
1126
+ newnd->length = finish - start;
1127
+
1128
+ newnd->descriptor =
1129
+ newDescriptor_pg(newnd->length, seqReadInfo, initialKmer,
1130
+ preGraph->wordLength);
1131
+ }
1132
+
1133
+ static void exportPreNode_pg(FILE * outfile, PreNode * preNode, IDnum ID,
1134
+ int wordLength)
1135
+ {
1136
+ Coordinate index;
1137
+ Nucleotide nucleotide;
1138
+
1139
+ if (preNode == NULL)
1140
+ return;
1141
+
1142
+ velvetFprintf(outfile, "NODE\t%ld\t%lld\n", (long) ID, (long long) preNode->length);
1143
+
1144
+ if (preNode->length == 0) {
1145
+ velvetFprintf(outfile, "\n");
1146
+ return;
1147
+ }
1148
+
1149
+ for (index = 0; index < preNode->length + wordLength - 1; index++) {
1150
+ nucleotide =
1151
+ getNucleotideInDescriptor_pg(preNode->descriptor,
1152
+ index);
1153
+ switch (nucleotide) {
1154
+ case ADENINE:
1155
+ velvetFprintf(outfile, "A");
1156
+ break;
1157
+ case CYTOSINE:
1158
+ velvetFprintf(outfile, "C");
1159
+ break;
1160
+ case GUANINE:
1161
+ velvetFprintf(outfile, "G");
1162
+ break;
1163
+ case THYMINE:
1164
+ velvetFprintf(outfile, "T");
1165
+ break;
1166
+ }
1167
+ }
1168
+
1169
+ velvetFprintf(outfile, "\n");
1170
+ }
1171
+
1172
+ static void exportPreMarker(FILE * outfile, PreMarker* preMarker) {
1173
+ velvetFprintf(outfile, "%li\t%lli\t%lli\t%lli\n", (long) preMarker->preNodeID, (long long) preMarker->preNodeStart, (long long) preMarker->referenceStart, (long long) preMarker->length);
1174
+ }
1175
+
1176
+ static void exportPreReference_pg(FILE * outfile, IDnum refIndex, PreGraph * preGraph) {
1177
+ PreMarker * preMarker;
1178
+ IDnum nodeID, index;
1179
+
1180
+ velvetFprintf(outfile, "SEQ\t%li\n", (long) refIndex);
1181
+
1182
+ for (nodeID = 1; nodeID <= preGraph->preNodeCount; nodeID++) {
1183
+ for (index = 0; index < preGraph->nodeReferenceMarkerCounts[nodeID]; index++) {
1184
+ preMarker = &(preGraph->nodeReferenceMarkers[nodeID][index]);
1185
+ if (preMarker->referenceID == refIndex && !preMarker->previous) {
1186
+ for (;preMarker;preMarker = preMarker->next) {
1187
+ exportPreMarker(outfile, preMarker);
1188
+ }
1189
+ }
1190
+ }
1191
+ }
1192
+ }
1193
+
1194
+ void exportPreGraph_pg(char *filename, PreGraph * preGraph)
1195
+ {
1196
+ IDnum index;
1197
+ FILE *outfile;
1198
+ PreNode *preNode;
1199
+ int wordLength = getWordLength_pg(preGraph);
1200
+
1201
+ if (preGraph == NULL) {
1202
+ return;
1203
+ }
1204
+
1205
+ outfile = fopen(filename, "w");
1206
+ if (outfile == NULL) {
1207
+ velvetLog("Couldn't open file, sorry\n");
1208
+ return;
1209
+ } else
1210
+ velvetLog("Writing into pregraph file %s...\n", filename);
1211
+
1212
+ // General data
1213
+ velvetFprintf(outfile, "%ld\t%ld\t%i\t%hi\n", (long) preGraph->preNodeCount,
1214
+ (long) preGraph->sequenceCount, preGraph->wordLength, (short) preGraph->double_strand);
1215
+
1216
+ // PreNode info
1217
+ for (index = 1; index <= preGraph->preNodeCount; index++) {
1218
+ preNode = getPreNodeInPreGraph_pg(preGraph, index);
1219
+ exportPreNode_pg(outfile, preNode, index, wordLength);
1220
+ }
1221
+
1222
+ // Reference sequence info
1223
+ for (index = 1; index <= preGraph->referenceCount; index++)
1224
+ exportPreReference_pg(outfile, index, preGraph);
1225
+
1226
+
1227
+ fclose(outfile);
1228
+ }
1229
+
1230
+ int getWordLength_pg(PreGraph * preGraph)
1231
+ {
1232
+ return preGraph->wordLength;
1233
+ }
1234
+
1235
+ boolean hasSinglePreArc_pg(IDnum preNodeID, PreGraph * preGraph)
1236
+ {
1237
+ IDnum ID = preNodeID;
1238
+ PreNode *preNode;
1239
+ PreArcI preArc;
1240
+
1241
+ if (ID < 0)
1242
+ ID = -ID;
1243
+
1244
+ preNode = &(preGraph->preNodes[ID]);
1245
+
1246
+ if (preNodeID > 0)
1247
+ preArc = preNode->preArcRight;
1248
+ else
1249
+ preArc = preNode->preArcLeft;
1250
+
1251
+ return (preArc != NULL_IDX
1252
+ && getNextPreArc_pg(preArc, preNodeID) == NULL_IDX);
1253
+ }
1254
+
1255
+ char simplePreArcCount_pg(IDnum preNodeID, PreGraph * preGraph)
1256
+ {
1257
+ PreNode *preNode;
1258
+ PreArcI preArc;
1259
+ char count = 0;
1260
+ IDnum ID = preNodeID;
1261
+
1262
+ if (ID < 0)
1263
+ ID = -ID;
1264
+
1265
+ preNode = &(preGraph->preNodes[ID]);
1266
+
1267
+ if (preNodeID > 0)
1268
+ preArc = preNode->preArcRight;
1269
+ else
1270
+ preArc = preNode->preArcLeft;
1271
+
1272
+ for (; preArc != NULL_IDX;
1273
+ preArc = getNextPreArc_pg(preArc, preNodeID))
1274
+ count++;
1275
+
1276
+ return count;
1277
+ }
1278
+
1279
+ boolean isLoop_pg(PreArcI preArc)
1280
+ {
1281
+ PreArc *preArcVal = PREARC_FI2P (preArc);
1282
+
1283
+ return (preArcVal->preNodeIDLeft == preArcVal->preNodeIDRight
1284
+ || preArcVal->preNodeIDLeft == -preArcVal->preNodeIDRight);
1285
+ }
1286
+
1287
+ void setPreNodeDescriptor_pg(Descriptor * descr, Coordinate length, IDnum preNodeID, PreGraph * preGraph) {
1288
+ PreNode * preNode;
1289
+
1290
+ if (preNodeID < 0)
1291
+ preNodeID = -preNodeID;
1292
+
1293
+ preNode = getPreNodeInPreGraph_pg(preGraph, preNodeID);
1294
+ free(preNode->descriptor);
1295
+ preNode->descriptor = descr;
1296
+ preNode->length = length;
1297
+ }
1298
+
1299
+ static void appendPositiveDescriptor_pg(Descriptor ** writePtr, int * writeOffset, IDnum preNodeID, PreGraph * preGraph, boolean initial) {
1300
+ PreNode * preNode = getPreNodeInPreGraph_pg(preGraph, preNodeID);
1301
+ Descriptor * readPtr = preNode->descriptor;
1302
+ Descriptor readCopy;
1303
+ int wordLength = getWordLength_pg(preGraph);
1304
+ Coordinate length = preNode->length;
1305
+ Coordinate index;
1306
+ int readOffset = 0;
1307
+
1308
+ if (initial) {
1309
+ index = 0;
1310
+ readPtr = preNode->descriptor;
1311
+ readCopy = *readPtr;
1312
+ readOffset = 0;
1313
+ } else {
1314
+ index = wordLength - 1;
1315
+ readPtr = &(preNode->descriptor[(wordLength - 1) / 4]);
1316
+ readCopy = *readPtr;
1317
+ readOffset = (wordLength - 1) % 4;
1318
+ readCopy >>= (readOffset * 2);
1319
+ }
1320
+
1321
+ for (; index < length + wordLength - 1; index++) {
1322
+ (**writePtr) >>= 2;
1323
+ if (readOffset == 0)
1324
+ readCopy = *readPtr;
1325
+ (**writePtr) += (readCopy & 3) << 6;
1326
+ readCopy >>= 2;
1327
+
1328
+ if (++(*writeOffset) == 4) {
1329
+ (*writePtr)++;
1330
+ *writeOffset = 0;
1331
+ }
1332
+
1333
+ if (++readOffset == 4) {
1334
+ readPtr++;
1335
+ readOffset = 0;
1336
+ }
1337
+ }
1338
+ }
1339
+
1340
+ static void appendNegativeDescriptor_pg(Descriptor ** writePtr, int * writeOffset, IDnum preNodeID, PreGraph * preGraph, boolean initial) {
1341
+ PreNode * preNode = getPreNodeInPreGraph_pg(preGraph, preNodeID);
1342
+ Descriptor * readPtr = preNode->descriptor;
1343
+ Descriptor readCopy;
1344
+ int wordLength = getWordLength_pg(preGraph);
1345
+ Coordinate length = preNode->length;
1346
+ Coordinate index;
1347
+ int readOffset;
1348
+
1349
+ if (initial)
1350
+ length += wordLength - 1;
1351
+
1352
+ readPtr = &(preNode->descriptor[(length - 1) / 4]);
1353
+ readCopy = *readPtr;
1354
+ readOffset = (length - 1) % 4;
1355
+ readCopy <<= ((3 - readOffset) * 2);
1356
+
1357
+ for (index = 0; index < length; index++) {
1358
+ (**writePtr) >>= 2;
1359
+ if (readOffset == 3)
1360
+ readCopy = *readPtr;
1361
+ #ifndef COLOR
1362
+ (**writePtr) += 192 - (readCopy & 192);
1363
+ #else
1364
+ (**writePtr) += (readCopy & 192);
1365
+ #endif
1366
+ readCopy <<= 2;
1367
+
1368
+ (*writeOffset)++;
1369
+ if (*writeOffset == 4) {
1370
+ (*writePtr)++;
1371
+ *writeOffset = 0;
1372
+ }
1373
+
1374
+ readOffset--;
1375
+ if (readOffset == -1) {
1376
+ readPtr--;
1377
+ readOffset = 3;
1378
+ }
1379
+ }
1380
+ }
1381
+
1382
+ void appendDescriptors_pg(Descriptor ** start, int * writeOffset, IDnum preNodeID, PreGraph* preGraph, boolean initial) {
1383
+ if (preNodeID > 0)
1384
+ appendPositiveDescriptor_pg(start, writeOffset, preNodeID, preGraph, initial);
1385
+ else
1386
+ appendNegativeDescriptor_pg(start, writeOffset, -preNodeID, preGraph, initial);
1387
+ }
1388
+
1389
+ boolean referenceMarkersAreActivated_pg(PreGraph * preGraph) {
1390
+ return preGraph->nodeReferenceMarkers != NULL;
1391
+ }
1392
+
1393
+ static void copyPreMarker(PreMarker * dest, PreMarker * source, IDnum preNodeAID, PreGraph * preGraph) {
1394
+ dest->previous = source->previous;
1395
+ dest->next = source->next;
1396
+
1397
+ dest->preNodeStart = source->preNodeStart;
1398
+ dest->length = source->length;
1399
+ dest->referenceID = source->referenceID;
1400
+ dest->referenceStart = source->referenceStart;
1401
+
1402
+ if (source->preNodeID > 0)
1403
+ dest->preNodeID = preNodeAID;
1404
+ else
1405
+ dest->preNodeID = -preNodeAID;
1406
+
1407
+ if (source->previous)
1408
+ source->previous->next = dest;
1409
+ if (source->next)
1410
+ source->next->previous = dest;
1411
+
1412
+ source->referenceID = 0;
1413
+ source->preNodeID = 0;
1414
+ source->previous = NULL;
1415
+ source->next = NULL;
1416
+ }
1417
+
1418
+ static PreMarker * reallocOrExitReferenceMarkers(PreGraph * preGraph, IDnum preNodeID, IDnum length) {
1419
+ PreMarker * array = callocOrExit(length, PreMarker);
1420
+ PreMarker * writer = array;
1421
+ PreMarker * reader = preGraph->nodeReferenceMarkers[preNodeID];
1422
+ IDnum index;
1423
+
1424
+ for (index = 0; index < preGraph->nodeReferenceMarkerCounts[preNodeID]; index++) {
1425
+ copyPreMarker(writer, reader, preNodeID, preGraph);
1426
+ writer++;
1427
+ reader++;
1428
+ }
1429
+
1430
+ free(preGraph->nodeReferenceMarkers[preNodeID]);
1431
+
1432
+ return array;
1433
+ }
1434
+
1435
+ static void concatenateReferenceMarkers_H2T_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1436
+ IDnum index;
1437
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1438
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1439
+ Coordinate lengthA = preGraph->preNodes[preNodeAID].length + totalOffset;
1440
+ PreMarker * markerA, *next, *markerB;
1441
+ IDnum counter = 0;
1442
+
1443
+ for (index = 0 ; index < countA; index++) {
1444
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1445
+
1446
+ if (markerA->preNodeID > 0)
1447
+ next = markerA->next;
1448
+ else
1449
+ next = markerA->previous;
1450
+
1451
+ if (!next)
1452
+ continue;
1453
+
1454
+ if (markerA->preNodeID == preNodeAID && next->preNodeID != preNodeBID)
1455
+ continue;
1456
+ if (markerA->preNodeID == -preNodeAID && next->preNodeID != -preNodeBID)
1457
+ continue;
1458
+
1459
+ next->referenceID = 0;
1460
+ next->preNodeID = 0;
1461
+
1462
+ markerA->length += next->length;
1463
+ if (markerA->preNodeID > 0) {
1464
+ markerA->next = next->next;
1465
+ if (next->next)
1466
+ next->next->previous = markerA;
1467
+ } else {
1468
+ markerA->previous = next->previous;
1469
+ if (next->previous)
1470
+ next->previous->next = markerA;
1471
+ markerA->referenceStart = next->referenceStart;
1472
+ }
1473
+ next->next = NULL;
1474
+ next->previous = NULL;
1475
+ }
1476
+
1477
+ for (index = 0; index < countB; index++)
1478
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1479
+ counter++;
1480
+
1481
+ if (counter == 0)
1482
+ return;
1483
+
1484
+ if (countA)
1485
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1486
+ else
1487
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1488
+
1489
+ for (index = 0; index < countB; index++) {
1490
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1491
+ if (markerB->referenceID) {
1492
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1493
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1494
+ markerA->preNodeStart += lengthA;
1495
+ }
1496
+ }
1497
+
1498
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1499
+ }
1500
+
1501
+ static void concatenateReferenceMarkers_H2H_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1502
+ IDnum index;
1503
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1504
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1505
+ Coordinate lengthA = preGraph->preNodes[preNodeAID].length + totalOffset;
1506
+ Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
1507
+ PreMarker * markerA, *next, *markerB;
1508
+ IDnum counter = 0;
1509
+
1510
+ for (index = 0 ; index < countA; index++) {
1511
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1512
+
1513
+ if (markerA->preNodeID > 0)
1514
+ next = markerA->next;
1515
+ else
1516
+ next = markerA->previous;
1517
+
1518
+
1519
+ if ((!next)
1520
+ || (markerA->preNodeID == preNodeAID && next->preNodeID != -preNodeBID)
1521
+ || (markerA->preNodeID == -preNodeAID && next->preNodeID != preNodeBID))
1522
+ continue;
1523
+
1524
+ next->referenceID = 0;
1525
+ next->preNodeID = 0;
1526
+
1527
+ markerA->length += next->length;
1528
+ if (markerA->preNodeID > 0) {
1529
+ markerA->next = next->next;
1530
+ if (next->next)
1531
+ next->next->previous = markerA;
1532
+ } else {
1533
+ markerA->previous = next->previous;
1534
+ if (next->previous)
1535
+ next->previous->next = markerA;
1536
+ markerA->referenceStart = next->referenceStart;
1537
+ }
1538
+ next->next = NULL;
1539
+ next->previous = NULL;
1540
+ }
1541
+
1542
+ for (index = 0; index < countB; index++)
1543
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1544
+ counter++;
1545
+
1546
+ if (counter == 0)
1547
+ return;
1548
+
1549
+ if (countA)
1550
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1551
+ else
1552
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1553
+
1554
+ for (index = 0; index < countB; index++) {
1555
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1556
+ if (markerB->referenceID) {
1557
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1558
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1559
+ markerA->preNodeID *= -1;
1560
+ markerA->preNodeStart = lengthA + lengthB - markerA->preNodeStart - markerA->length;
1561
+ }
1562
+ }
1563
+
1564
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1565
+ }
1566
+
1567
+ static void concatenateReferenceMarkers_T2T_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1568
+ IDnum index;
1569
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1570
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1571
+ Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
1572
+ PreMarker * markerA, *next, *markerB;
1573
+ IDnum counter = 0;
1574
+
1575
+ for (index = 0 ; index < countA; index++) {
1576
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1577
+
1578
+ if (markerA->preNodeID < 0)
1579
+ next = markerA->next;
1580
+ else
1581
+ next = markerA->previous;
1582
+
1583
+ if (!next
1584
+ || (markerA->preNodeID == preNodeAID && next->preNodeID != -preNodeBID)
1585
+ || (markerA->preNodeID == -preNodeAID && next->preNodeID != preNodeBID)) {
1586
+ markerA->preNodeStart += lengthB;
1587
+ continue;
1588
+ }
1589
+
1590
+ next->referenceID = 0;
1591
+ next->preNodeID = 0;
1592
+
1593
+ markerA->length += next->length;
1594
+ markerA->preNodeStart = lengthB - next->preNodeStart - next->length;
1595
+ if (markerA->preNodeID < 0) {
1596
+ markerA->next = next->next;
1597
+ if (next->next)
1598
+ next->next->previous = markerA;
1599
+ } else {
1600
+ markerA->previous = next->previous;
1601
+ if (next->previous)
1602
+ next->previous->next = markerA;
1603
+ markerA->referenceStart = next->referenceStart;
1604
+ }
1605
+ next->next = NULL;
1606
+ next->previous = NULL;
1607
+ }
1608
+
1609
+ for (index = 0; index < countB; index++)
1610
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1611
+ counter++;
1612
+
1613
+ if (counter == 0)
1614
+ return;
1615
+
1616
+ if (countA)
1617
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1618
+ else
1619
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1620
+
1621
+ for (index = 0; index < countB; index++) {
1622
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1623
+ if (markerB->referenceID) {
1624
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1625
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1626
+ markerA->preNodeID *= -1;
1627
+ markerA->preNodeStart = lengthB - markerA->preNodeStart - markerA->length;
1628
+ }
1629
+ }
1630
+
1631
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1632
+ }
1633
+
1634
+ static void concatenateReferenceMarkers_T2H_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1635
+ IDnum index;
1636
+ IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
1637
+ IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
1638
+ PreMarker * markerA, *next, *markerB;
1639
+ Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
1640
+ IDnum counter = 0;
1641
+
1642
+ for (index = 0 ; index < countA; index++) {
1643
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][index]);
1644
+
1645
+ if (markerA->preNodeID < 0)
1646
+ next = markerA->next;
1647
+ else
1648
+ next = markerA->previous;
1649
+
1650
+ if (!next
1651
+ || (markerA->preNodeID == preNodeAID && next->preNodeID != preNodeBID)
1652
+ || (markerA->preNodeID == -preNodeAID && next->preNodeID != -preNodeBID)) {
1653
+ markerA->preNodeStart += lengthB;
1654
+ continue;
1655
+ }
1656
+
1657
+ next->referenceID = 0;
1658
+ next->preNodeID = 0;
1659
+
1660
+ markerA->length += next->length;
1661
+ markerA->preNodeStart = next->preNodeStart;
1662
+ if (markerA->preNodeID < 0) {
1663
+ markerA->next = next->next;
1664
+ if (next->next)
1665
+ next->next->previous = markerA;
1666
+ } else {
1667
+ markerA->previous = next->previous;
1668
+ if (next->previous)
1669
+ next->previous->next = markerA;
1670
+ markerA->referenceStart = next->referenceStart;
1671
+ }
1672
+ next->next = NULL;
1673
+ next->previous = NULL;
1674
+ }
1675
+
1676
+ for (index = 0; index < countB; index++)
1677
+ if (preGraph->nodeReferenceMarkers[preNodeBID][index].referenceID)
1678
+ counter++;
1679
+
1680
+ if (counter == 0)
1681
+ return;
1682
+
1683
+ if (countA)
1684
+ preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + counter);
1685
+ else
1686
+ preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(counter, PreMarker);
1687
+
1688
+ for (index = 0; index < countB; index++) {
1689
+ markerB = &(preGraph->nodeReferenceMarkers[preNodeBID][index]);
1690
+ if (markerB->referenceID) {
1691
+ markerA = &(preGraph->nodeReferenceMarkers[preNodeAID][countA++]);
1692
+ copyPreMarker(markerA, markerB, preNodeAID, preGraph);
1693
+ }
1694
+ }
1695
+
1696
+ preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
1697
+ }
1698
+
1699
+ void concatenateReferenceMarkers_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
1700
+ if (!referenceMarkersAreActivated_pg(preGraph))
1701
+ return;
1702
+
1703
+ if (preNodeAID > 0 && preNodeBID > 0)
1704
+ concatenateReferenceMarkers_H2T_pg(preNodeAID, preNodeBID, preGraph, totalOffset);
1705
+ else if (preNodeAID > 0)
1706
+ concatenateReferenceMarkers_H2H_pg(preNodeAID, -preNodeBID, preGraph, totalOffset);
1707
+ else if (preNodeBID > 0)
1708
+ concatenateReferenceMarkers_T2T_pg(-preNodeAID, preNodeBID, preGraph, totalOffset);
1709
+ else
1710
+ concatenateReferenceMarkers_T2H_pg(-preNodeAID, -preNodeBID, preGraph, totalOffset);
1711
+ }
1712
+
1713
+ boolean hasPreMarkers(IDnum nodeID, PreGraph * preGraph) {
1714
+ if (nodeID < 0)
1715
+ nodeID = -nodeID;
1716
+ return preGraph->nodeReferenceMarkerCounts[nodeID] > 0;
1717
+ }