finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
data/ext/src/src/graph.h
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#ifndef _GRAPH_H_
|
|
22
|
+
#define _GRAPH_H_
|
|
23
|
+
|
|
24
|
+
////////////////////////////////////////////////////////////
|
|
25
|
+
// Node functions
|
|
26
|
+
////////////////////////////////////////////////////////////
|
|
27
|
+
|
|
28
|
+
//Creators/destructor
|
|
29
|
+
Node *newNode(IDnum sequenceID, Coordinate start, Coordinate finish,
|
|
30
|
+
Coordinate offset, IDnum ID, TightString * sequences,
|
|
31
|
+
int WORDLENGTH);
|
|
32
|
+
Node *emptyNode();
|
|
33
|
+
void destroyNode(Node * node, Graph * graph);
|
|
34
|
+
|
|
35
|
+
// Locator
|
|
36
|
+
IDnum getNodeID(Node * node);
|
|
37
|
+
Node *getNodeInGraph(Graph * graph, IDnum nodeID);
|
|
38
|
+
Node *getTwinNode(Node * node);
|
|
39
|
+
|
|
40
|
+
// Arc info
|
|
41
|
+
int arcCount(Node * node);
|
|
42
|
+
int simpleArcCount(Node * node);
|
|
43
|
+
Arc *getArc(Node * node);
|
|
44
|
+
boolean hasSingleArc(Node * node);
|
|
45
|
+
|
|
46
|
+
// Descriptor
|
|
47
|
+
Coordinate getNodeLength(Node * node);
|
|
48
|
+
void appendDescriptors(Node * target, Node * source);
|
|
49
|
+
void directlyAppendDescriptors(Node * target, Node * sourcei, Coordinate totalLength);
|
|
50
|
+
void appendSequence(Node * node, TightString * reads,
|
|
51
|
+
PassageMarkerI guide, Graph * graph);
|
|
52
|
+
void splitNodeDescriptor(Node * source, Node * target, Coordinate offset);
|
|
53
|
+
void reduceNode(Node * node);
|
|
54
|
+
void reallocateNodeDescriptor(Node * node, Coordinate length);
|
|
55
|
+
Nucleotide getNucleotideInNode(Node * node, Coordinate index);
|
|
56
|
+
|
|
57
|
+
// Passage markers
|
|
58
|
+
void insertPassageMarker(PassageMarkerI marker, Node * destination);
|
|
59
|
+
PassageMarkerI getMarker(Node * node);
|
|
60
|
+
void setMarker(Node * node, PassageMarkerI marker);
|
|
61
|
+
IDnum markerCount(Node * node);
|
|
62
|
+
|
|
63
|
+
// Short read marker creation
|
|
64
|
+
void incrementReadStartCount(Node * node, Graph * graph);
|
|
65
|
+
void addReadStart(Node * node, IDnum seqID, Coordinate position,
|
|
66
|
+
Graph * graph, Coordinate offset);
|
|
67
|
+
void blurLastShortReadMarker(Node * node, Graph * graph);
|
|
68
|
+
|
|
69
|
+
// Short read marker handling
|
|
70
|
+
ShortReadMarker *getNodeReads(Node * node, Graph * graph);
|
|
71
|
+
IDnum getNodeReadCount(Node * node, Graph * graph);
|
|
72
|
+
ShortReadMarker *commonNodeReads(Node * nodeA, Node * nodeB, Graph * graph,
|
|
73
|
+
IDnum * length);
|
|
74
|
+
ShortReadMarker *extractBackOfNodeReads(Node * node, Coordinate breakpoint,
|
|
75
|
+
Graph * graph, IDnum * length,
|
|
76
|
+
PassageMarkerI sourceMarker,
|
|
77
|
+
ShortLength * sequenceLengths);
|
|
78
|
+
ShortReadMarker *extractFrontOfNodeReads(Node * node,
|
|
79
|
+
Coordinate breakpoint,
|
|
80
|
+
Graph * graph, IDnum * length,
|
|
81
|
+
PassageMarkerI sourceMarker,
|
|
82
|
+
ShortLength * sequenceLengths);
|
|
83
|
+
|
|
84
|
+
// Short read marker moving around
|
|
85
|
+
void foldSymmetricalNodeReads(Node * node, Graph * graph);
|
|
86
|
+
void spreadReadIDs(ShortReadMarker * reads, IDnum readCount, Node * node,
|
|
87
|
+
Graph * graph);
|
|
88
|
+
void injectShortReads(ShortReadMarker * sourceArray, IDnum sourceLength,
|
|
89
|
+
Node * target, Graph * graph);
|
|
90
|
+
void mergeNodeReads(Node * target, Node * source, Graph * graph);
|
|
91
|
+
|
|
92
|
+
#ifndef SINGLE_COV_CAT
|
|
93
|
+
// Virtual coverage
|
|
94
|
+
void setVirtualCoverage(Node * node, Category category,
|
|
95
|
+
Coordinate coverage);
|
|
96
|
+
void incrementVirtualCoverage(Node * node, Category category,
|
|
97
|
+
Coordinate coverage);
|
|
98
|
+
Coordinate getVirtualCoverage(Node * node, Category category);
|
|
99
|
+
Coordinate getTotalCoverage(Node * node);
|
|
100
|
+
|
|
101
|
+
// Original virtual coverage
|
|
102
|
+
void setOriginalVirtualCoverage(Node * node, Category category,
|
|
103
|
+
Coordinate coverage);
|
|
104
|
+
void incrementOriginalVirtualCoverage(Node * node, Category category,
|
|
105
|
+
Coordinate coverage);
|
|
106
|
+
Coordinate getOriginalVirtualCoverage(Node * node, Category category);
|
|
107
|
+
|
|
108
|
+
#else
|
|
109
|
+
// Virtual coverage
|
|
110
|
+
void setVirtualCoverage(Node * node,
|
|
111
|
+
Coordinate coverage);
|
|
112
|
+
void incrementVirtualCoverage(Node * node,
|
|
113
|
+
Coordinate coverage);
|
|
114
|
+
Coordinate getVirtualCoverage(Node * node);
|
|
115
|
+
Coordinate getTotalCoverage(Node * node);
|
|
116
|
+
#endif
|
|
117
|
+
|
|
118
|
+
// Status
|
|
119
|
+
void setNodeStatus(Node * node, boolean status);
|
|
120
|
+
void setSingleNodeStatus(Node * node, boolean status);
|
|
121
|
+
boolean getNodeStatus(Node * node);
|
|
122
|
+
|
|
123
|
+
// Uniqueness
|
|
124
|
+
void setUniqueness(Node * node, boolean value);
|
|
125
|
+
boolean getUniqueness(Node * node);
|
|
126
|
+
|
|
127
|
+
// Gap markers
|
|
128
|
+
void appendGap(Node * node, Coordinate length, Graph * graph);
|
|
129
|
+
void appendNodeGaps(Node * destination, Node * source, Graph * graph);
|
|
130
|
+
|
|
131
|
+
// IO
|
|
132
|
+
TightString *expandNode(Node * node, int WORDLENGTH);
|
|
133
|
+
void appendNodeSequence(Node * node, TightString * sequence,
|
|
134
|
+
Coordinate writeIndex);
|
|
135
|
+
char *expandNodeFragment(Node * node, Coordinate contigStart,
|
|
136
|
+
Coordinate contigFinish, int WORDLENGTH);
|
|
137
|
+
|
|
138
|
+
////////////////////////////////////////////////////////////
|
|
139
|
+
// Arc functions
|
|
140
|
+
////////////////////////////////////////////////////////////
|
|
141
|
+
|
|
142
|
+
// Creators/destructor
|
|
143
|
+
Arc *createArc(Node * origin, Node * destination, Graph * graph);
|
|
144
|
+
void createAnalogousArc(Node * origin, Node * destination, Arc * refArc,
|
|
145
|
+
Graph * graph);
|
|
146
|
+
void destroyArc(Arc * arc, Graph * graph);
|
|
147
|
+
|
|
148
|
+
// Multiplicity
|
|
149
|
+
void setMultiplicity(Arc * arc, IDnum mult);
|
|
150
|
+
IDnum getMultiplicity(Arc * arc);
|
|
151
|
+
|
|
152
|
+
// Extremities
|
|
153
|
+
Node *getOrigin(Arc * arc);
|
|
154
|
+
Node *getDestination(Arc * arc);
|
|
155
|
+
|
|
156
|
+
// Finding arcs
|
|
157
|
+
Arc *getArcBetweenNodes(Node * origin, Node * destination, Graph * graph);
|
|
158
|
+
Arc *getNextArc(Arc * arc);
|
|
159
|
+
|
|
160
|
+
// Lookup table option
|
|
161
|
+
void activateArcLookupTable(Graph * graph);
|
|
162
|
+
void deactivateArcLookupTable(Graph * graph);
|
|
163
|
+
|
|
164
|
+
////////////////////////////////////////////////////////////
|
|
165
|
+
// Short read marker functions
|
|
166
|
+
////////////////////////////////////////////////////////////
|
|
167
|
+
|
|
168
|
+
ShortReadMarker *getShortReadMarkerAtIndex(ShortReadMarker * array,
|
|
169
|
+
IDnum index);
|
|
170
|
+
|
|
171
|
+
IDnum getShortReadMarkerID(ShortReadMarker * marker);
|
|
172
|
+
|
|
173
|
+
extern inline Coordinate getShortReadMarkerPosition(ShortReadMarker * marker);
|
|
174
|
+
extern inline void setShortReadMarkerPosition(ShortReadMarker * marker,
|
|
175
|
+
Coordinate position);
|
|
176
|
+
|
|
177
|
+
extern inline ShortLength getShortReadMarkerOffset(ShortReadMarker * marker);
|
|
178
|
+
extern inline void setShortReadMarkerOffset(ShortReadMarker * marker,
|
|
179
|
+
ShortLength offset);
|
|
180
|
+
|
|
181
|
+
////////////////////////////////////////////////////////////
|
|
182
|
+
// Gap marker functions
|
|
183
|
+
////////////////////////////////////////////////////////////
|
|
184
|
+
|
|
185
|
+
GapMarker *getGap(Node * node, Graph * graph);
|
|
186
|
+
GapMarker *getNextGap(GapMarker * marker);
|
|
187
|
+
Coordinate getGapStart(GapMarker * marker);
|
|
188
|
+
Coordinate getGapFinish(GapMarker * marker);
|
|
189
|
+
|
|
190
|
+
////////////////////////////////////////////////////////////
|
|
191
|
+
// Graph functions
|
|
192
|
+
////////////////////////////////////////////////////////////
|
|
193
|
+
|
|
194
|
+
// Memory allocation
|
|
195
|
+
Graph *emptyGraph(IDnum sequenceCount, int wordLength);
|
|
196
|
+
void allocateNodeSpace(Graph * graph, IDnum nodeCount);
|
|
197
|
+
Node *addEmptyNodeToGraph(Graph * graph, IDnum nodeID);
|
|
198
|
+
void destroyGraph(Graph * graph);
|
|
199
|
+
|
|
200
|
+
// Dimensions
|
|
201
|
+
IDnum nodeCount(Graph * graph);
|
|
202
|
+
IDnum sequenceCount(Graph * graph);
|
|
203
|
+
void renumberNodes(Graph * graph);
|
|
204
|
+
int getWordLength(Graph * graph);
|
|
205
|
+
boolean doubleStrandedGraph(Graph * graph);
|
|
206
|
+
|
|
207
|
+
// Element status
|
|
208
|
+
void resetNodeStatus(Graph * graph);
|
|
209
|
+
|
|
210
|
+
// File IO
|
|
211
|
+
Graph *importGraph(char *filename);
|
|
212
|
+
void exportGraph(char *filename, Graph * graph, TightString * sequences);
|
|
213
|
+
Graph *readPreGraphFile(char *preGraphFilename, boolean * double_strand);
|
|
214
|
+
Graph *readConnectedGraphFile(char *connectedGraphFilename, boolean * double_strand);
|
|
215
|
+
|
|
216
|
+
// Read starts
|
|
217
|
+
void activateReadStarts(Graph * graph);
|
|
218
|
+
boolean readStartsAreActivated(Graph * graph);
|
|
219
|
+
void createNodeReadStartArrays(Graph * graph);
|
|
220
|
+
void orderNodeReadStartArrays(Graph * graph);
|
|
221
|
+
|
|
222
|
+
// Insert lengths
|
|
223
|
+
void setInsertLengths(Graph * graph, Category cat, Coordinate insertLength,
|
|
224
|
+
Coordinate insertLength_std_dev);
|
|
225
|
+
Coordinate getInsertLength(Graph * graph, Category cat);
|
|
226
|
+
double getInsertLength_var(Graph * graph, Category cat);
|
|
227
|
+
|
|
228
|
+
// Gaps markers
|
|
229
|
+
void activateGapMarkers(Graph * graph);
|
|
230
|
+
void deactivateGapMarkers(Graph * graph);
|
|
231
|
+
void sortGapMarkers(Graph * graph);
|
|
232
|
+
|
|
233
|
+
#endif
|
|
@@ -0,0 +1,1472 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
#include <stdlib.h>
|
|
23
|
+
#include <stdio.h>
|
|
24
|
+
#include <string.h>
|
|
25
|
+
#include <limits.h>
|
|
26
|
+
#include <sys/time.h>
|
|
27
|
+
|
|
28
|
+
#ifdef _OPENMP
|
|
29
|
+
#include <omp.h>
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
#include "globals.h"
|
|
33
|
+
#include "graph.h"
|
|
34
|
+
#include "passageMarker.h"
|
|
35
|
+
#include "readSet.h"
|
|
36
|
+
#include "tightString.h"
|
|
37
|
+
#include "recycleBin.h"
|
|
38
|
+
#include "utility.h"
|
|
39
|
+
#include "kmer.h"
|
|
40
|
+
#include "kmerOccurenceTable.h"
|
|
41
|
+
#include "roadMap.h"
|
|
42
|
+
|
|
43
|
+
#define ADENINE 0
|
|
44
|
+
#define CYTOSINE 1
|
|
45
|
+
#define GUANINE 2
|
|
46
|
+
#define THYMINE 3
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
//////////////////////////////////////////////////////////
|
|
50
|
+
// Node Locking
|
|
51
|
+
//////////////////////////////////////////////////////////
|
|
52
|
+
|
|
53
|
+
#ifdef _OPENMP
|
|
54
|
+
|
|
55
|
+
/* Array of per-node locks */
|
|
56
|
+
|
|
57
|
+
static omp_lock_t *nodeLocks = NULL;
|
|
58
|
+
|
|
59
|
+
static void
|
|
60
|
+
createNodeLocks(Graph *graph)
|
|
61
|
+
{
|
|
62
|
+
IDnum nbNodes;
|
|
63
|
+
IDnum nodeIndex;
|
|
64
|
+
|
|
65
|
+
nbNodes = nodeCount(graph) + 1;
|
|
66
|
+
if (nodeLocks)
|
|
67
|
+
free (nodeLocks);
|
|
68
|
+
nodeLocks = mallocOrExit(nbNodes, omp_lock_t);
|
|
69
|
+
|
|
70
|
+
#pragma omp parallel for
|
|
71
|
+
for (nodeIndex = 0; nodeIndex < nbNodes; nodeIndex++)
|
|
72
|
+
omp_init_lock(nodeLocks + nodeIndex);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
static inline void lockNode(Node *node)
|
|
76
|
+
{
|
|
77
|
+
IDnum nodeID = getNodeID(node);
|
|
78
|
+
|
|
79
|
+
if (nodeID < 0)
|
|
80
|
+
nodeID = -nodeID;
|
|
81
|
+
omp_set_lock (nodeLocks + nodeID);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/* Assumes node is already locked */
|
|
85
|
+
static inline void lockTwoNodes(Node *node, Node *node2)
|
|
86
|
+
{
|
|
87
|
+
IDnum nodeID = getNodeID(node);
|
|
88
|
+
IDnum node2ID = getNodeID(node2);
|
|
89
|
+
|
|
90
|
+
if (nodeID < 0)
|
|
91
|
+
nodeID = -nodeID;
|
|
92
|
+
if (node2ID < 0)
|
|
93
|
+
node2ID = -node2ID;
|
|
94
|
+
|
|
95
|
+
if (nodeID == node2ID)
|
|
96
|
+
return;
|
|
97
|
+
|
|
98
|
+
/* Lock lowest ID first to avoid deadlocks */
|
|
99
|
+
if (nodeID < node2ID)
|
|
100
|
+
{
|
|
101
|
+
omp_set_lock (nodeLocks + node2ID);
|
|
102
|
+
}
|
|
103
|
+
else if (!omp_test_lock (nodeLocks + node2ID))
|
|
104
|
+
{
|
|
105
|
+
omp_unset_lock (nodeLocks + nodeID);
|
|
106
|
+
omp_set_lock (nodeLocks + node2ID);
|
|
107
|
+
omp_set_lock (nodeLocks + nodeID);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
static inline void unLockTwoNodes(Node *node, Node *node2)
|
|
112
|
+
{
|
|
113
|
+
IDnum nodeID = getNodeID(node);
|
|
114
|
+
IDnum node2ID = getNodeID(node2);
|
|
115
|
+
|
|
116
|
+
if (nodeID < 0)
|
|
117
|
+
nodeID = -nodeID;
|
|
118
|
+
if (node2ID < 0)
|
|
119
|
+
node2ID = -node2ID;
|
|
120
|
+
|
|
121
|
+
omp_unset_lock (nodeLocks + nodeID);
|
|
122
|
+
if (nodeID != node2ID)
|
|
123
|
+
omp_unset_lock (nodeLocks + node2ID);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
static inline void unLockNode(Node *node)
|
|
127
|
+
{
|
|
128
|
+
IDnum nodeID = getNodeID(node);
|
|
129
|
+
|
|
130
|
+
if (nodeID < 0)
|
|
131
|
+
nodeID = -nodeID;
|
|
132
|
+
omp_unset_lock (nodeLocks + nodeID);
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
#endif
|
|
136
|
+
|
|
137
|
+
//////////////////////////////////////////////////////////
|
|
138
|
+
// Node Lists
|
|
139
|
+
//////////////////////////////////////////////////////////
|
|
140
|
+
typedef struct smallNodeList_st SmallNodeList;
|
|
141
|
+
|
|
142
|
+
struct smallNodeList_st {
|
|
143
|
+
Node *node;
|
|
144
|
+
SmallNodeList *next;
|
|
145
|
+
} ATTRIBUTE_PACKED;
|
|
146
|
+
|
|
147
|
+
static RecycleBin *smallNodeListMemory = NULL;
|
|
148
|
+
|
|
149
|
+
#define BLOCKSIZE 1000
|
|
150
|
+
|
|
151
|
+
#ifdef _OPENMP
|
|
152
|
+
static void initSmallNodeListMemory(void)
|
|
153
|
+
{
|
|
154
|
+
int n = omp_get_max_threads();
|
|
155
|
+
|
|
156
|
+
#pragma omp critical
|
|
157
|
+
{
|
|
158
|
+
if (smallNodeListMemory == NULL)
|
|
159
|
+
smallNodeListMemory = newRecycleBinArray(n, sizeof(SmallNodeList), BLOCKSIZE);
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
#endif
|
|
163
|
+
|
|
164
|
+
static SmallNodeList *allocateSmallNodeList()
|
|
165
|
+
{
|
|
166
|
+
#ifdef _OPENMP
|
|
167
|
+
#ifdef DEBUG
|
|
168
|
+
if (smallNodeListMemory == NULL)
|
|
169
|
+
{
|
|
170
|
+
velvetLog("The memory for small nodes seems uninitialised, "
|
|
171
|
+
"this is probably a bug, aborting.\n");
|
|
172
|
+
abort();
|
|
173
|
+
}
|
|
174
|
+
#endif
|
|
175
|
+
return allocatePointer(getRecycleBinInArray(smallNodeListMemory,
|
|
176
|
+
omp_get_thread_num()));
|
|
177
|
+
#else
|
|
178
|
+
if (smallNodeListMemory == NULL)
|
|
179
|
+
smallNodeListMemory = newRecycleBin(sizeof(SmallNodeList), BLOCKSIZE);
|
|
180
|
+
|
|
181
|
+
return allocatePointer(smallNodeListMemory);
|
|
182
|
+
#endif
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
static void deallocateSmallNodeList(SmallNodeList * smallNodeList)
|
|
186
|
+
{
|
|
187
|
+
#ifdef _OPENMP
|
|
188
|
+
deallocatePointer(getRecycleBinInArray(smallNodeListMemory,
|
|
189
|
+
omp_get_thread_num()),
|
|
190
|
+
smallNodeList);
|
|
191
|
+
#else
|
|
192
|
+
deallocatePointer(smallNodeListMemory, smallNodeList);
|
|
193
|
+
#endif
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
static void destroySmallNodeListMemmory(void)
|
|
197
|
+
{
|
|
198
|
+
if (smallNodeListMemory != NULL)
|
|
199
|
+
{
|
|
200
|
+
#ifdef _OPENMP
|
|
201
|
+
destroyRecycleBinArray(smallNodeListMemory);
|
|
202
|
+
#else
|
|
203
|
+
destroyRecycleBin(smallNodeListMemory);
|
|
204
|
+
#endif
|
|
205
|
+
smallNodeListMemory = NULL;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
static inline void memorizeNode(Node * node, SmallNodeList ** nodePile)
|
|
210
|
+
{
|
|
211
|
+
SmallNodeList *list = allocateSmallNodeList();
|
|
212
|
+
list->node = node;
|
|
213
|
+
list->next = *nodePile;
|
|
214
|
+
*nodePile = list;
|
|
215
|
+
#ifndef _OPENMP
|
|
216
|
+
setSingleNodeStatus(node, true);
|
|
217
|
+
#endif
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
static inline boolean isNodeMemorized(Node * node, SmallNodeList * nodePile)
|
|
221
|
+
{
|
|
222
|
+
#ifdef _OPENMP
|
|
223
|
+
/* SF TODO There must be a faster way to do this: bit mask, hash table, tree, ... ? */
|
|
224
|
+
SmallNodeList * list;
|
|
225
|
+
|
|
226
|
+
for (list = nodePile; list; list = list->next)
|
|
227
|
+
if (list->node == node)
|
|
228
|
+
return true;
|
|
229
|
+
|
|
230
|
+
return false;
|
|
231
|
+
#else
|
|
232
|
+
return getNodeStatus(node);
|
|
233
|
+
#endif
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
static void unMemorizeNodes(SmallNodeList ** nodePile)
|
|
237
|
+
{
|
|
238
|
+
SmallNodeList * list;
|
|
239
|
+
|
|
240
|
+
while (*nodePile) {
|
|
241
|
+
list = *nodePile;
|
|
242
|
+
*nodePile = list->next;
|
|
243
|
+
#ifndef _OPENMP
|
|
244
|
+
setSingleNodeStatus(list->node, false);
|
|
245
|
+
#endif
|
|
246
|
+
deallocateSmallNodeList(list);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
///////////////////////////////////////////////////////////
|
|
251
|
+
// Reference Mappings
|
|
252
|
+
///////////////////////////////////////////////////////////
|
|
253
|
+
typedef struct referenceMapping_st ReferenceMapping;
|
|
254
|
+
|
|
255
|
+
struct referenceMapping_st {
|
|
256
|
+
IDnum referenceStart;
|
|
257
|
+
IDnum nodeStart;
|
|
258
|
+
IDnum length;
|
|
259
|
+
IDnum referenceID;
|
|
260
|
+
IDnum nodeID;
|
|
261
|
+
} ATTRIBUTE_PACKED;
|
|
262
|
+
|
|
263
|
+
static IDnum countMappings(char * preGraphFilename) {
|
|
264
|
+
FILE *file = fopen(preGraphFilename, "r");
|
|
265
|
+
const int maxline = MAXLINE;
|
|
266
|
+
char line[MAXLINE];
|
|
267
|
+
IDnum count = 0;
|
|
268
|
+
|
|
269
|
+
// Go past NODE blocks
|
|
270
|
+
while(fgets(line, maxline, file))
|
|
271
|
+
if (line[0] == 'S')
|
|
272
|
+
break;
|
|
273
|
+
|
|
274
|
+
// Count relevant lines
|
|
275
|
+
while(fgets(line, maxline, file))
|
|
276
|
+
if (line[0] != 'S')
|
|
277
|
+
count++;
|
|
278
|
+
|
|
279
|
+
fclose(file);
|
|
280
|
+
return count;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
static ReferenceMapping * recordReferenceMappings(char * preGraphFilename, IDnum arrayLength) {
|
|
284
|
+
ReferenceMapping * mappings = callocOrExit(arrayLength, ReferenceMapping);
|
|
285
|
+
FILE *file = fopen(preGraphFilename, "r");
|
|
286
|
+
const int maxline = MAXLINE;
|
|
287
|
+
char line[MAXLINE];
|
|
288
|
+
ReferenceMapping * current = mappings;
|
|
289
|
+
IDnum referenceID;
|
|
290
|
+
long long_var;
|
|
291
|
+
long long coord1, coord2, coord3;
|
|
292
|
+
|
|
293
|
+
// Go past NODE blocks
|
|
294
|
+
while(fgets(line, maxline, file))
|
|
295
|
+
if (line[0] == 'S')
|
|
296
|
+
break;
|
|
297
|
+
|
|
298
|
+
sscanf(line, "SEQ\t%li\n", &long_var);
|
|
299
|
+
referenceID = long_var;
|
|
300
|
+
|
|
301
|
+
// Go relevant lines
|
|
302
|
+
while(fgets(line, maxline, file)) {
|
|
303
|
+
if (line[0] != 'S') {
|
|
304
|
+
sscanf(line, "%li\t%lli\t%lli\t%lli\n", &long_var, &coord1, &coord2, &coord3);
|
|
305
|
+
current->referenceID = referenceID;
|
|
306
|
+
current->nodeID = long_var;
|
|
307
|
+
current->nodeStart = coord1;
|
|
308
|
+
current->referenceStart = coord2;
|
|
309
|
+
current->length = coord3;
|
|
310
|
+
current++;
|
|
311
|
+
} else {
|
|
312
|
+
sscanf(line, "SEQ\t%li\n", &long_var);
|
|
313
|
+
referenceID = long_var;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
fclose(file);
|
|
318
|
+
return mappings;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
static int compareRefMaps(const void * ptrA, const void * ptrB) {
|
|
322
|
+
ReferenceMapping * A = (ReferenceMapping *) ptrA;
|
|
323
|
+
ReferenceMapping * B = (ReferenceMapping *) ptrB;
|
|
324
|
+
|
|
325
|
+
if (A->referenceID > B->referenceID)
|
|
326
|
+
return 1;
|
|
327
|
+
else if (A->referenceID < B->referenceID)
|
|
328
|
+
return -1;
|
|
329
|
+
else {
|
|
330
|
+
if (A->referenceStart >= B->referenceStart + B->length)
|
|
331
|
+
return 1;
|
|
332
|
+
else if (A->referenceStart + A->length <= B->referenceStart)
|
|
333
|
+
return -1;
|
|
334
|
+
else
|
|
335
|
+
return 0;
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
static ReferenceMapping * computeReferenceMappings(char * preGraphFilename, ReadSet * reads, Coordinate * referenceMappingLength, IDnum * referenceCount) {
|
|
340
|
+
IDnum index;
|
|
341
|
+
ReferenceMapping * referenceMappings;
|
|
342
|
+
|
|
343
|
+
for(index = 0; index < reads->readCount && reads->categories[index] == 2 * CATEGORIES + 2; index++)
|
|
344
|
+
(*referenceCount)++;
|
|
345
|
+
|
|
346
|
+
if (*referenceCount == 0) {
|
|
347
|
+
*referenceMappingLength = 0;
|
|
348
|
+
return NULL;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
*referenceMappingLength = countMappings(preGraphFilename);
|
|
352
|
+
|
|
353
|
+
if (*referenceMappingLength == 0)
|
|
354
|
+
return NULL;
|
|
355
|
+
|
|
356
|
+
referenceMappings = recordReferenceMappings(preGraphFilename, *referenceMappingLength);
|
|
357
|
+
qsort(referenceMappings, *referenceMappingLength, sizeof(ReferenceMapping), compareRefMaps);
|
|
358
|
+
|
|
359
|
+
return referenceMappings;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
static ReferenceMapping * findReferenceMapping(IDnum seqID, Coordinate refCoord, ReferenceMapping * referenceMappings, Coordinate referenceMappingCount) {
|
|
363
|
+
IDnum positive_seqID;
|
|
364
|
+
Coordinate leftIndex = 0;
|
|
365
|
+
Coordinate rightIndex = referenceMappingCount - 1;
|
|
366
|
+
Coordinate middleIndex;
|
|
367
|
+
ReferenceMapping refMap;
|
|
368
|
+
int comparison;
|
|
369
|
+
|
|
370
|
+
if (seqID > 0)
|
|
371
|
+
positive_seqID = seqID;
|
|
372
|
+
else
|
|
373
|
+
positive_seqID = -seqID;
|
|
374
|
+
|
|
375
|
+
refMap.referenceID = positive_seqID;
|
|
376
|
+
refMap.referenceStart = refCoord;
|
|
377
|
+
refMap.length = 1;
|
|
378
|
+
refMap.nodeStart = 0;
|
|
379
|
+
refMap.nodeID = 0;
|
|
380
|
+
|
|
381
|
+
if (compareRefMaps(&(referenceMappings[leftIndex]), &refMap) == 0)
|
|
382
|
+
return &(referenceMappings[leftIndex]);
|
|
383
|
+
if (compareRefMaps(&(referenceMappings[rightIndex]), &refMap) == 0)
|
|
384
|
+
return &(referenceMappings[rightIndex]);
|
|
385
|
+
|
|
386
|
+
while (true) {
|
|
387
|
+
middleIndex = (rightIndex + leftIndex) / 2;
|
|
388
|
+
comparison = compareRefMaps(&(referenceMappings[middleIndex]), &refMap);
|
|
389
|
+
|
|
390
|
+
if (leftIndex >= rightIndex)
|
|
391
|
+
return NULL;
|
|
392
|
+
else if (comparison == 0)
|
|
393
|
+
return &(referenceMappings[middleIndex]);
|
|
394
|
+
else if (leftIndex == middleIndex)
|
|
395
|
+
return NULL;
|
|
396
|
+
else if (comparison > 0)
|
|
397
|
+
rightIndex = middleIndex;
|
|
398
|
+
else
|
|
399
|
+
leftIndex = middleIndex;
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
///////////////////////////////////////////////////////////
|
|
404
|
+
// Node Mask
|
|
405
|
+
///////////////////////////////////////////////////////////
|
|
406
|
+
|
|
407
|
+
typedef struct nodeMask_st NodeMask;
|
|
408
|
+
|
|
409
|
+
struct nodeMask_st {
|
|
410
|
+
IDnum nodeID;
|
|
411
|
+
IDnum start;
|
|
412
|
+
IDnum finish;
|
|
413
|
+
} ATTRIBUTE_PACKED;
|
|
414
|
+
|
|
415
|
+
static int compareNodeMasks(const void * ptrA, const void * ptrB) {
|
|
416
|
+
NodeMask * A = (NodeMask *) ptrA;
|
|
417
|
+
NodeMask * B = (NodeMask *) ptrB;
|
|
418
|
+
|
|
419
|
+
if (A->nodeID < B->nodeID)
|
|
420
|
+
return -1;
|
|
421
|
+
else if (A->nodeID > B->nodeID)
|
|
422
|
+
return 1;
|
|
423
|
+
else {
|
|
424
|
+
if (A->start < B->start)
|
|
425
|
+
return -1;
|
|
426
|
+
else if (A->start > B->start)
|
|
427
|
+
return 1;
|
|
428
|
+
else
|
|
429
|
+
return 0;
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
static NodeMask * computeNodeMasks(ReferenceMapping * referenceMappings, Coordinate arrayLength, Graph * graph) {
|
|
434
|
+
NodeMask * nodeMasks;
|
|
435
|
+
NodeMask * currentMask;
|
|
436
|
+
ReferenceMapping * currentMapping = referenceMappings;
|
|
437
|
+
Coordinate index;
|
|
438
|
+
|
|
439
|
+
if (referenceMappings == NULL)
|
|
440
|
+
return NULL;
|
|
441
|
+
|
|
442
|
+
nodeMasks = callocOrExit(arrayLength, NodeMask);
|
|
443
|
+
currentMask = nodeMasks;
|
|
444
|
+
|
|
445
|
+
for (index = 0; index < arrayLength; index++) {
|
|
446
|
+
if (currentMapping->nodeID > 0) {
|
|
447
|
+
currentMask->nodeID = currentMapping->nodeID;
|
|
448
|
+
} else {
|
|
449
|
+
currentMask->nodeID = -currentMapping->nodeID;
|
|
450
|
+
}
|
|
451
|
+
currentMask->start = currentMapping->nodeStart;
|
|
452
|
+
currentMask->finish = currentMapping->nodeStart + currentMapping->length;
|
|
453
|
+
currentMask++;
|
|
454
|
+
currentMapping++;
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
qsort(nodeMasks, arrayLength, sizeof(NodeMask), compareNodeMasks);
|
|
458
|
+
|
|
459
|
+
return nodeMasks;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
///////////////////////////////////////////////////////////
|
|
463
|
+
// Process
|
|
464
|
+
///////////////////////////////////////////////////////////
|
|
465
|
+
|
|
466
|
+
static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename,
|
|
467
|
+
short int accelerationBits, Graph * graph, boolean double_strand, NodeMask * nodeMasks, Coordinate nodeMaskCount)
|
|
468
|
+
{
|
|
469
|
+
FILE *file = fopen(preGraphFilename, "r");
|
|
470
|
+
const int maxline = MAXLINE;
|
|
471
|
+
char line[MAXLINE];
|
|
472
|
+
char c;
|
|
473
|
+
int wordLength;
|
|
474
|
+
Coordinate lineLength, kmerCount;
|
|
475
|
+
Kmer word;
|
|
476
|
+
Kmer antiWord;
|
|
477
|
+
KmerOccurenceTable *kmerTable;
|
|
478
|
+
IDnum index;
|
|
479
|
+
IDnum nodeID = 0;
|
|
480
|
+
Nucleotide nucleotide;
|
|
481
|
+
NodeMask * nodeMask = nodeMasks;
|
|
482
|
+
Coordinate nodeMaskIndex = 0;
|
|
483
|
+
|
|
484
|
+
if (file == NULL)
|
|
485
|
+
exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
|
|
486
|
+
|
|
487
|
+
// Count kmers
|
|
488
|
+
velvetLog("Scanning pre-graph file %s for k-mers\n",
|
|
489
|
+
preGraphFilename);
|
|
490
|
+
|
|
491
|
+
// First line
|
|
492
|
+
if (!fgets(line, maxline, file))
|
|
493
|
+
exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
|
|
494
|
+
sscanf(line, "%*i\t%*i\t%i\n", &wordLength);
|
|
495
|
+
|
|
496
|
+
kmerTable = newKmerOccurenceTable(accelerationBits, wordLength);
|
|
497
|
+
|
|
498
|
+
// Read nodes
|
|
499
|
+
if (!fgets(line, maxline, file))
|
|
500
|
+
exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
|
|
501
|
+
kmerCount = 0;
|
|
502
|
+
while (line[0] == 'N') {
|
|
503
|
+
lineLength = 0;
|
|
504
|
+
while ((c = getc(file)) != EOF && c != '\n')
|
|
505
|
+
lineLength++;
|
|
506
|
+
kmerCount += lineLength - wordLength + 1;
|
|
507
|
+
if (fgets(line, maxline, file) == NULL)
|
|
508
|
+
break;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
velvetLog("%li kmers found\n", (long) kmerCount);
|
|
512
|
+
|
|
513
|
+
for(nodeMaskIndex = 0; nodeMaskIndex < nodeMaskCount; nodeMaskIndex++) {
|
|
514
|
+
kmerCount -= nodeMasks[nodeMaskIndex].finish -
|
|
515
|
+
nodeMasks[nodeMaskIndex].start;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
nodeMaskIndex = 0;
|
|
519
|
+
|
|
520
|
+
fclose(file);
|
|
521
|
+
|
|
522
|
+
// Create table
|
|
523
|
+
allocateKmerOccurences(kmerCount, kmerTable);
|
|
524
|
+
|
|
525
|
+
// Fill table
|
|
526
|
+
file = fopen(preGraphFilename, "r");
|
|
527
|
+
if (file == NULL)
|
|
528
|
+
exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);
|
|
529
|
+
|
|
530
|
+
if (!fgets(line, maxline, file))
|
|
531
|
+
exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
|
|
532
|
+
|
|
533
|
+
// Read nodes
|
|
534
|
+
if (!fgets(line, maxline, file))
|
|
535
|
+
exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
|
|
536
|
+
while (line[0] == 'N') {
|
|
537
|
+
nodeID++;
|
|
538
|
+
|
|
539
|
+
// Fill in the initial word :
|
|
540
|
+
clearKmer(&word);
|
|
541
|
+
clearKmer(&antiWord);
|
|
542
|
+
|
|
543
|
+
for (index = 0; index < wordLength - 1; index++) {
|
|
544
|
+
c = getc(file);
|
|
545
|
+
if (c == 'A')
|
|
546
|
+
nucleotide = ADENINE;
|
|
547
|
+
else if (c == 'C')
|
|
548
|
+
nucleotide = CYTOSINE;
|
|
549
|
+
else if (c == 'G')
|
|
550
|
+
nucleotide = GUANINE;
|
|
551
|
+
else if (c == 'T')
|
|
552
|
+
nucleotide = THYMINE;
|
|
553
|
+
else if (c == '\n')
|
|
554
|
+
exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
|
|
555
|
+
else
|
|
556
|
+
nucleotide = ADENINE;
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
pushNucleotide(&word, nucleotide);
|
|
560
|
+
if (double_strand) {
|
|
561
|
+
#ifdef COLOR
|
|
562
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
563
|
+
#else
|
|
564
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
565
|
+
#endif
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
// Scan through node
|
|
570
|
+
index = 0;
|
|
571
|
+
while((c = getc(file)) != '\n' && c != EOF) {
|
|
572
|
+
if (c == 'A')
|
|
573
|
+
nucleotide = ADENINE;
|
|
574
|
+
else if (c == 'C')
|
|
575
|
+
nucleotide = CYTOSINE;
|
|
576
|
+
else if (c == 'G')
|
|
577
|
+
nucleotide = GUANINE;
|
|
578
|
+
else if (c == 'T')
|
|
579
|
+
nucleotide = THYMINE;
|
|
580
|
+
else
|
|
581
|
+
nucleotide = ADENINE;
|
|
582
|
+
|
|
583
|
+
pushNucleotide(&word, nucleotide);
|
|
584
|
+
if (double_strand) {
|
|
585
|
+
#ifdef COLOR
|
|
586
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
587
|
+
#else
|
|
588
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
589
|
+
#endif
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// Update mask if necessary
|
|
593
|
+
if (nodeMask) {
|
|
594
|
+
if (nodeMask->nodeID < nodeID || (nodeMask->nodeID == nodeID && index >= nodeMask->finish)) {
|
|
595
|
+
if (++nodeMaskIndex == nodeMaskCount)
|
|
596
|
+
nodeMask = NULL;
|
|
597
|
+
else
|
|
598
|
+
nodeMask++;
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
// Check if not masked!
|
|
603
|
+
if (nodeMask) {
|
|
604
|
+
if (nodeMask->nodeID == nodeID && index >= nodeMask->start && index < nodeMask->finish) {
|
|
605
|
+
index++;
|
|
606
|
+
continue;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
if (!double_strand || compareKmers(&word, &antiWord) <= 0)
|
|
611
|
+
recordKmerOccurence(&word, nodeID, index, kmerTable);
|
|
612
|
+
else
|
|
613
|
+
recordKmerOccurence(&antiWord, -nodeID, getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index, kmerTable);
|
|
614
|
+
|
|
615
|
+
index++;
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
if (fgets(line, maxline, file) == NULL)
|
|
619
|
+
break;
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
fclose(file);
|
|
623
|
+
|
|
624
|
+
// Sort table
|
|
625
|
+
sortKmerOccurenceTable(kmerTable);
|
|
626
|
+
|
|
627
|
+
return kmerTable;
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
static void ghostThreadSequenceThroughGraph(TightString * tString,
|
|
631
|
+
KmerOccurenceTable *
|
|
632
|
+
kmerTable, Graph * graph,
|
|
633
|
+
IDnum seqID, Category category,
|
|
634
|
+
boolean readTracking,
|
|
635
|
+
boolean double_strand,
|
|
636
|
+
ReferenceMapping * referenceMappings,
|
|
637
|
+
Coordinate referenceMappingCount,
|
|
638
|
+
IDnum refCount,
|
|
639
|
+
Annotation * annotations,
|
|
640
|
+
IDnum annotationCount,
|
|
641
|
+
boolean second_in_pair)
|
|
642
|
+
{
|
|
643
|
+
Kmer word;
|
|
644
|
+
Kmer antiWord;
|
|
645
|
+
Coordinate readNucleotideIndex;
|
|
646
|
+
KmerOccurence *kmerOccurence;
|
|
647
|
+
int wordLength = getWordLength(graph);
|
|
648
|
+
Nucleotide nucleotide;
|
|
649
|
+
IDnum refID;
|
|
650
|
+
Coordinate refCoord;
|
|
651
|
+
ReferenceMapping * refMap = NULL;
|
|
652
|
+
Coordinate uniqueIndex = 0;
|
|
653
|
+
Coordinate annotIndex = 0;
|
|
654
|
+
IDnum annotCount = 0;
|
|
655
|
+
boolean reversed;
|
|
656
|
+
SmallNodeList * nodePile = NULL;
|
|
657
|
+
Annotation * annotation = annotations;
|
|
658
|
+
|
|
659
|
+
Node *node = NULL;
|
|
660
|
+
Node *previousNode = NULL;
|
|
661
|
+
|
|
662
|
+
// Neglect any read which will not be short paired
|
|
663
|
+
if ((!readTracking && category % 2 == 0)
|
|
664
|
+
|| category / 2 >= CATEGORIES)
|
|
665
|
+
return;
|
|
666
|
+
|
|
667
|
+
// Neglect any string shorter than WORDLENGTH :
|
|
668
|
+
if (getLength(tString) < wordLength)
|
|
669
|
+
return;
|
|
670
|
+
|
|
671
|
+
// Verify that all short reads are reasonnably short
|
|
672
|
+
if (getLength(tString) > USHRT_MAX) {
|
|
673
|
+
velvetLog("Short read of length %lli, longer than limit %i\n",
|
|
674
|
+
(long long) getLength(tString), SHRT_MAX);
|
|
675
|
+
velvetLog("You should better declare this sequence as long, because it genuinely is!\n");
|
|
676
|
+
exit(1);
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
clearKmer(&word);
|
|
680
|
+
clearKmer(&antiWord);
|
|
681
|
+
|
|
682
|
+
// Fill in the initial word :
|
|
683
|
+
for (readNucleotideIndex = 0;
|
|
684
|
+
readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
|
|
685
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
686
|
+
pushNucleotide(&word, nucleotide);
|
|
687
|
+
if (double_strand || second_in_pair) {
|
|
688
|
+
#ifdef COLOR
|
|
689
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
690
|
+
#else
|
|
691
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
692
|
+
#endif
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
// Go through sequence
|
|
697
|
+
while (readNucleotideIndex < getLength(tString)) {
|
|
698
|
+
// Shift word:
|
|
699
|
+
nucleotide = getNucleotide(readNucleotideIndex++, tString);
|
|
700
|
+
pushNucleotide(&word, nucleotide);
|
|
701
|
+
if (double_strand || second_in_pair) {
|
|
702
|
+
#ifdef COLOR
|
|
703
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
704
|
+
#else
|
|
705
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
706
|
+
#endif
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
// Update annotation if necessary
|
|
710
|
+
if (annotCount < annotationCount && annotIndex == getAnnotationLength(annotation)) {
|
|
711
|
+
annotation = getNextAnnotation(annotation);
|
|
712
|
+
annotCount++;
|
|
713
|
+
annotIndex = 0;
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
// Search for reference mapping
|
|
717
|
+
if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) {
|
|
718
|
+
refID = getAnnotSequenceID(annotation);
|
|
719
|
+
if (refID > 0)
|
|
720
|
+
refCoord = getStart(annotation) + annotIndex;
|
|
721
|
+
else
|
|
722
|
+
refCoord = getStart(annotation) - annotIndex;
|
|
723
|
+
|
|
724
|
+
refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount);
|
|
725
|
+
// If success
|
|
726
|
+
if (refMap) {
|
|
727
|
+
if (refID > 0)
|
|
728
|
+
node = getNodeInGraph(graph, refMap->nodeID);
|
|
729
|
+
else
|
|
730
|
+
node = getNodeInGraph(graph, -refMap->nodeID);
|
|
731
|
+
} else {
|
|
732
|
+
node = NULL;
|
|
733
|
+
if (previousNode)
|
|
734
|
+
break;
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
// if not.. look in table
|
|
738
|
+
else {
|
|
739
|
+
reversed = false;
|
|
740
|
+
if (double_strand) {
|
|
741
|
+
if (compareKmers(&word, &antiWord) <= 0) {
|
|
742
|
+
kmerOccurence =
|
|
743
|
+
findKmerInKmerOccurenceTable(&word,
|
|
744
|
+
kmerTable);
|
|
745
|
+
} else {
|
|
746
|
+
kmerOccurence =
|
|
747
|
+
findKmerInKmerOccurenceTable(&antiWord,
|
|
748
|
+
kmerTable);
|
|
749
|
+
reversed = true;
|
|
750
|
+
}
|
|
751
|
+
} else {
|
|
752
|
+
if (!second_in_pair) {
|
|
753
|
+
kmerOccurence =
|
|
754
|
+
findKmerInKmerOccurenceTable(&word,
|
|
755
|
+
kmerTable);
|
|
756
|
+
} else {
|
|
757
|
+
kmerOccurence =
|
|
758
|
+
findKmerInKmerOccurenceTable(&antiWord,
|
|
759
|
+
kmerTable);
|
|
760
|
+
reversed = true;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
if (kmerOccurence) {
|
|
765
|
+
if (!reversed)
|
|
766
|
+
node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
|
|
767
|
+
else
|
|
768
|
+
node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
|
|
769
|
+
} else {
|
|
770
|
+
node = NULL;
|
|
771
|
+
if (previousNode)
|
|
772
|
+
break;
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation))
|
|
778
|
+
annotIndex++;
|
|
779
|
+
else
|
|
780
|
+
uniqueIndex++;
|
|
781
|
+
|
|
782
|
+
previousNode = node;
|
|
783
|
+
|
|
784
|
+
// Fill in graph
|
|
785
|
+
if (node && !isNodeMemorized(node, nodePile))
|
|
786
|
+
{
|
|
787
|
+
#ifdef _OPENMP
|
|
788
|
+
lockNode(node);
|
|
789
|
+
#endif
|
|
790
|
+
incrementReadStartCount(node, graph);
|
|
791
|
+
#ifdef _OPENMP
|
|
792
|
+
unLockNode(node);
|
|
793
|
+
#endif
|
|
794
|
+
memorizeNode(node, &nodePile);
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
unMemorizeNodes(&nodePile);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
static void threadSequenceThroughGraph(TightString * tString,
|
|
802
|
+
KmerOccurenceTable * kmerTable,
|
|
803
|
+
Graph * graph,
|
|
804
|
+
IDnum seqID, Category category,
|
|
805
|
+
boolean readTracking,
|
|
806
|
+
boolean double_strand,
|
|
807
|
+
ReferenceMapping * referenceMappings,
|
|
808
|
+
Coordinate referenceMappingCount,
|
|
809
|
+
IDnum refCount,
|
|
810
|
+
Annotation * annotations,
|
|
811
|
+
IDnum annotationCount,
|
|
812
|
+
boolean second_in_pair)
|
|
813
|
+
{
|
|
814
|
+
Kmer word;
|
|
815
|
+
Kmer antiWord;
|
|
816
|
+
Coordinate readNucleotideIndex;
|
|
817
|
+
Coordinate kmerIndex;
|
|
818
|
+
KmerOccurence *kmerOccurence;
|
|
819
|
+
int wordLength = getWordLength(graph);
|
|
820
|
+
|
|
821
|
+
PassageMarkerI marker = NULL_IDX;
|
|
822
|
+
PassageMarkerI previousMarker = NULL_IDX;
|
|
823
|
+
Node *node = NULL;
|
|
824
|
+
Node *previousNode = NULL;
|
|
825
|
+
Coordinate coord = 0;
|
|
826
|
+
Coordinate previousCoord = 0;
|
|
827
|
+
Nucleotide nucleotide;
|
|
828
|
+
boolean reversed;
|
|
829
|
+
|
|
830
|
+
IDnum refID;
|
|
831
|
+
Coordinate refCoord = 0;
|
|
832
|
+
ReferenceMapping * refMap;
|
|
833
|
+
Annotation * annotation = annotations;
|
|
834
|
+
Coordinate index = 0;
|
|
835
|
+
Coordinate uniqueIndex = 0;
|
|
836
|
+
Coordinate annotIndex = 0;
|
|
837
|
+
IDnum annotCount = 0;
|
|
838
|
+
SmallNodeList * nodePile = NULL;
|
|
839
|
+
|
|
840
|
+
// Neglect any string shorter than WORDLENGTH :
|
|
841
|
+
if (getLength(tString) < wordLength)
|
|
842
|
+
return;
|
|
843
|
+
|
|
844
|
+
clearKmer(&word);
|
|
845
|
+
clearKmer(&antiWord);
|
|
846
|
+
|
|
847
|
+
// Fill in the initial word :
|
|
848
|
+
for (readNucleotideIndex = 0;
|
|
849
|
+
readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
|
|
850
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
851
|
+
pushNucleotide(&word, nucleotide);
|
|
852
|
+
if (double_strand || second_in_pair) {
|
|
853
|
+
#ifdef COLOR
|
|
854
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
855
|
+
#else
|
|
856
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
857
|
+
#endif
|
|
858
|
+
}
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
// Go through sequence
|
|
862
|
+
// printf("len %d\n", getLength(tString));
|
|
863
|
+
while (readNucleotideIndex < getLength(tString)) {
|
|
864
|
+
nucleotide = getNucleotide(readNucleotideIndex++, tString);
|
|
865
|
+
pushNucleotide(&word, nucleotide);
|
|
866
|
+
if (double_strand || second_in_pair) {
|
|
867
|
+
#ifdef COLOR
|
|
868
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
869
|
+
#else
|
|
870
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
871
|
+
#endif
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
// Update annotation if necessary
|
|
875
|
+
if (annotCount < annotationCount && annotIndex == getAnnotationLength(annotation)) {
|
|
876
|
+
annotation = getNextAnnotation(annotation);
|
|
877
|
+
annotCount++;
|
|
878
|
+
annotIndex = 0;
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
// Search for reference mapping
|
|
882
|
+
if (category == REFERENCE) {
|
|
883
|
+
if (referenceMappings)
|
|
884
|
+
refMap = findReferenceMapping(seqID, index, referenceMappings, referenceMappingCount);
|
|
885
|
+
else
|
|
886
|
+
refMap = NULL;
|
|
887
|
+
|
|
888
|
+
if (refMap) {
|
|
889
|
+
node = getNodeInGraph(graph, refMap->nodeID);
|
|
890
|
+
if (refMap->nodeID > 0) {
|
|
891
|
+
coord = refMap->nodeStart + (index - refMap->referenceStart);
|
|
892
|
+
} else {
|
|
893
|
+
coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (index - refMap->referenceStart);
|
|
894
|
+
}
|
|
895
|
+
} else {
|
|
896
|
+
node = NULL;
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
// Search for reference-based mapping
|
|
900
|
+
else if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) {
|
|
901
|
+
refID = getAnnotSequenceID(annotation);
|
|
902
|
+
if (refID > 0)
|
|
903
|
+
refCoord = getStart(annotation) + annotIndex;
|
|
904
|
+
else
|
|
905
|
+
refCoord = getStart(annotation) - annotIndex;
|
|
906
|
+
|
|
907
|
+
refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount);
|
|
908
|
+
// If success
|
|
909
|
+
if (refMap) {
|
|
910
|
+
if (refID > 0) {
|
|
911
|
+
node = getNodeInGraph(graph, refMap->nodeID);
|
|
912
|
+
if (refMap->nodeID > 0) {
|
|
913
|
+
coord = refMap->nodeStart + (refCoord - refMap->referenceStart);
|
|
914
|
+
} else {
|
|
915
|
+
coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (refCoord - refMap->referenceStart);
|
|
916
|
+
}
|
|
917
|
+
} else {
|
|
918
|
+
node = getNodeInGraph(graph, -refMap->nodeID);
|
|
919
|
+
if (refMap->nodeID > 0) {
|
|
920
|
+
coord = getNodeLength(node) - refMap->nodeStart - (refCoord - refMap->referenceStart) - 1;
|
|
921
|
+
} else {
|
|
922
|
+
coord = refMap->nodeStart + refMap->length - (refCoord - refMap->referenceStart) - 1;
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
} else {
|
|
926
|
+
node = NULL;
|
|
927
|
+
if (previousNode)
|
|
928
|
+
break;
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
// Search in table
|
|
932
|
+
else {
|
|
933
|
+
reversed = false;
|
|
934
|
+
if (double_strand) {
|
|
935
|
+
if (compareKmers(&word, &antiWord) <= 0) {
|
|
936
|
+
kmerOccurence =
|
|
937
|
+
findKmerInKmerOccurenceTable(&word,
|
|
938
|
+
kmerTable);
|
|
939
|
+
} else {
|
|
940
|
+
kmerOccurence =
|
|
941
|
+
findKmerInKmerOccurenceTable(&antiWord,
|
|
942
|
+
kmerTable);
|
|
943
|
+
reversed = true;
|
|
944
|
+
}
|
|
945
|
+
} else {
|
|
946
|
+
if (!second_in_pair) {
|
|
947
|
+
kmerOccurence =
|
|
948
|
+
findKmerInKmerOccurenceTable(&word,
|
|
949
|
+
kmerTable);
|
|
950
|
+
} else {
|
|
951
|
+
kmerOccurence =
|
|
952
|
+
findKmerInKmerOccurenceTable(&antiWord,
|
|
953
|
+
kmerTable);
|
|
954
|
+
reversed = true;
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
|
|
958
|
+
if (kmerOccurence) {
|
|
959
|
+
if (!reversed) {
|
|
960
|
+
node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
|
|
961
|
+
coord = getKmerOccurencePosition(kmerOccurence);
|
|
962
|
+
} else {
|
|
963
|
+
node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
|
|
964
|
+
coord = getNodeLength(node) - getKmerOccurencePosition(kmerOccurence) - 1;
|
|
965
|
+
}
|
|
966
|
+
} else {
|
|
967
|
+
node = NULL;
|
|
968
|
+
if (previousNode)
|
|
969
|
+
break;
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
// Increment positions
|
|
974
|
+
if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation))
|
|
975
|
+
annotIndex++;
|
|
976
|
+
else
|
|
977
|
+
uniqueIndex++;
|
|
978
|
+
|
|
979
|
+
// Fill in graph
|
|
980
|
+
if (node)
|
|
981
|
+
{
|
|
982
|
+
#ifdef _OPENMP
|
|
983
|
+
lockNode(node);
|
|
984
|
+
#endif
|
|
985
|
+
kmerIndex = readNucleotideIndex - wordLength;
|
|
986
|
+
|
|
987
|
+
if (previousNode == node
|
|
988
|
+
&& previousCoord == coord - 1) {
|
|
989
|
+
if (category / 2 >= CATEGORIES) {
|
|
990
|
+
setPassageMarkerFinish(marker,
|
|
991
|
+
kmerIndex +
|
|
992
|
+
1);
|
|
993
|
+
setFinishOffset(marker,
|
|
994
|
+
getNodeLength(node)
|
|
995
|
+
- coord - 1);
|
|
996
|
+
} else {
|
|
997
|
+
#ifndef SINGLE_COV_CAT
|
|
998
|
+
incrementVirtualCoverage(node, category / 2, 1);
|
|
999
|
+
incrementOriginalVirtualCoverage(node, category / 2, 1);
|
|
1000
|
+
#else
|
|
1001
|
+
incrementVirtualCoverage(node, 1);
|
|
1002
|
+
#endif
|
|
1003
|
+
}
|
|
1004
|
+
#ifdef _OPENMP
|
|
1005
|
+
unLockNode(node);
|
|
1006
|
+
#endif
|
|
1007
|
+
} else {
|
|
1008
|
+
if (category / 2 >= CATEGORIES) {
|
|
1009
|
+
marker =
|
|
1010
|
+
newPassageMarker(seqID,
|
|
1011
|
+
kmerIndex,
|
|
1012
|
+
kmerIndex + 1,
|
|
1013
|
+
coord,
|
|
1014
|
+
getNodeLength
|
|
1015
|
+
(node) -
|
|
1016
|
+
coord - 1);
|
|
1017
|
+
transposePassageMarker(marker,
|
|
1018
|
+
node);
|
|
1019
|
+
connectPassageMarkers
|
|
1020
|
+
(previousMarker, marker,
|
|
1021
|
+
graph);
|
|
1022
|
+
previousMarker = marker;
|
|
1023
|
+
} else {
|
|
1024
|
+
if (readTracking) {
|
|
1025
|
+
if (!isNodeMemorized(node, nodePile)) {
|
|
1026
|
+
addReadStart(node,
|
|
1027
|
+
seqID,
|
|
1028
|
+
coord,
|
|
1029
|
+
graph,
|
|
1030
|
+
kmerIndex);
|
|
1031
|
+
memorizeNode(node, &nodePile);
|
|
1032
|
+
} else {
|
|
1033
|
+
blurLastShortReadMarker
|
|
1034
|
+
(node, graph);
|
|
1035
|
+
}
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
#ifndef SINGLE_COV_CAT
|
|
1039
|
+
incrementVirtualCoverage(node, category / 2, 1);
|
|
1040
|
+
incrementOriginalVirtualCoverage(node, category / 2, 1);
|
|
1041
|
+
#else
|
|
1042
|
+
incrementVirtualCoverage(node, 1);
|
|
1043
|
+
#endif
|
|
1044
|
+
}
|
|
1045
|
+
#ifdef _OPENMP
|
|
1046
|
+
lockTwoNodes(node, previousNode);
|
|
1047
|
+
#endif
|
|
1048
|
+
if (category != REFERENCE)
|
|
1049
|
+
createArc(previousNode, node, graph);
|
|
1050
|
+
#ifdef _OPENMP
|
|
1051
|
+
unLockTwoNodes(node, previousNode);
|
|
1052
|
+
#endif
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
previousNode = node;
|
|
1056
|
+
previousCoord = coord;
|
|
1057
|
+
}
|
|
1058
|
+
index++;
|
|
1059
|
+
}
|
|
1060
|
+
// printKmer(&word);
|
|
1061
|
+
|
|
1062
|
+
if (readTracking && category / 2 < CATEGORIES)
|
|
1063
|
+
unMemorizeNodes(&nodePile);
|
|
1064
|
+
}
|
|
1065
|
+
|
|
1066
|
+
static void fillUpGraph(ReadSet * reads,
|
|
1067
|
+
KmerOccurenceTable * kmerTable,
|
|
1068
|
+
Graph * graph,
|
|
1069
|
+
boolean readTracking,
|
|
1070
|
+
boolean double_strand,
|
|
1071
|
+
ReferenceMapping * referenceMappings,
|
|
1072
|
+
Coordinate referenceMappingCount,
|
|
1073
|
+
IDnum refCount,
|
|
1074
|
+
char * roadmapFilename)
|
|
1075
|
+
{
|
|
1076
|
+
IDnum readIndex;
|
|
1077
|
+
RoadMapArray *roadmap = NULL;
|
|
1078
|
+
Coordinate *annotationOffset = NULL;
|
|
1079
|
+
struct timeval start, end, diff;
|
|
1080
|
+
|
|
1081
|
+
if (referenceMappings)
|
|
1082
|
+
{
|
|
1083
|
+
roadmap = importRoadMapArray(roadmapFilename);
|
|
1084
|
+
annotationOffset = callocOrExit(reads->readCount, Coordinate);
|
|
1085
|
+
for (readIndex = 1; readIndex < reads->readCount; readIndex++)
|
|
1086
|
+
annotationOffset[readIndex] = annotationOffset[readIndex - 1]
|
|
1087
|
+
+ getAnnotationCount(getRoadMapInArray(roadmap, readIndex - 1));
|
|
1088
|
+
}
|
|
1089
|
+
|
|
1090
|
+
resetNodeStatus(graph);
|
|
1091
|
+
// Allocate memory for the read pairs
|
|
1092
|
+
if (!readStartsAreActivated(graph))
|
|
1093
|
+
activateReadStarts(graph);
|
|
1094
|
+
|
|
1095
|
+
gettimeofday(&start, NULL);
|
|
1096
|
+
#ifdef _OPENMP
|
|
1097
|
+
initSmallNodeListMemory();
|
|
1098
|
+
createNodeLocks(graph);
|
|
1099
|
+
#pragma omp parallel for
|
|
1100
|
+
#endif
|
|
1101
|
+
for (readIndex = refCount; readIndex < reads->readCount; readIndex++)
|
|
1102
|
+
{
|
|
1103
|
+
Annotation * annotations = NULL;
|
|
1104
|
+
IDnum annotationCount = 0;
|
|
1105
|
+
Category category;
|
|
1106
|
+
boolean second_in_pair;
|
|
1107
|
+
|
|
1108
|
+
if (readIndex % 1000000 == 0)
|
|
1109
|
+
velvetLog("Ghost Threading through reads %ld / %ld\n",
|
|
1110
|
+
(long) readIndex, (long) reads->readCount);
|
|
1111
|
+
|
|
1112
|
+
category = reads->categories[readIndex];
|
|
1113
|
+
second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex);
|
|
1114
|
+
|
|
1115
|
+
if (referenceMappings)
|
|
1116
|
+
{
|
|
1117
|
+
annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
|
|
1118
|
+
annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
|
|
1122
|
+
kmerTable,
|
|
1123
|
+
graph, readIndex + 1,
|
|
1124
|
+
category,
|
|
1125
|
+
readTracking, double_strand,
|
|
1126
|
+
referenceMappings, referenceMappingCount,
|
|
1127
|
+
refCount, annotations, annotationCount,
|
|
1128
|
+
second_in_pair);
|
|
1129
|
+
}
|
|
1130
|
+
createNodeReadStartArrays(graph);
|
|
1131
|
+
gettimeofday(&end, NULL);
|
|
1132
|
+
timersub(&end, &start, &diff);
|
|
1133
|
+
velvetLog(" === Ghost-Threaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
|
|
1134
|
+
|
|
1135
|
+
gettimeofday(&start, NULL);
|
|
1136
|
+
#ifdef _OPENMP
|
|
1137
|
+
int threads = omp_get_max_threads();
|
|
1138
|
+
if (threads > 32)
|
|
1139
|
+
threads = 32;
|
|
1140
|
+
|
|
1141
|
+
#pragma omp parallel for num_threads(threads)
|
|
1142
|
+
#endif
|
|
1143
|
+
for (readIndex = 0; readIndex < reads->readCount; readIndex++)
|
|
1144
|
+
{
|
|
1145
|
+
Annotation * annotations = NULL;
|
|
1146
|
+
IDnum annotationCount = 0;
|
|
1147
|
+
Category category;
|
|
1148
|
+
boolean second_in_pair;
|
|
1149
|
+
|
|
1150
|
+
if (readIndex % 1000000 == 0)
|
|
1151
|
+
velvetLog("Threading through reads %li / %li\n",
|
|
1152
|
+
(long) readIndex, (long) reads->readCount);
|
|
1153
|
+
|
|
1154
|
+
category = reads->categories[readIndex];
|
|
1155
|
+
second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex);
|
|
1156
|
+
|
|
1157
|
+
if (referenceMappings)
|
|
1158
|
+
{
|
|
1159
|
+
annotationCount = getAnnotationCount(getRoadMapInArray(roadmap, readIndex));
|
|
1160
|
+
annotations = getAnnotationInArray(roadmap->annotations, annotationOffset[readIndex]);
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
threadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
|
|
1164
|
+
kmerTable,
|
|
1165
|
+
graph, readIndex + 1, category,
|
|
1166
|
+
readTracking, double_strand,
|
|
1167
|
+
referenceMappings, referenceMappingCount,
|
|
1168
|
+
refCount, annotations, annotationCount, second_in_pair);
|
|
1169
|
+
}
|
|
1170
|
+
gettimeofday(&end, NULL);
|
|
1171
|
+
timersub(&end, &start, &diff);
|
|
1172
|
+
velvetLog(" === Threaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
|
|
1173
|
+
|
|
1174
|
+
#ifdef _OPENMP
|
|
1175
|
+
free(nodeLocks);
|
|
1176
|
+
nodeLocks = NULL;
|
|
1177
|
+
#endif
|
|
1178
|
+
|
|
1179
|
+
if (referenceMappings)
|
|
1180
|
+
{
|
|
1181
|
+
destroyRoadMapArray(roadmap);
|
|
1182
|
+
free (annotationOffset);
|
|
1183
|
+
}
|
|
1184
|
+
|
|
1185
|
+
orderNodeReadStartArrays(graph);
|
|
1186
|
+
|
|
1187
|
+
destroySmallNodeListMemmory();
|
|
1188
|
+
|
|
1189
|
+
destroyKmerOccurenceTable(kmerTable);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
Graph *importPreGraph(char *preGraphFilename, ReadSet * reads, char * roadmapFilename,
|
|
1193
|
+
boolean readTracking, short int accelerationBits)
|
|
1194
|
+
{
|
|
1195
|
+
boolean double_strand = false;
|
|
1196
|
+
Graph *graph = readPreGraphFile(preGraphFilename, &double_strand);
|
|
1197
|
+
Coordinate referenceMappingCount = 0;
|
|
1198
|
+
IDnum referenceCount = 0;
|
|
1199
|
+
|
|
1200
|
+
if (nodeCount(graph) == 0)
|
|
1201
|
+
return graph;
|
|
1202
|
+
|
|
1203
|
+
// If necessary compile reference -> node
|
|
1204
|
+
ReferenceMapping * referenceMappings = computeReferenceMappings(preGraphFilename, reads, &referenceMappingCount, &referenceCount);
|
|
1205
|
+
// Node -> reference maps
|
|
1206
|
+
NodeMask * nodeMasks = computeNodeMasks(referenceMappings, referenceMappingCount, graph);
|
|
1207
|
+
|
|
1208
|
+
// Map k-mers to nodes
|
|
1209
|
+
KmerOccurenceTable *kmerTable =
|
|
1210
|
+
referenceGraphKmers(preGraphFilename, accelerationBits, graph, double_strand, nodeMasks, referenceMappingCount);
|
|
1211
|
+
|
|
1212
|
+
free(nodeMasks);
|
|
1213
|
+
|
|
1214
|
+
// Map sequences -> kmers -> nodes
|
|
1215
|
+
fillUpGraph(reads, kmerTable, graph, readTracking, double_strand, referenceMappings, referenceMappingCount, referenceCount, roadmapFilename);
|
|
1216
|
+
|
|
1217
|
+
free(referenceMappings);
|
|
1218
|
+
|
|
1219
|
+
return graph;
|
|
1220
|
+
}
|
|
1221
|
+
|
|
1222
|
+
static void addReadsToGraph(TightString * tString,
|
|
1223
|
+
KmerOccurenceTable * kmerTable,
|
|
1224
|
+
Graph * graph,
|
|
1225
|
+
IDnum seqID, Category category,
|
|
1226
|
+
boolean readTracking,
|
|
1227
|
+
boolean double_strand,
|
|
1228
|
+
boolean second_in_pair)
|
|
1229
|
+
{
|
|
1230
|
+
Kmer word;
|
|
1231
|
+
Kmer antiWord;
|
|
1232
|
+
Coordinate readNucleotideIndex;
|
|
1233
|
+
Coordinate kmerIndex;
|
|
1234
|
+
KmerOccurence *kmerOccurence;
|
|
1235
|
+
int wordLength = getWordLength(graph);
|
|
1236
|
+
|
|
1237
|
+
Node *node = NULL;
|
|
1238
|
+
Node *previousNode = NULL;
|
|
1239
|
+
Coordinate coord = 0;
|
|
1240
|
+
Coordinate previousCoord = 0;
|
|
1241
|
+
Nucleotide nucleotide;
|
|
1242
|
+
boolean reversed;
|
|
1243
|
+
|
|
1244
|
+
Coordinate index = 0;
|
|
1245
|
+
SmallNodeList * nodePile = NULL;
|
|
1246
|
+
|
|
1247
|
+
// Neglect any read which will not be short paired
|
|
1248
|
+
if (category / 2 >= CATEGORIES)
|
|
1249
|
+
return;
|
|
1250
|
+
|
|
1251
|
+
// Neglect any string shorter than WORDLENGTH :
|
|
1252
|
+
if (getLength(tString) < wordLength)
|
|
1253
|
+
return;
|
|
1254
|
+
|
|
1255
|
+
clearKmer(&word);
|
|
1256
|
+
clearKmer(&antiWord);
|
|
1257
|
+
|
|
1258
|
+
// Fill in the initial word :
|
|
1259
|
+
for (readNucleotideIndex = 0;
|
|
1260
|
+
readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
|
|
1261
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
1262
|
+
pushNucleotide(&word, nucleotide);
|
|
1263
|
+
if (double_strand || second_in_pair) {
|
|
1264
|
+
#ifdef COLOR
|
|
1265
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
1266
|
+
#else
|
|
1267
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
1268
|
+
#endif
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
|
|
1272
|
+
// Go through sequence
|
|
1273
|
+
// printf("len %d\n", getLength(tString));
|
|
1274
|
+
while (readNucleotideIndex < getLength(tString)) {
|
|
1275
|
+
nucleotide = getNucleotide(readNucleotideIndex++, tString);
|
|
1276
|
+
pushNucleotide(&word, nucleotide);
|
|
1277
|
+
if (double_strand || second_in_pair) {
|
|
1278
|
+
#ifdef COLOR
|
|
1279
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
1280
|
+
#else
|
|
1281
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
1282
|
+
#endif
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
// Search in table
|
|
1286
|
+
reversed = false;
|
|
1287
|
+
if (double_strand) {
|
|
1288
|
+
if (compareKmers(&word, &antiWord) <= 0) {
|
|
1289
|
+
kmerOccurence =
|
|
1290
|
+
findKmerInKmerOccurenceTable(&word,
|
|
1291
|
+
kmerTable);
|
|
1292
|
+
} else {
|
|
1293
|
+
kmerOccurence =
|
|
1294
|
+
findKmerInKmerOccurenceTable(&antiWord,
|
|
1295
|
+
kmerTable);
|
|
1296
|
+
reversed = true;
|
|
1297
|
+
}
|
|
1298
|
+
} else {
|
|
1299
|
+
if (!second_in_pair) {
|
|
1300
|
+
kmerOccurence =
|
|
1301
|
+
findKmerInKmerOccurenceTable(&word,
|
|
1302
|
+
kmerTable);
|
|
1303
|
+
} else {
|
|
1304
|
+
kmerOccurence =
|
|
1305
|
+
findKmerInKmerOccurenceTable(&antiWord,
|
|
1306
|
+
kmerTable);
|
|
1307
|
+
reversed = true;
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
if (kmerOccurence) {
|
|
1312
|
+
if (!reversed) {
|
|
1313
|
+
node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
|
|
1314
|
+
coord = getKmerOccurencePosition(kmerOccurence);
|
|
1315
|
+
} else {
|
|
1316
|
+
node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
|
|
1317
|
+
coord = getNodeLength(node) - getKmerOccurencePosition(kmerOccurence) - 1;
|
|
1318
|
+
}
|
|
1319
|
+
} else {
|
|
1320
|
+
node = NULL;
|
|
1321
|
+
if (previousNode)
|
|
1322
|
+
break;
|
|
1323
|
+
}
|
|
1324
|
+
|
|
1325
|
+
// Fill in graph
|
|
1326
|
+
if (node)
|
|
1327
|
+
{
|
|
1328
|
+
#ifdef _OPENMP
|
|
1329
|
+
lockNode(node);
|
|
1330
|
+
#endif
|
|
1331
|
+
kmerIndex = readNucleotideIndex - wordLength;
|
|
1332
|
+
|
|
1333
|
+
if (previousNode != node || previousCoord != coord -1) {
|
|
1334
|
+
if (!isNodeMemorized(node, nodePile)) {
|
|
1335
|
+
addReadStart(node,
|
|
1336
|
+
seqID,
|
|
1337
|
+
coord,
|
|
1338
|
+
graph,
|
|
1339
|
+
kmerIndex);
|
|
1340
|
+
memorizeNode(node, &nodePile);
|
|
1341
|
+
} else {
|
|
1342
|
+
blurLastShortReadMarker
|
|
1343
|
+
(node, graph);
|
|
1344
|
+
}
|
|
1345
|
+
}
|
|
1346
|
+
#ifdef _OPENMP
|
|
1347
|
+
unLockNode(node);
|
|
1348
|
+
#endif
|
|
1349
|
+
previousNode = node;
|
|
1350
|
+
previousCoord = coord;
|
|
1351
|
+
}
|
|
1352
|
+
index++;
|
|
1353
|
+
}
|
|
1354
|
+
// printKmer(&word);
|
|
1355
|
+
|
|
1356
|
+
if (category / 2 < CATEGORIES)
|
|
1357
|
+
unMemorizeNodes(&nodePile);
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
static void fillUpConnectedGraph(ReadSet * reads,
|
|
1361
|
+
KmerOccurenceTable * kmerTable,
|
|
1362
|
+
Graph * graph,
|
|
1363
|
+
boolean readTracking,
|
|
1364
|
+
boolean double_strand)
|
|
1365
|
+
{
|
|
1366
|
+
IDnum refCount = 0; // refs not present in connected graphs
|
|
1367
|
+
IDnum readIndex;
|
|
1368
|
+
struct timeval start, end, diff;
|
|
1369
|
+
|
|
1370
|
+
resetNodeStatus(graph);
|
|
1371
|
+
// Allocate memory for the read pairs
|
|
1372
|
+
if (!readStartsAreActivated(graph))
|
|
1373
|
+
activateReadStarts(graph);
|
|
1374
|
+
|
|
1375
|
+
gettimeofday(&start, NULL);
|
|
1376
|
+
#ifdef _OPENMP
|
|
1377
|
+
initSmallNodeListMemory();
|
|
1378
|
+
createNodeLocks(graph);
|
|
1379
|
+
#pragma omp parallel for
|
|
1380
|
+
#endif
|
|
1381
|
+
for (readIndex = refCount; readIndex < reads->readCount; readIndex++)
|
|
1382
|
+
{
|
|
1383
|
+
Category category;
|
|
1384
|
+
boolean second_in_pair;
|
|
1385
|
+
|
|
1386
|
+
if (readIndex % 1000000 == 0)
|
|
1387
|
+
velvetLog("Ghost Threading through reads %ld / %ld\n",
|
|
1388
|
+
(long) readIndex, (long) reads->readCount);
|
|
1389
|
+
|
|
1390
|
+
category = reads->categories[readIndex];
|
|
1391
|
+
second_in_pair = reads->categories[readIndex] & 1 && isSecondInPair(reads, readIndex);
|
|
1392
|
+
|
|
1393
|
+
// referenceMappings = NULL, referenceMappingCount = 0
|
|
1394
|
+
// refCount = 0, annotations = NULL, annotationCount = 0
|
|
1395
|
+
ghostThreadSequenceThroughGraph(getTightStringInArray(reads->tSequences, readIndex),
|
|
1396
|
+
kmerTable,
|
|
1397
|
+
graph, readIndex + 1,
|
|
1398
|
+
category,
|
|
1399
|
+
readTracking, double_strand,
|
|
1400
|
+
NULL, 0,
|
|
1401
|
+
0, NULL, 0,
|
|
1402
|
+
second_in_pair);
|
|
1403
|
+
}
|
|
1404
|
+
createNodeReadStartArrays(graph);
|
|
1405
|
+
gettimeofday(&end, NULL);
|
|
1406
|
+
timersub(&end, &start, &diff);
|
|
1407
|
+
velvetLog(" === Ghost-Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec);
|
|
1408
|
+
|
|
1409
|
+
gettimeofday(&start, NULL);
|
|
1410
|
+
#ifdef _OPENMP
|
|
1411
|
+
int threads = omp_get_max_threads();
|
|
1412
|
+
if (threads > 32)
|
|
1413
|
+
threads = 32;
|
|
1414
|
+
|
|
1415
|
+
#pragma omp parallel for num_threads(threads)
|
|
1416
|
+
#endif
|
|
1417
|
+
for (readIndex = 0; readIndex < reads->readCount; readIndex++)
|
|
1418
|
+
{
|
|
1419
|
+
Category category;
|
|
1420
|
+
boolean second_in_pair;
|
|
1421
|
+
|
|
1422
|
+
if (readIndex % 1000000 == 0)
|
|
1423
|
+
velvetLog("Adding reads %li / %li\n",
|
|
1424
|
+
(long) readIndex, (long) reads->readCount);
|
|
1425
|
+
|
|
1426
|
+
category = reads->categories[readIndex];
|
|
1427
|
+
second_in_pair = reads->categories[readIndex] % 2 && isSecondInPair(reads, readIndex);
|
|
1428
|
+
|
|
1429
|
+
addReadsToGraph(getTightStringInArray(reads->tSequences, readIndex),
|
|
1430
|
+
kmerTable,
|
|
1431
|
+
graph, readIndex + 1, category,
|
|
1432
|
+
readTracking, double_strand, second_in_pair);
|
|
1433
|
+
}
|
|
1434
|
+
gettimeofday(&end, NULL);
|
|
1435
|
+
timersub(&end, &start, &diff);
|
|
1436
|
+
velvetLog(" === Threaded in %ld.%06ld s\n", diff.tv_sec, diff.tv_usec);
|
|
1437
|
+
|
|
1438
|
+
#ifdef _OPENMP
|
|
1439
|
+
free(nodeLocks);
|
|
1440
|
+
nodeLocks = NULL;
|
|
1441
|
+
#endif
|
|
1442
|
+
|
|
1443
|
+
orderNodeReadStartArrays(graph);
|
|
1444
|
+
|
|
1445
|
+
destroySmallNodeListMemmory();
|
|
1446
|
+
|
|
1447
|
+
destroyKmerOccurenceTable(kmerTable);
|
|
1448
|
+
}
|
|
1449
|
+
|
|
1450
|
+
Graph *importConnectedGraph(char *connectedGraphFilename, ReadSet * reads, char * roadmapFilename,
|
|
1451
|
+
boolean readTracking, short int accelerationBits)
|
|
1452
|
+
{
|
|
1453
|
+
boolean double_strand = false;
|
|
1454
|
+
Graph *graph = readConnectedGraphFile(connectedGraphFilename, &double_strand);
|
|
1455
|
+
|
|
1456
|
+
if (nodeCount(graph) == 0)
|
|
1457
|
+
return graph;
|
|
1458
|
+
|
|
1459
|
+
if (readTracking) {
|
|
1460
|
+
Coordinate referenceMappingCount = 0;
|
|
1461
|
+
NodeMask * nodeMasks = NULL;
|
|
1462
|
+
|
|
1463
|
+
// Map k-mers to nodes
|
|
1464
|
+
KmerOccurenceTable *kmerTable =
|
|
1465
|
+
referenceGraphKmers(connectedGraphFilename, accelerationBits, graph, doubleStrandedGraph(graph), nodeMasks, referenceMappingCount);
|
|
1466
|
+
|
|
1467
|
+
// Map sequences -> kmers -> nodes
|
|
1468
|
+
fillUpConnectedGraph(reads, kmerTable, graph, readTracking, double_strand);
|
|
1469
|
+
}
|
|
1470
|
+
|
|
1471
|
+
return graph;
|
|
1472
|
+
}
|